2025-08-14 小马的acRIP-seq

环境还是m6a的

conda activate m6a

cd /home/data/t210424/ac4c

rRNA测了比例竟然是0%所以没去

还有个ac4c.sh是用fastp做的清洗 也行 但没用

#!/bin/bash

# acRIP-seq 上游分析流程 (Trimmomatic 清洗 + Hisat2 映射 + MACS2 peak call)

# Version: 2025-08

# 用法: bash ac4c1.sh

# 运行前: conda activate m6a

############################################

# 用户可修改部分

############################################

THREADS=8

RAW_DIR=/home/data/t210424/ac4c/raw

CLEAN_DIR=~/ac4c/clean

BAM_DIR=~/ac4c/bam

PEAK_DIR=~/ac4c/peaks

GENOME_INDEX=/home/data/t210424/Genome/Mouse/mm10_hisat2_index

ADAPTERS=/home/data/t210424/miniconda3/envs/m6a/share/trimmomatic/adapters/TruSeq3-PE.fa

############################################

# 1. Clean:Trimmomatic

############################################

mkdir -p ${CLEAN_DIR}

echo "=== Step 1: Cleaning reads with Trimmomatic ==="

for file in ${RAW_DIR}/*R1.raw.fastq.gz; do

    base=$(basename "$file" .raw.fastq.gz)

    R1=$file

    R2=${file/R1.raw.fastq.gz/R2.raw.fastq.gz}

    echo "[Trimmomatic] Processing $base ..."

    trimmomatic PE -threads ${THREADS} -phred33 \

        "$R1" "$R2" \

        ${CLEAN_DIR}/${base}_R1.clean.fastq.gz ${CLEAN_DIR}/${base}_R1.unpaired.fastq.gz \

        ${CLEAN_DIR}/${base}_R2.clean.fastq.gz ${CLEAN_DIR}/${base}_R2.unpaired.fastq.gz \

        ILLUMINACLIP:${ADAPTERS}:2:30:10 \

        LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:30

done

############################################

# 2. Mapping 到参考基因组

############################################

mkdir -p ${BAM_DIR}

echo "=== Step 2: Mapping to genome with Hisat2 ==="

for R1 in ${CLEAN_DIR}/*_R1.clean.fastq.gz; do

    R2=${R1/_R1/_R2}

    sample=$(basename "$R1" _R1.clean.fastq.gz)

    echo "[Hisat2] Mapping $sample ..."

    hisat2 -p ${THREADS} -x ${GENOME_INDEX} \

        -1 "$R1" -2 "$R2" \

    | samtools view -@ ${THREADS} -bS - \

    | samtools sort -@ ${THREADS} -o ${BAM_DIR}/${sample}.sorted.bam

    samtools index ${BAM_DIR}/${sample}.sorted.bam

done

############################################

# 3. Peak calling with MACS2

############################################

mkdir -p ${PEAK_DIR}

echo "=== Step 3: Calling peaks with MACS2 ==="

for bam in ${BAM_DIR}/*.sorted.bam; do

    sample=$(basename "$bam" .sorted.bam)

    sample_dir=${PEAK_DIR}/${sample}

    mkdir -p $sample_dir

    echo "[MACS2] Calling peaks for $sample ..."

    macs2 callpeak -t "$bam" \

        -f BAM -g mm -n "$sample" \

        --outdir $sample_dir \

        --keep-dup all --nomodel

done

echo "=== 上游分析完成!BAM 文件在 ${BAM_DIR},Peaks 文件在 ${PEAK_DIR} ==="

这里忘了写input了所以不能用 peak文件夹已删 加了新的脚本

#!/bin/bash

# acRIP-seq 上游分析 pipeline (Trimmomatic + Hisat2 + MACS2 with Input)

# Version: 2025-08

# 用法: bash peakcall.sh

# 运行前: conda activate m6a

############################################

# 用户可修改部分

############################################

THREADS=8

RAW_DIR=/home/data/t210424/ac4c/raw

CLEAN_DIR=~/ac4c/clean

BAM_DIR=~/ac4c/bam

PEAK_DIR=~/ac4c/peaks

GENOME_INDEX=/home/data/t210424/Genome/Mouse/mm10_hisat2_index

# Input BAM 对照映射

declare -A INPUT_MAP

INPUT_MAP["RIP_1"]="L1mixJG073-input1234mix.R1.sorted.bam"

INPUT_MAP["RIP_2"]="L1mixJG073-input1234mix.R1.sorted.bam"

INPUT_MAP["RIP_3"]="L1mixJG073-input1234mix.R1.sorted.bam"

INPUT_MAP["RIP_4"]="L1mixJG073-input1234mix.R1.sorted.bam"

INPUT_MAP["RIP_5"]="L1mixJG074-input5678mix.R1.sorted.bam"

INPUT_MAP["RIP_6"]="L1mixJG074-input5678mix.R1.sorted.bam"

INPUT_MAP["RIP_7"]="L1mixJG074-input5678mix.R1.sorted.bam"

INPUT_MAP["RIP_8"]="L1mixJG074-input5678mix.R1.sorted.bam"

############################################

# 3. Peak calling with MACS2 (with Input)

############################################

mkdir -p ${PEAK_DIR}

echo "=== Step 3: Calling peaks with MACS2 ==="

for bam in ${BAM_DIR}/*.sorted.bam; do

    sample=$(basename "$bam" .sorted.bam)

    sample_dir=${PEAK_DIR}/${sample}

    mkdir -p $sample_dir

    # 从映射表选择 Input BAM

    rip_id=$(echo $sample | grep -oP 'RIP_\d+')

    INPUT_BAM=${BAM_DIR}/${INPUT_MAP[$rip_id]}

    echo "[MACS2] Calling peaks for $sample using Input $INPUT_BAM ..."

    macs2 callpeak -t "$bam" -c "$INPUT_BAM" \

        -f BAM -g mm -n "$sample" \

        --outdir $sample_dir \

        --keep-dup all --nomodel

done

echo "=== Pipeline 完成!BAM 文件在 ${BAM_DIR},peaks 在 ${PEAK_DIR} ==="

高亮

在做bam to bw的时候便于分组 BAM文件夹里的文件名全改成Sham1-4-IP.bam了

这个实在callpeak完了以后才改的

上面那个callpeak应该是要用的

为了可视化还做了IGV

bam2bw.sh这个脚本是把各个样本直接转bigwig格式看看了,没考虑input所以理论上也不能用

diff.sh是用的

0817update完全不对啊这个diff.sh 减出来的值是负的,ripseq的input的全转录组相当于bulk RNA seq 用的是IP/input

下面的代码不用看了直接转下一篇吧

#!/bin/bash

# ========= 配置 =========

bam_dir=~/ac4c/bam

bw_dir=~/ac4c/bigwig

mkdir -p $bw_dir

genome_size=2652783500  # mm10

bin_size=10

declare -A inputs

inputs=( ["Sham"]="Sham_Input.bam" ["SNI"]="SNI_Input.bam" )

# ---------- Step 2: IP-Input 差值 bigWig (使用 CPM 归一化) ----------

echo "Step 2: Generating IP-Input bigWig..."

for group in "${!inputs[@]}"

do

    input_bam="$bam_dir/${inputs[$group]}"

    echo "Processing group $group, input: $input_bam"

    for ip_bam in $bam_dir/${group}[1-9]*_IP.bam

    do

        sample=$(basename "$ip_bam" .bam)

        echo "Generating IP-Input bigWig for $sample..."

        bamCompare \

            -b1 "$ip_bam" \

            -b2 "$input_bam" \

            -o "$bw_dir/${sample}_minus_Input.bw" \

            --binSize $bin_size \

            --normalizeUsing CPM \

            --scaleFactorsMethod None \

            --ignoreDuplicates \

            --extendReads \

            --operation subtract

    done

done

echo "All done. BigWig files saved in $bw_dir"

做完以后还要跑个平均化

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容