环境还是m6a的
conda activate m6a
cd /home/data/t210424/ac4c
rRNA测了比例竟然是0%所以没去
还有个ac4c.sh是用fastp做的清洗 也行 但没用
#!/bin/bash
# acRIP-seq 上游分析流程 (Trimmomatic 清洗 + Hisat2 映射 + MACS2 peak call)
# Version: 2025-08
# 用法: bash ac4c1.sh
# 运行前: conda activate m6a
############################################
# 用户可修改部分
############################################
THREADS=8
RAW_DIR=/home/data/t210424/ac4c/raw
CLEAN_DIR=~/ac4c/clean
BAM_DIR=~/ac4c/bam
PEAK_DIR=~/ac4c/peaks
GENOME_INDEX=/home/data/t210424/Genome/Mouse/mm10_hisat2_index
ADAPTERS=/home/data/t210424/miniconda3/envs/m6a/share/trimmomatic/adapters/TruSeq3-PE.fa
############################################
# 1. Clean:Trimmomatic
############################################
mkdir -p ${CLEAN_DIR}
echo "=== Step 1: Cleaning reads with Trimmomatic ==="
for file in ${RAW_DIR}/*R1.raw.fastq.gz; do
base=$(basename "$file" .raw.fastq.gz)
R1=$file
R2=${file/R1.raw.fastq.gz/R2.raw.fastq.gz}
echo "[Trimmomatic] Processing $base ..."
trimmomatic PE -threads ${THREADS} -phred33 \
"$R1" "$R2" \
${CLEAN_DIR}/${base}_R1.clean.fastq.gz ${CLEAN_DIR}/${base}_R1.unpaired.fastq.gz \
${CLEAN_DIR}/${base}_R2.clean.fastq.gz ${CLEAN_DIR}/${base}_R2.unpaired.fastq.gz \
ILLUMINACLIP:${ADAPTERS}:2:30:10 \
LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:30
done
############################################
# 2. Mapping 到参考基因组
############################################
mkdir -p ${BAM_DIR}
echo "=== Step 2: Mapping to genome with Hisat2 ==="
for R1 in ${CLEAN_DIR}/*_R1.clean.fastq.gz; do
R2=${R1/_R1/_R2}
sample=$(basename "$R1" _R1.clean.fastq.gz)
echo "[Hisat2] Mapping $sample ..."
hisat2 -p ${THREADS} -x ${GENOME_INDEX} \
-1 "$R1" -2 "$R2" \
| samtools view -@ ${THREADS} -bS - \
| samtools sort -@ ${THREADS} -o ${BAM_DIR}/${sample}.sorted.bam
samtools index ${BAM_DIR}/${sample}.sorted.bam
done
############################################
# 3. Peak calling with MACS2
############################################
mkdir -p ${PEAK_DIR}
echo "=== Step 3: Calling peaks with MACS2 ==="
for bam in ${BAM_DIR}/*.sorted.bam; do
sample=$(basename "$bam" .sorted.bam)
sample_dir=${PEAK_DIR}/${sample}
mkdir -p $sample_dir
echo "[MACS2] Calling peaks for $sample ..."
macs2 callpeak -t "$bam" \
-f BAM -g mm -n "$sample" \
--outdir $sample_dir \
--keep-dup all --nomodel
done
echo "=== 上游分析完成!BAM 文件在 ${BAM_DIR},Peaks 文件在 ${PEAK_DIR} ==="
这里忘了写input了所以不能用 peak文件夹已删 加了新的脚本
#!/bin/bash
# acRIP-seq 上游分析 pipeline (Trimmomatic + Hisat2 + MACS2 with Input)
# Version: 2025-08
# 用法: bash peakcall.sh
# 运行前: conda activate m6a
############################################
# 用户可修改部分
############################################
THREADS=8
RAW_DIR=/home/data/t210424/ac4c/raw
CLEAN_DIR=~/ac4c/clean
BAM_DIR=~/ac4c/bam
PEAK_DIR=~/ac4c/peaks
GENOME_INDEX=/home/data/t210424/Genome/Mouse/mm10_hisat2_index
# Input BAM 对照映射
declare -A INPUT_MAP
INPUT_MAP["RIP_1"]="L1mixJG073-input1234mix.R1.sorted.bam"
INPUT_MAP["RIP_2"]="L1mixJG073-input1234mix.R1.sorted.bam"
INPUT_MAP["RIP_3"]="L1mixJG073-input1234mix.R1.sorted.bam"
INPUT_MAP["RIP_4"]="L1mixJG073-input1234mix.R1.sorted.bam"
INPUT_MAP["RIP_5"]="L1mixJG074-input5678mix.R1.sorted.bam"
INPUT_MAP["RIP_6"]="L1mixJG074-input5678mix.R1.sorted.bam"
INPUT_MAP["RIP_7"]="L1mixJG074-input5678mix.R1.sorted.bam"
INPUT_MAP["RIP_8"]="L1mixJG074-input5678mix.R1.sorted.bam"
############################################
# 3. Peak calling with MACS2 (with Input)
############################################
mkdir -p ${PEAK_DIR}
echo "=== Step 3: Calling peaks with MACS2 ==="
for bam in ${BAM_DIR}/*.sorted.bam; do
sample=$(basename "$bam" .sorted.bam)
sample_dir=${PEAK_DIR}/${sample}
mkdir -p $sample_dir
# 从映射表选择 Input BAM
rip_id=$(echo $sample | grep -oP 'RIP_\d+')
INPUT_BAM=${BAM_DIR}/${INPUT_MAP[$rip_id]}
echo "[MACS2] Calling peaks for $sample using Input $INPUT_BAM ..."
macs2 callpeak -t "$bam" -c "$INPUT_BAM" \
-f BAM -g mm -n "$sample" \
--outdir $sample_dir \
--keep-dup all --nomodel
done
echo "=== Pipeline 完成!BAM 文件在 ${BAM_DIR},peaks 在 ${PEAK_DIR} ==="
高亮
在做bam to bw的时候便于分组 BAM文件夹里的文件名全改成Sham1-4-IP.bam了
这个实在callpeak完了以后才改的
上面那个callpeak应该是要用的
为了可视化还做了IGV
bam2bw.sh这个脚本是把各个样本直接转bigwig格式看看了,没考虑input所以理论上也不能用
diff.sh是用的
0817update完全不对啊这个diff.sh 减出来的值是负的,ripseq的input的全转录组相当于bulk RNA seq 用的是IP/input
下面的代码不用看了直接转下一篇吧
#!/bin/bash
# ========= 配置 =========
bam_dir=~/ac4c/bam
bw_dir=~/ac4c/bigwig
mkdir -p $bw_dir
genome_size=2652783500 # mm10
bin_size=10
declare -A inputs
inputs=( ["Sham"]="Sham_Input.bam" ["SNI"]="SNI_Input.bam" )
# ---------- Step 2: IP-Input 差值 bigWig (使用 CPM 归一化) ----------
echo "Step 2: Generating IP-Input bigWig..."
for group in "${!inputs[@]}"
do
input_bam="$bam_dir/${inputs[$group]}"
echo "Processing group $group, input: $input_bam"
for ip_bam in $bam_dir/${group}[1-9]*_IP.bam
do
sample=$(basename "$ip_bam" .bam)
echo "Generating IP-Input bigWig for $sample..."
bamCompare \
-b1 "$ip_bam" \
-b2 "$input_bam" \
-o "$bw_dir/${sample}_minus_Input.bw" \
--binSize $bin_size \
--normalizeUsing CPM \
--scaleFactorsMethod None \
--ignoreDuplicates \
--extendReads \
--operation subtract
done
done
echo "All done. BigWig files saved in $bw_dir"
做完以后还要跑个平均化