/home/data/t210424/m6a/qs.sh做个bam文件过滤
#!/bin/bash
input_dir="/home/data/t210424/m6a"
output_dir="/home/data/t210424/m6a/filtered"
mkdir -p "$output_dir"
# 直接处理每个文件
for file in In_Sham_1.bam In_SNI_1.bam In_sh_1.bam RIP_sh_1.bam RIP_Sham_1.bam RIP_SNI_1.bam \
In_Sham_2.bam In_SNI_2.bam In_sh_2.bam RIP_sh_2.bam RIP_Sham_2.bam RIP_SNI_2.bam \
In_Sham_3.bam In_SNI_3.bam In_sh_3.bam RIP_sh_3.bam RIP_Sham_3.bam RIP_SNI_3.bam; do
output_file="${output_dir}/${file%.bam}.filtered.bam"
# 过滤 BAM 文件并输出到新的文件
samtools view -b -q 10 "${input_dir}/${file}" -o "${output_file}"
done
echo "QS 过滤完成。"
/home/data/t210424/m6a/filtered/merge.sh做合并bam
#!/bin/bash
# 定义输入和输出文件名
output_dir="/home/data/t210424/m6a/merged"
mkdir -p "$output_dir" # 创建输出目录
# 合并 RIP_Sham 样本
samtools merge "${output_dir}/RIP_Sham.bam" \
"RIP_Sham_1.filtered.bam" \
"RIP_Sham_2.filtered.bam" \
"RIP_Sham_3.filtered.bam"
# 合并 RIP_SNI 样本
samtools merge "${output_dir}/RIP_SNI.bam" \
"RIP_SNI_1.filtered.bam" \
"RIP_SNI_2.filtered.bam" \
"RIP_SNI_3.filtered.bam"
samtools merge "${output_dir}/RIP_sh.bam" \
"RIP_sh_1.filtered.bam" \
"RIP_sh_2.filtered.bam" \
"RIP_sh_3.filtered.bam"
samtools merge "${output_dir}/In_Sham.bam" \
"In_Sham_1.filtered.bam" \
"In_Sham_2.filtered.bam" \
"In_Sham_3.filtered.bam"
samtools merge "${output_dir}/In_SNI.bam" \
"In_SNI_1.filtered.bam" \
"In_SNI_2.filtered.bam" \
"In_SNI_3.filtered.bam"
samtools merge "${output_dir}/In_sh.bam" \
"In_sh_1.filtered.bam" \
"In_sh_2.filtered.bam" \
"In_sh_3.filtered.bam"
echo "合并完成:"
peak calling
#!/bin/bash
# 设置基因组大小
genome_size="mm" # 小鼠
# 定义输出目录
output_dir="/home/data/t210424/m6a/peaks"
mkdir -p "$output_dir"
# 进行 Peak Calling
# RIP_Sham
macs2 callpeak -t "RIP_Sham.bam" \
-c "In_Sham.bam" \
-f BAM -g "$genome_size" --outdir "$output_dir" \
-n "RIP_Sham_peaks" --nomodel --extsize 147
# RIP_sh
macs2 callpeak -t "RIP_sh.bam" \
-c "In_sh.bam" \
-f BAM -g "$genome_size" --outdir "$output_dir" \
-n "RIP_sh_peaks" --nomodel --extsize 147
# RIP_SNI
macs2 callpeak -t "RIP_SNI.bam" \
-c "In_SNI.bam" \
-f BAM -g "$genome_size" --outdir "$output_dir" \
-n "RIP_SNI_peaks" --nomodel --extsize 147
echo "所有组的 Peak Calling 完成"
chmod +x peakcalling.sh
./peakcalling.sh
call完以后用RIP_Sham_peaks_peaks.narrowPeak RIP_SNI_peaks_peaks.narrowPeak RIP_sh_peaks_peaks.narrowPeak做top consensus motif提取
转换染色体名称
使用awk脚本来修改.narrowPeak文件中的染色体名称。以下是一个示例脚本,将染色体名称从1、2等转换为chr1、chr2等:
awk -F'\t' 'BEGIN{OFS="\t"} {$1 = "chr" $1; print}' RIP_Sham_peaks_peaks.narrowPeak > RIP_Sham_peaks_peaks_fixed.narrowPeak
awk -F'\t' 'BEGIN{OFS="\t"} {$1 = "chr" $1; print}' RIP_sh_peaks_peaks.narrowPeak > RIP_sh_peaks_peaks_fixed.narrowPeak
awk -F'\t' 'BEGIN{OFS="\t"} {$1 = "chr" $1; print}' RIP_SNI_peaks_peaks.narrowPeak > RIP_SNI_peaks_peaks_fixed.narrowPeak
提取峰的范围内的序列
使用转换后的.narrowPeak文件提取序列:
bedtools getfasta -fi /home/data/t210424/Genome/Mouse/mm10.fa -bed RIP_Sham_peaks_peaks_fixed.narrowPeak -s -name -tab -fo RIP_Sham_sequences.txt
bedtools getfasta -fi /home/data/t210424/Genome/Mouse/mm10.fa -bed RIP_sh_peaks_peaks_fixed.narrowPeak -s -name -tab -fo RIP_sh_sequences.txt
bedtools getfasta -fi /home/data/t210424/Genome/Mouse/mm10.fa -bed RIP_SNI_peaks_peaks_fixed.narrowPeak -s -name -tab -fo RIP_SNI_sequences.txt
awk'{gsub(/T/, "U", $0); print}'RIP_Sham_sequences.txt>RIP_Sham_sequences_with_U.txt
awk'{gsub(/T/, "U", $0); print}'RIP_SNI_sequences.txt>RIP_SNI_sequences_with_U.txt
awk'{gsub(/T/, "U", $0); print}'RIP_sh_sequences.txt>RIP_sh_sequences_with_U.txt
转换文件格式
将提取的序列文件从表格格式转换为FASTA格式
awk -F'\t' 'BEGIN{OFS="\n"} {print ">" $1; print $2}' RIP_SNI_sequences_with_U.txt > RIP_SNI_sequences.fasta
awk -F'\t' 'BEGIN{OFS="\n"} {print ">" $1; print $2}' RIP_Sham_sequences_with_U.txt > RIP_Sham_sequences.fasta
awk -F'\t' 'BEGIN{OFS="\n"} {print ">" $1; print $2}' RIP_sh_sequences_with_U.txt > RIP_sh_sequences.fasta
运行MEME
meme RIP_Sham_sequences.fasta -rna -mod zoops -nmotifs 3 -minw 4 -maxw 12 -evt 0.01 -o RIP_Sham_motif1
meme RIP_sh_sequences.fasta -rna -mod zoops -nmotifs 3 -minw 4 -maxw 12 -evt 0.01 -o RIP_sh_motif1
meme RIP_SNI_sequences.fasta -rna -mod zoops -nmotifs 3 -minw 4 -maxw 12 -evt 0.01 -o RIP_SNI_motif1
第一份结果输出在/home/data/t210424/m6a/peaks