顺利运行的peakcall3.sh
#!/bin/bash
# peakcall3.sh
#
# For every group, call peaks with MACS2 on each of the three replicates
# (IP vs. input control), then write the replicate-1 peaks that overlap a
# peak in every other replicate to <group>_common_peaks.narrowPeak.
#
# Inputs : ${input_dir}/RIP_<group>_<i>.filtered.bam  (IP sample)
#          ${input_dir}/In_<group>_<i>.filtered.bam   (input control)
# Outputs: ${output_dir}/<group>/<group>_<i>_peaks.narrowPeak
#          ${output_dir}/<group>/<group>_common_peaks.narrowPeak

# Stop at the first failing command so that a failed MACS2 run cannot feed a
# missing or partial peak file into the intersect step.
set -euo pipefail

input_dir="/home/data/t210424/m6a/filtered"
output_dir="/home/data/t210424/m6a/peak3"
mkdir -p "$output_dir"

# Group names
groups=("sh" "Sham" "SNI")

for group in "${groups[@]}"; do
  # Group-specific output directory
  group_output_dir="$output_dir/${group}"
  mkdir -p "$group_output_dir"

  # Peak files produced for the replicates of this group
  peak_files=()

  for ((i = 1; i <= 3; i++)); do
    # IP and input BAM files of this replicate
    ip_file="${input_dir}/RIP_${group}_${i}.filtered.bam"
    input_file="${input_dir}/In_${group}_${i}.filtered.bam"

    # Peak calling with MACS2
    macs2 callpeak -t "$ip_file" -c "$input_file" -f BAM \
      -n "${group}_${i}" --outdir "$group_output_dir"

    peak_files+=("$group_output_dir/${group}_${i}_peaks.narrowPeak")
  done

  # Common peaks: start from replicate 1 and keep only the peaks that overlap
  # a peak in each remaining replicate, one replicate at a time.
  # (Expanding the array as "$rest_files" would pass only its first element,
  # i.e. replicate 3 would never be consulted.)
  if (( ${#peak_files[@]} > 1 )); then
    common_file="$group_output_dir/${group}_common_peaks.narrowPeak"
    cp -- "${peak_files[0]}" "$common_file"
    for other_file in "${peak_files[@]:1}"; do
      # -u reports each -a peak once if it has any overlap in -b, so peaks
      # are not duplicated when they overlap several peaks in the other file.
      bedtools intersect -a "$common_file" -b "$other_file" -u \
        > "$common_file.tmp"
      mv -- "$common_file.tmp" "$common_file"
    done
  fi
done
第二个脚本:转换染色体名称,提取序列并生成文本文件,再转换为FASTA格式
/home/data/t210424/m6a/peak3/getfasta.sh
#!/bin/bash
# getfasta.sh
#
# For every group, in four stages:
#   1. prefix the chromosome column of <group>_common_peaks.narrowPeak
#      with "chr",
#   2. extract the peak sequences from the genome with bedtools getfasta
#      (tab-separated name/sequence output),
#   3. convert that table to FASTA,
#   4. run MEME on the FASTA file.
# A stage is skipped for a group when its input file does not exist.

set -uo pipefail

# Input and output locations
output_dir="/home/data/t210424/m6a/peak3"
genome_fasta="/home/data/t210424/Genome/Mouse/mm10.fa"

# Group names
groups=("sh" "Sham" "SNI")

#######################################
# Prefix column 1 of a tab-separated file with "chr".
# Arguments: $1 - input file, $2 - output file
#######################################
add_chr_prefix() {
  local src=$1 dst=$2
  # The awk program is single-quoted, so $1 must NOT be backslash-escaped:
  # the shell leaves it alone and awk rejects a literal "\$1".
  awk -F'\t' 'BEGIN { OFS = "\t" } { $1 = "chr" $1; print }' "$src" > "$dst"
}

#######################################
# Convert a two-column (name<TAB>sequence) table to FASTA.
# Arguments: $1 - input table, $2 - output FASTA file
#######################################
tab_to_fasta() {
  local src=$1 dst=$2
  awk -F'\t' '{ print ">" $1; print $2 }' "$src" > "$dst"
}

# Stage 1: rename chromosomes in the .narrowPeak files.
# NOTE(review): presumably the peak files carry names without the "chr"
# prefix while mm10.fa uses it - confirm against the BAM headers.
for group in "${groups[@]}"; do
  common_peaks_file="${output_dir}/${group}/${group}_common_peaks.narrowPeak"
  fixed_peaks_file="${output_dir}/${group}/${group}_common_peaks_fixed.narrowPeak"
  if [[ -f "$common_peaks_file" ]]; then
    add_chr_prefix "$common_peaks_file" "$fixed_peaks_file"
  fi
done

# Stage 2: extract the sequences into a tab-separated text file.
for group in "${groups[@]}"; do
  fixed_peaks_file="${output_dir}/${group}/${group}_common_peaks_fixed.narrowPeak"
  output_sequences_file="${output_dir}/${group}/${group}_sequences.txt"
  if [[ -f "$fixed_peaks_file" ]]; then
    bedtools getfasta -fi "$genome_fasta" -bed "$fixed_peaks_file" \
      -s -name -tab -fo "$output_sequences_file"
  fi
done

# Stage 3: convert the sequence table to FASTA.
for group in "${groups[@]}"; do
  sequences_file="${output_dir}/${group}/${group}_sequences.txt"
  fasta_file="${output_dir}/${group}/${group}_sequences.fasta"
  if [[ -f "$sequences_file" ]]; then
    tab_to_fasta "$sequences_file" "$fasta_file"
  fi
done

# Stage 4: motif discovery with MEME on the extracted sequences.
for group in "${groups[@]}"; do
  fasta_file="${output_dir}/${group}/${group}_sequences.fasta"
  meme_output_dir="${output_dir}/${group}/${group}_motif1"
  if [[ -f "$fasta_file" ]]; then
    meme "$fasta_file" -rna -mod zoops -nmotifs 3 -minw 4 -maxw 12 \
      -evt 0.01 -o "$meme_output_dir"
  fi
done
以上脚本运行报错,当时没有找到原因;实际的解决方法是把各组的命令分开写、逐条执行:
# Manual per-group run: add the "chr" prefix to the common peaks, extract the
# sequences, convert them to FASTA and run MEME. Written as one loop so every
# group uses its own directory and its own output names.
peak_root="/home/data/t210424/m6a/peak3"
genome_fasta="/home/data/t210424/Genome/Mouse/mm10.fa"

for group in sh Sham SNI; do
  # Each group's files live in the directory named after the group.
  cd "${peak_root}/${group}" || {
    printf 'cannot cd to %s\n' "${peak_root}/${group}" >&2
    continue
  }

  awk -F'\t' 'BEGIN { OFS = "\t" } { $1 = "chr" $1; print }' \
    "${group}_common_peaks.narrowPeak" > "RIP_${group}_peaks_fixed.narrowPeak"

  bedtools getfasta -fi "$genome_fasta" \
    -bed "RIP_${group}_peaks_fixed.narrowPeak" \
    -s -name -tab -fo "RIP_${group}_sequences.txt"

  # getfasta -tab writes name<TAB>sequence; MEME is given the .fasta file,
  # so convert the table to FASTA first.
  awk -F'\t' '{ print ">" $1; print $2 }' \
    "RIP_${group}_sequences.txt" > "RIP_${group}_sequences.fasta"

  meme "RIP_${group}_sequences.fasta" -rna -mod zoops -nmotifs 3 \
    -minw 6 -maxw 12 -evt 0.01 -o "RIP_${group}_motif"
done
然后用meme 改参数重试
# Second MEME attempt on the sh group: motif width 4-10, E-value threshold 1,
# no -nmotifs given; results go to RIP_sh_motif2.
meme RIP_sh_sequences.fasta \
  -rna \
  -mod zoops \
  -minw 4 -maxw 10 \
  -evt 1 \
  -o RIP_sh_motif2
sham组没输出不能用
输出目录名不带序号的结果,用的是下面这条命令:
# MEME on the SNI group: at most 3 motifs, motif width 6-12, E-value
# threshold 0.01; results go to RIP_SNI_motif.
meme RIP_SNI_sequences.fasta \
  -rna \
  -mod zoops \
  -nmotifs 3 \
  -minw 6 -maxw 12 \
  -evt 0.01 \
  -o RIP_SNI_motif
结果有点合理也有点奇怪,然后序列长度不一致,应该不会用
把motif最大宽度(-maxw)改成11后,三组都有输出结果,CA特征看着好像有点对劲;输出文件夹为 RIP_SNI_motif3、RIP_sh_motif3、RIP_Sham_motif3
# Third MEME attempt (SNI group shown): motif width 4-11, E-value threshold 1;
# results go to RIP_SNI_motif3. The line needs the leading "meme" command;
# without it the shell tries to execute the FASTA file name.
meme RIP_SNI_sequences.fasta -rna -mod zoops -minw 4 -maxw 11 -evt 1 -o RIP_SNI_motif3
发出去的是 /home/data/t210424/m6a/peak3/ 下三组的 motif3 结果(RIP_Sham_motif3、RIP_sh_motif3、RIP_SNI_motif3)