chip-seq实操(单端)
1.配置环境
conda create -n chip macs2
conda activate chip
conda install sra-tools fastqc multiqc -y
conda install samtools star=2.7.10b homer deeptools -y
conda install trimmomaticy
2.下载数据
Tcf1/Lef1 transcription factors and CD8 T cell identity.
GSM5017666,GSM5017667,GSM5017668,GSM5017669,GSM5017670,GSM5017671,GSM5017672,GSM5017673 单端测序
prefetch -O ./rawdata --option-file sra.txt
cd rawdata
ls|while read id;do (nohup fastq-dump --gzip -O ./ ./${id}/${id}.sra &);done
3.过滤+STAR比对
#!/bin/bash
mkdir ./rawdata/clean_rawdata
mkdir ./results
cat read sra.txt|while read id
do
trimmomatic SE -phred33 \
./rawdata/${id}.fastq.gz \
./rawdata/clean_rawdata/${id}.clean.fastq.gz \
ILLUMINACLIP:/data0/heboxiao/project/miniconda3/envs/chip/share/trimmomatic-0.39-2/adapters/TruSeq3-SE.fa:2:30:10:1:true \
LEADING:3 \
TRAILING:3 \
SLIDINGWINDOW:4:15 \
MINLEN:50
STAR --runThreadN 16 \
--genomeDir /data0/heboxiao/scratch/ref/mmu.star.index_2.7.10b \
--readFilesIn ./rawdata/clean_rawdata/${id}.clean.fastq.gz \
--readFilesCommand zcat \
--outFileNamePrefix ./results/${id} \
--outSAMtype BAM SortedByCoordinate
done
4.macs callpeaks
TCF1 input:SRR13422710 WT input:SRR13422703
tcf1:SRR13422713 SRR13422711 SRR13422712 SRR13422709
wt:SRR13422704 SRR13422705 SRR13422706 SRR13422707 SRR13422708
cat sra.txt|while read id;do samtools index ./results/${id}Aligned.sortedByCoord.out.bam;done
for id in SRR13422713 SRR13422711 SRR13422712 SRR13422709
do
macs2 callpeak -t ./results/${id}Aligned.sortedByCoord.out.bam \
-c ./results/SRR13422710Aligned.sortedByCoord.out.bam \
-f BAM \
-g mm \
-n tcf1.${id} \
--outdir ./macs2_res
done
for id in SRR13422704 SRR13422705 SRR13422706 SRR13422707 SRR13422708
do
macs2 callpeak -t ./results/${id}Aligned.sortedByCoord.out.bam \
-c ./results/SRR13422703Aligned.sortedByCoord.out.bam \
-f BAM \
-g mm \
-n wt.${id} \
--outdir ./macs2_res
done
将bam转换为bw进行可视化
bamCoverage -bs 10 -b ./results/SRR13422713Aligned.sortedByCoord.out.bam -o ./tcf1.bw --normalizeUsing BPM -p 16
bamCoverage -bs 10 -b ./results/SRR13422704Aligned.sortedByCoord.out.bam -o ./wt.bw --normalizeUsing BPM -p 16
TSS附近信号强度图
#单一样本绘图:
computeMatrix reference-point --referencePoint TSS \
-b 10000 \
-a 10000 \
-R /data0/heboxiao/scratch/ref/genome/GRCm39.tss.bed \
-S ./wt.bw \
--skipZeros \
-p 16 \
-o matrix1_wt_tss.gz \
--outFileSortedRegions regions1_wt_genes.bed
plotHeatmap -m matrix1_wt_tss.gz -out wt.heatmap.pdf --plotFileFormat pdf --dpi 720
plotProfile -m matrix1_wt_tss.gz -out wt.profile.pdf --plotFileFormat pdf --perGroup --dpi 720
5.R包注释peaks
library(ChIPseeker)
library(clusterProfiler)
library(TxDb.Mmusculus.UCSC.mm39.knownGene)
bedPeaksFile="./tcf1.SRR13422704_summits.bed"
peaks <- readPeakFile(bedPeaksFile)
keepChr <- !grepl("\\.",seqlevels(peaks))#只保留
seqlevels(peaks,pruning.mode="coarse") <- seqlevels(peaks)[keepChr]
peakanno <- annotatePeak(peaks,tssRegion = c(-3000,3000),
TxDb = TxDb.Mmusculus.UCSC.mm39.knownGene,
annoDb = "org.Mm.eg.db")
pealanno_res <- as.data.frame(peakanno)