chip-seq实操(单端)

1.配置环境

conda create -n chip macs2
conda activate chip
conda install sra-tools fastqc multiqc -y
conda install samtools star=2.7.10b homer deeptools -y
conda install trimmomaticy

2.下载数据

Tcf1/Lef1 transcription factors and CD8 T cell identity.

GSM5017666,GSM5017667,GSM5017668,GSM5017669,GSM5017670,GSM5017671,GSM5017672,GSM5017673 单端测序

prefetch -O ./rawdata --option-file sra.txt
cd rawdata
ls|while read id;do (nohup fastq-dump --gzip -O ./ ./${id}/${id}.sra &);done

3.过滤+STAR比对

#!/bin/bash
mkdir ./rawdata/clean_rawdata
mkdir ./results
cat read sra.txt|while read id
do
trimmomatic SE -phred33 \
./rawdata/${id}.fastq.gz \
./rawdata/clean_rawdata/${id}.clean.fastq.gz \
ILLUMINACLIP:/data0/heboxiao/project/miniconda3/envs/chip/share/trimmomatic-0.39-2/adapters/TruSeq3-SE.fa:2:30:10:1:true \
LEADING:3 \
TRAILING:3 \
SLIDINGWINDOW:4:15 \
MINLEN:50
STAR --runThreadN 16 \
     --genomeDir /data0/heboxiao/scratch/ref/mmu.star.index_2.7.10b \
     --readFilesIn ./rawdata/clean_rawdata/${id}.clean.fastq.gz \
     --readFilesCommand zcat \
     --outFileNamePrefix ./results/${id} \
     --outSAMtype BAM SortedByCoordinate 
done

4.macs callpeaks

TCF1 input：SRR13422710 WT input：SRR13422703

tcf1：SRR13422713 SRR13422711 SRR13422712 SRR13422709

wt：SRR13422704 SRR13422705 SRR13422706 SRR13422707 SRR13422708

cat sra.txt|while read id;do samtools index ./results/${id}Aligned.sortedByCoord.out.bam;done

for id in SRR13422713 SRR13422711 SRR13422712 SRR13422709
do
macs2 callpeak -t ./results/${id}Aligned.sortedByCoord.out.bam \
               -c ./results/SRR13422710Aligned.sortedByCoord.out.bam \
               -f BAM \
               -g mm \
               -n tcf1.${id} \
               --outdir ./macs2_res
done
for id in SRR13422704 SRR13422705 SRR13422706 SRR13422707 SRR13422708
do
macs2 callpeak -t ./results/${id}Aligned.sortedByCoord.out.bam \
               -c ./results/SRR13422703Aligned.sortedByCoord.out.bam \
               -f BAM \
               -g mm \
               -n wt.${id} \
               --outdir ./macs2_res
done

将bam转换为bw进行可视化

bamCoverage -bs 10 -b ./results/SRR13422713Aligned.sortedByCoord.out.bam -o ./tcf1.bw --normalizeUsing BPM -p 16
bamCoverage -bs 10 -b ./results/SRR13422704Aligned.sortedByCoord.out.bam -o ./wt.bw --normalizeUsing BPM -p 16

TSS附近信号强度图

#单一样本绘图：
computeMatrix reference-point --referencePoint TSS \
-b 10000 \
-a 10000 \
-R /data0/heboxiao/scratch/ref/genome/GRCm39.tss.bed \
-S ./wt.bw \
--skipZeros \
-p 16 \
-o matrix1_wt_tss.gz \
--outFileSortedRegions regions1_wt_genes.bed

plotHeatmap -m matrix1_wt_tss.gz -out wt.heatmap.pdf --plotFileFormat pdf --dpi 720
plotProfile -m matrix1_wt_tss.gz -out wt.profile.pdf --plotFileFormat pdf --perGroup --dpi 720

5.R包注释peaks

library(ChIPseeker)
library(clusterProfiler)
library(TxDb.Mmusculus.UCSC.mm39.knownGene)
bedPeaksFile="./tcf1.SRR13422704_summits.bed"
peaks <- readPeakFile(bedPeaksFile)
keepChr <- !grepl("\\.",seqlevels(peaks))#只保留
seqlevels(peaks,pruning.mode="coarse") <- seqlevels(peaks)[keepChr]
peakanno <- annotatePeak(peaks,tssRegion = c(-3000,3000),
                         TxDb = TxDb.Mmusculus.UCSC.mm39.knownGene,
                         annoDb = "org.Mm.eg.db")
pealanno_res <- as.data.frame(peakanno)

【纯代码】chip-seq实践