最近遇到一个问题,用DiffBind找差异peaks,但是网上没有找到合适(直接用)的参考,网上的例子都是基于Chip-seq数据举例,鲜见ATAC-seq(no input),甚至官方文档也没有举例。苦于此,经过摸索,修改代码,终于搞定了!废话不多说,直接贴代码:
library(DiffBind)
dbObj <- dba(sampleSheet="1.csv")
dbObj=dba.count(dbObj)
pdf(file="PCA_plot.pdf",width = 7,height = 7)
dba.plotPCA(dbObj, attributes=DBA_FACTOR, label=DBA_ID)
dev.off()
pdf(file="heatmap_plot.pdf",width = 7,height = 7)
plot(dbObj)
dev.off()
# Establishing a contrast
dbObj <- dba.contrast(dbObj, categories=DBA_FACTOR,minMembers = 2)
dbObj <- dba.analyze(dbObj, method=DBA_ALL_METHODS)
# summary of results
dba.show(dbObj, bContrasts=T)
# overlapping peaks identified by the two different tools (DESeq2 and edgeR)
pdf(file="overlap_DESeq2_edgeR.pdf",width = 7,height = 7)
dba.plotVenn(dbObj,contrast=1,method=DBA_ALL_METHODS)
dev.off()
comp1.edgeR <- dba.report(dbObj, method=DBA_EDGER, contrast = 1, th=1)
comp1.deseq <- dba.report(dbObj, method=DBA_DESEQ2, contrast = 1, th=1)
# EdgeR
out <- as.data.frame(comp1.edgeR)
write.table(out, file="results_edgeR.txt", sep="\t", quote=F, col.names = NA)
# DESeq2
out <- as.data.frame(comp1.deseq)
write.table(out, file="results_deseq2.txt", sep="\t", quote=F, col.names = NA)
# Create bed files for each keeping only significant peaks (p < 0.05)
# EdgeR
out <- as.data.frame(comp1.edgeR)
edge.bed <- out[ which(out$FDR < 0.05),
c("seqnames", "start", "end", "strand", "Fold")]
write.table(edge.bed, file="results_edgeR_sig.bed", sep="\t", quote=F, row.names=F, col.names=F)
# DESeq2
out <- as.data.frame(comp1.deseq)
deseq.bed <- out[ which(out$FDR < 0.05),
c("seqnames", "start", "end", "strand", "Fold")]
write.table(deseq.bed, file="results_deseq2_sig.bed", sep="\t", quote=F, row.names=F, col.names=F)
最重要的应该是samplesheet,如下:
samplesheet