在 macs2 callpeak 之后,我们想要知道:the distribution of peaks, like intron/exon and so on.
So we need this R package CHIPseeker.
rm(list=ls())
setwd("~/workspace/BI_project/Chip-seq/3.Mapping/macs2/")
# 加载 ChIPseeker 和其他必要的库
library(ChIPseeker)
library(GenomicRanges)
# 读取峰值文件
peak_file <- "output_cKO_all.bed"
# 读取BED文件到GRanges对象
peak_gr <- readPeakFile(peak_file)
# -------------------------------------------------------------------------
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
# Annotation-------------------------------------------------------------------------
peakAnno <- annotatePeak(peak_gr,
TxDb=txdb, # 指定小鼠基因组
tssRegion=c(-2000, 2000))
# 查看注释结果是否正确
head(as.data.frame(peakAnno))
# plot --------------------------------------------------------------------
# 绘制注释结果的饼图
plotAnnoPie(peakAnno)
# 绘制峰值分布的柱状图
plotAnnoBar(peakAnno)
# 绘制 TSS 周围的峰值分布图
plotDistToTSS(peakAnno, title = "Distribution of Peaks relative to TSS")
# performe GO annalysis -------------------------------------------------------------------------
# 提取注释到基因的基因名
gene_list <- as.data.frame(peakAnno)$geneId
gene_list <- as.numeric(gene_list)
library(clusterProfiler)
library(org.Mm.eg.db) # 如果是小鼠基因组,使用这个注释包;如果是人类基因组,用 org.Hs.eg.db
# 进行 GO 功能富集分析
go_enrich <- enrichGO(gene = gene_list,
OrgDb = org.Mm.eg.db,
keyType = "ENTREZID",
ont = "BP", # Biological Process
pAdjustMethod = "BH",
qvalueCutoff = 0.05)
# 可视化富集结果
barplot(go_enrich, showCategory = 10)
# 进行 KEGG 通路富集分析
kegg_enrich <- enrichKEGG(gene = gene_list,
organism = "mmu", # 小鼠为 mmu,人类为 hsa
pAdjustMethod = "BH",
qvalueCutoff = 0.05)
# 可视化 KEGG 通路结果
dotplot(kegg_enrich, showCategory = 10)
出图展示:
image.png
image.png
image.png
image.png
image.png