clusterProfiler是进行富集分析最常用的工具之一。在我们进行差异分析得到很多差异基因之后,我们可以分别对上调的基因和下调的基因进行富集分析
library(clusterProfiler)
library(org.Hs.eg.db)
# GO enrichment for a vector of gene symbols.
# `genes` must already exist in the session; it is not defined in this section.
x <- genes

# Translate gene symbols into Entrez and Ensembl identifiers.
eg <- bitr(
  x,
  fromType = "SYMBOL",
  toType = c("ENTREZID", "ENSEMBL"),
  OrgDb = "org.Hs.eg.db"
)
head(eg)

# Keep each Entrez ID once: the mapping table can list the same Entrez ID on
# several rows. `!duplicated()` is required here; `-duplicated()` would turn
# the logical mask into 0/-1 indices and either drop only the first element
# or return an empty vector.
genelist <- eg$ENTREZID
genelist <- genelist[!duplicated(genelist)]

# Enrichment across all three GO ontologies (ont = "ALL").
go <- enrichGO(
  genelist,
  OrgDb = org.Hs.eg.db,
  ont = "ALL",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2,
  keyType = "ENTREZID"
)

# Visualise the enriched terms.
barplot(go, showCategory = 20, drop = TRUE)
dotplot(go, showCategory = 50)
还可以绘制GO的网络关系图(有向无环图),但是值得注意的是,这里输入的富集结果只能来自单一的GO本体(BP、CC或MF),不能使用ont='ALL'的结果
# GO enrichment restricted to the Biological Process ontology, followed by
# the GO graph of the enriched terms. `genelist` is the Entrez ID vector
# prepared earlier in the script.
go.BP <- enrichGO(
  gene = genelist,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = "BP",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.2
)
plotGOgraph(go.BP)
使用其他数据库,以Hallmark为例:
library(clusterProfiler)
library(org.Hs.eg.db)
library(msigdbr)
# Hallmark gene-set enrichment for the up-regulated genes of a differential
# expression table read from an Excel file.
DEG_edgeR <- readxl::read_xlsx("downstream_data/DE/fibrocyte_Fibrosis_vs_others.xlsx")

# Up-regulated genes: positive log fold change and PValue below 0.15.
# dplyr is called with an explicit namespace so this does not depend on which
# packages happen to be attached.
x <- dplyr::filter(DEG_edgeR, logFC > 0, PValue < 0.15)$gene
x

# Translate gene symbols into Entrez and Ensembl identifiers.
eg <- clusterProfiler::bitr(
  x,
  fromType = "SYMBOL",
  toType = c("ENTREZID", "ENSEMBL"),
  OrgDb = "org.Hs.eg.db"
)

# Keep each Entrez ID once. `!duplicated()` is required here; `-duplicated()`
# would turn the logical mask into 0/-1 indices and either drop only the
# first element or return an empty vector.
genelist <- eg$ENTREZID
genelist <- genelist[!duplicated(genelist)]

# Two-column gene-set table (gene set name, Entrez gene ID) for category "H".
# NOTE(review): newer msigdbr releases appear to rename `category` to
# `collection` and `entrez_gene` to `ncbi_gene` -- confirm against the
# installed version.
m_t2g <- dplyr::select(
  msigdbr(species = "Homo sapiens", category = "H"),
  gs_name, entrez_gene
)

# Both cutoffs are set to 1, so no gene set is removed by the p/q-value
# filters.
go <- enricher(
  genelist,
  pvalueCutoff = 1,
  pAdjustMethod = "fdr",
  qvalueCutoff = 1,
  TERM2GENE = m_t2g
)
dotplot(go, showCategory = 20)