前面获得的来自于不同celltype的marker,或者不同cluster的marker,或者每个celltype中不同分组的差异基因,上述marker构成的列表形式,可以直接输入,获得富集结果的list以及图片。
1. 加载包
library(Seurat)
library(ggplot2)
library(openxlsx)
library(homologene)
library(clusterProfiler)
library(org.Hs.eg.db)
library(org.Mm.eg.db)
library(stringr)
2. 富集分析,org指定物种"mmu"或者"hsa",markers_list为单细胞数据中获得的maker的list
EnrichmentForMarkerlist <- function(markers_list,org,item.size=8,tag,path.out){
if (org=="mmu") {
db <- "org.Mm.eg.db"
}else if(org=="hsa"){
db <- "org.Hs.eg.db"
}
GOenrich_result_list <- list()
KEGGenrich_result_list <- list()
for (i in names(markers_list)){
print(paste("cluster",i,"enrichment",sep=" "))
if (nrow(markers_list[[i]]) == 0){
print(paste("nrow cluster",i,"is",nrow(markers_list[[i]]),sep=" "))
next
}
#GO富集分析
print(paste("cluster",i,"GO entichment",sep=" "))
GOenrich_result <- enrichGO(rownames(markers_list[[i]]),db,keyType="SYMBOL", ont="all", pvalueCutoff=0.05)
#画三种分类的图
print(paste("cluster",i,"GO all entichment",sep=" "))
if (nrow(as.data.frame(GOenrich_result)) > 0){
GO_bar_split <- barplot(GOenrich_result, split = "ONTOLOGY",showCategory=20) + #GOenrich_result不能是矩阵
facet_grid(ONTOLOGY~., scale = "free") +
scale_y_discrete(labels=function(x) str_wrap(x, width=100)) + #取消了iterm强制换行,需要加载stringr包
theme(axis.text.y=element_text(size=7))
try(ggsave(paste0("cluster_",i,"_GO_bar_all_plot.png"),GO_bar_split,path=path.out,width=15,height=10))
#只画BP的图
print(paste("cluster",i,"GO BP entichment",sep=" "))
GOenrich_result_BP <- enrichGO(rownames(markers_list[[i]]),db,keyType="SYMBOL", ont="BP", pvalueCutoff=0.05)
GO_bar_BP <- barplot(GOenrich_result_BP,showCategory=30,drop=T) +
theme(axis.text.y=element_text(size=item.size)) +
scale_y_discrete(labels=function(x) str_wrap(x, width=100)) #取消了iterm强制换行,需要加载stringr包
try(ggsave(paste0("cluster_",i,"_GO_bar_BP_plot.png"),GO_bar_BP,path=path.out,limitsize = FALSE))
}
#KEGG富集分析
ID_transform_table <- bitr(rownames(markers_list[[i]]),fromType="SYMBOL",toType="ENTREZID",OrgDb=db)
ID_Entrez_gene_set <- ID_transform_table$ENTREZID
print("KEGGenrich start")
KEGGenrich_result <- enrichKEGG(ID_Entrez_gene_set, organism = org, keyType = 'kegg', pvalueCutoff = 0.05,pAdjustMethod = 'BH', minGSSize = 10,maxGSSize = 500,use_internal_data = FALSE)
#绘制气泡图
print("KEGGenrich start map")
try(KEGG_dot <- dotplot(KEGGenrich_result, showCategory=20) + theme(axis.text.y=element_text(size=item.size)) +
scale_y_discrete(labels=function(x) str_wrap(x, width=100)))
print("map saved")
try(ggsave(paste0("cluster_",i,"_KEGG_dot_plot.png"),KEGG_dot,path=path.out,width=10,height=6))
try(GOenrich_result_list[[i]] <- GOenrich_result_BP)
try(KEGGenrich_result_list[[i]] <- KEGGenrich_result)
print("result saved")
}
print("start output")
write.xlsx(GOenrich_result_list,file = paste0(path.out,"/",tag,'_GOenrich_result_list.xlsx'),row.names=T,sep='\t',overwrite=T)
write.xlsx(KEGGenrich_result_list,file = paste0(path.out,"/",tag,'_KEGGenrich_result_list.xlsx'),row.names=T,sep='\t',overwrite=T)
saveRDS(GOenrich_result_list,file=paste0(path.out,"/",tag,'_GO_EnrichResult.rds'))
saveRDS(KEGGenrich_result_list,file=paste0(path.out,"/",tag,'_KEGG_EnrichResult.rds'))
}
3.加载数据
EnrichmentForMarkerlist(markers_list=all_cluster_markers,org="hsa",item.size=8,tag="clusters",path.out=path_out)