根据b站基因课视频修改,按照输入文件名输出四个富集图像。
knitr::opts_chunk$set(echo = TRUE)
library(readr)
library(dplyr)
library(clusterProfiler)
library(tidyr)
library(ggplot2)
library(org.hsa.eg.db)
#-------------------------------------------------------------------------------
#文件读取(只改变输入文件即可)
#-------------------------------------------------------------------------------
doc_name <- "DESeq2.DE_results" //两两比对的DE_results结果
#准备注释文件,根据教程生成 //https://www.jianshu.com/p/f2e4dbaae719
pathway2gene <- AnnotationDbi::select(org.hsa.db,
keys = keys(org.hsa.db),
columns = c("Pathway","KO")) %>%
na.omit() %>%
dplyr::select(Pathway, GID)
#准备基因列表
DE_results <- read_delim(doc_name,
delim = "\t", escape_double = FALSE,
trim_ws = TRUE)
#筛选前200个差异基因作为kegg富集对象
#-------------------------------------------------------------------------------
DE_results_kegg <- arrange(DE_results,desc(abs(log2FoldChange)))%>%
dplyr::slice(1:2000)%>%
dplyr::select(gene,log2FoldChange)
ekp <- enricher(DE_results_kegg$gene[1:200],
TERM2GENE = pathway2gene,
TERM2NAME = pathway2name,
pvalueCutoff = 0.05, # 表示全部保留,可以设为0.05作为阈值
qvalueCutoff = 0.05, # 表示全部保留,可以设为0.05作为阈值
pAdjustMethod = "BH",
minGSSize = 1)
png(filename = paste(doc_name, "_kegg.png", sep = ""), width = 700, height = 700, res = 100)
dotplot(ekp, showCategory = 10) # 只显示前10个类别
dev.off()
# 基因差异列表
geneList <- DE_results_kegg$log2FoldChange
names(geneList) <- DE_results_kegg$gene
geneList <- sort(geneList, decreasing = T)
png(filename = paste(doc_name, "_kegg_cent.png", sep = ""), width = 2000, height = 2000, res = 200)
cnetplot(ekp,
color.params = list(foldChange = geneList, edge = TRUE),
showCategory = 3,
node_label = "all", # category | gene | all | none
circular = TRUE)
dev.off()
#gos富集分析
#-------------------------------------------------------------------------------
ego <- enrichGO(gene=na.omit(DE_results_kegg$gene[1:200]),
OrgDb=org.hsa.eg.db,
keyType="GID",
ont="ALL", #CC/BP/MF可选
qvalueCutoff = 0.05,
pvalueCutoff =0.05)
png(filename = paste(doc_name, "_go.png", sep = ""), width = 700, height = 700, res = 100)
dotplot(ego, showCategory = 10) # 只显示前10个类别
dev.off()
#gos富集分析ggplot作图
go.res <- data.frame(ego)
goBP <- subset(go.res,subset = (ONTOLOGY == "BP"))[1:10,]
goCC <- subset(go.res,subset = (ONTOLOGY == "CC"))[1:10,]
goMF <- subset(go.res,subset = (ONTOLOGY == "MF"))[1:10,]
go.df <- rbind(goBP,goCC,goMF)
go.df$Description <- factor(go.df$Description,levels = rev(go.df$Description))
go_bar <- ggplot(data = go.df, # 绘图使用的数据
aes(x = Description, y = Count,fill = ONTOLOGY))+ # 横轴坐标及颜色分类填充
geom_bar(stat = "identity",width = 0.9)+ # 绘制条形图及宽度设置
coord_flip()+theme_bw()+ # 横纵坐标反转及去除背景色
scale_x_discrete(labels = function(x) str_wrap(x,width = 120))+ # 设置term名称过长时换行
labs(x = "GO terms",y = "GeneNumber",title = "Barplot of sx_jx Enriched GO Terms")+ # 设置坐标轴标题及标题
theme(axis.title = element_text(size = 13), # 坐标轴标题大小
axis.text = element_text(size = 11), # 坐标轴标签大小
plot.title = element_text(size = 14,hjust = 0.5,face = "bold"), # 标题设置
legend.title = element_text(size = 13), # 图例标题大小
legend.text = element_text(size = 11), # 图例标签大小
plot.margin = unit(c(0.5,0.5,0.5,0.5),"cm")) # 图边距
ggsave(go_bar,filename = paste(doc_name, "_go_barplot.png", sep = ""),width = 15,height = 7)
#-------------------------------------------------------------------------------