# This workflow relies mainly on two packages: clusterProfiler for the GO enrichment analysis and GOplot for plotting.
# Install and load the R packages ----
# Install any dependency that is not yet available, then attach all of them.
# GOplot and stringr are installed from CRAN with install.packages().
for (pkg in c("GOplot", "stringr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# clusterProfiler and org.Hs.eg.db are distributed through Bioconductor, not
# CRAN, so install.packages() cannot find them; use BiocManager instead.
for (pkg in c("clusterProfiler", "org.Hs.eg.db")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      install.packages("BiocManager")
    }
    BiocManager::install(pkg)
  }
}

library(clusterProfiler)
library(org.Hs.eg.db)
library(GOplot)
library(stringr)
# Import the differential-expression results from the previous analysis ----
# Restore the objects saved by the earlier differential-expression step.
# NOTE(review): the file is expected to provide a data frame named `deg`
# with a `g` column; confirm against the script that wrote it.
load(file = "GSE54236_deg.Rdata")

# Peek at the first four rows and columns. Example output:
#            logFC   AveExpr         t      P.Value
# CLEC1B -3.495104  8.500043 -8.506637 1.114997e-14
# CXCL14 -3.441625 10.542112 -7.771404 8.318569e-13
# FCN2   -3.394742 10.373211 -8.619044 5.693455e-15
# EPCAM  -3.373903  8.789720 -7.337014 9.829431e-12
deg[1:4, 1:4]

# Keep only the rows whose `g` label is not "stable".
is_changed <- deg$g != "stable"
deg <- deg[is_changed, ]

# The row names are the names of the differentially expressed genes.
gene <- rownames(deg)
# GO analysis ----
# Translate the gene symbols into Entrez IDs using the human annotation DB.
sig_DP_entrezId <- mapIds(
  org.Hs.eg.db,
  keys = gene,
  column = "ENTREZID",
  keytype = "SYMBOL"
)

# Show how many symbols could not be mapped (NA), then drop those entries.
table(is.na(sig_DP_entrezId))
sig_DP_entrezId <- na.omit(sig_DP_entrezId)

# Run the GO enrichment with ont = "ALL" and deliberately loose p/q cutoffs;
# readable = TRUE asks for gene IDs to be reported as readable names.
go_ALL <- enrichGO(
  gene = sig_DP_entrezId,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",
  ont = "ALL",
  pvalueCutoff = 0.5,
  qvalueCutoff = 0.5,
  readable = TRUE
)

# Extract the result table from the enrichment object as a data frame.
go_all <- as.data.frame(go_ALL@result)
# Prepare the GO results for the circle plot ----
# Inspect the columns of the enrichment table. Example output:
# [1] "ONTOLOGY" "ID" "Description" "GeneRatio"
# [5] "BgRatio" "pvalue" "p.adjust" "qvalue"
# [9] "geneID" "Count"
colnames(go_all)

# Keep only the five columns GOplot needs. They are selected by name rather
# than by position (previously 1, 2, 3, 9, 7) so that a result table with a
# different column layout (e.g. extra columns in other clusterProfiler
# versions - TODO confirm) cannot silently pick the wrong columns.
go_all <- go_all[, c("ONTOLOGY", "ID", "Description", "geneID", "p.adjust")]

# Replace the "/" separator between gene names in geneID with ",".
go_all$geneID <- str_replace_all(go_all$geneID, "/", ",")

# Rename the columns to the names used from here on.
names(go_all) <- c("Category", "ID", "term", "Genes", "adj_pval")

# Pair every differentially expressed gene with its logFC.
gene <- data.frame(ID = gene, logFC = deg$logFC)

# Combine the term table and the gene table into the plotting data set.
circ <- circle_dat(go_all, gene)
# Draw the circle plot ----
# Circle plot. `nsub` is the number of GO terms to display; adjust as needed.
GOCircle(circ, nsub = 10)

# Alternative plot: build the chord data from the first terms of the table
# (head() with its default length applied to the "term" column).
process <- head(go_all[["term"]])
chord <- chord_dat(circ, gene, process)
# Save the chord plot as a 1000 x 1000 PNG file.
png(filename = "Chord.png", width = 1000, height = 1000)
# Print the plot explicitly: relying on top-level auto-printing leaves the
# PNG blank when this script is run through source().
print(GOChord(chord, space = 0.02, gene.order = "logFC",
              gene.space = 0.25, gene.size = 5))
dev.off()