根据rawdata算出来FPKM,然后进行单个基因与其他基因
之间相关性的计算
mutwt = read.csv("路径wtmut12-fpkm.csv", row.names=1,header=T)
#转置一下
mutwt_t <- t(mutwt)
转置后文件内容为:纵坐标是样本名称,横坐标是不同基因的Ensembl
#先写空向量
gene_name<-c()##也可用vector
cor_r<-c()
pvalue<-c()
#用循环来算相关性,
for (i in 1:ncol(mutwt_t)){
g1=colnames(mutwt_t)[i]
c_r=cor(mutwt_t$目标基因,mutwt_t[,i],method="pearson")
p=cor.test(mutwt_t$目标基因,mutwt_t[,i],method ="pearson")[[3]]
gene_name=c(gene_name,g1)
cor_r=c(cor_r,c_r)
pvalue=c(pvalue,p)
}
cor_result <- data.frame(gene_name, cor_r, pvalue)
write.csv(cor_result,"路径/cor_result_withsymbol.csv",quote=F)
这样就算好了基因间的相关性系数
接下来做GSEA分析
#GSEA分析
cor_result_rowname <- read.csv("路径/目标基因与其他基因相关性分析.csv", row.names=1, header = T)
row.names(cor_result_rowname) <- cor_result_rowname[,1]
#转换ID 需要ENTREZID作GSEA分析
geneList_tr <- bitr(row.names(cor_result_rowname),
fromType = "ENSEMBL",
toType = c("ENTREZID", "SYMBOL"),
OrgDb = org.Hs.eg.db)
colnames(cor_result_rowname)[1] <- c("ENSEMBL")
cor_result_withsymbol <- merge(cor_result_rowname, geneList_tr)
#按相关性系数进行降序排序
sortlist = cor_result_withsymbol[order(cor_result_withsymbol$cor_r,decreasing = T),]
newlist <- sortlist$cor_r
names(newlist) <- cor_result_withsymbol$ENTREZID
#去掉Na,如果不去掉可能会导致下一步失败
newlist<-na.omit(newlist)
准备作图
library(stringr)
library(clusterProfiler)
library(org.Hs.eg.db)
go_result <- gseGO(newlist,
ont = "BP",
OrgDb = org.Hs.eg.db,
keyType = "ENTREZID",
pvalueCutoff = 0.05,
pAdjustMethod = "BH")#p值校正方法
kegg_result <- gseKEGG(
newlist,
organism = "hsa",
keyType = "kegg",
exponent = 1,
minGSSize = 10,
maxGSSize = 500,
eps = 1e-10,
pvalueCutoff = 0.05,
pAdjustMethod = "BH",
verbose = TRUE,
use_internal_data = FALSE,
seed = FALSE,
by = "fgsea")
head(kegg_result)
dim(kegg_result)
#按照enrichment score从高到低排序,便于查看富集通路
write.table(kegg_result, "路径/GSEA富集_cor.txt",sep = "\t",quote = F,col.names = T,row.names = F)
可以在这个表中看具体的通路信息
开始画图
library(enrichplot)
#感兴趣的通路写在一个path里
paths <- c("hsa04724", "hsa04727","hsa04150","hsa04151")
gseaplot2(kegg_result, paths, pvalue_table = TRUE)
gseaplot2(
kegg_result, #gseaResult object,即GSEA结果
"hsa04727",#富集的ID编号
#标题
color = "green",#GSEA线条颜色
base_size = 11,#基础字体大小
rel_heights = c(1.5, 0.5, 1),#副图的相对高度
subplots = 1:3, #要显示哪些副图 如subplots=c(1,3) #只要第一和第三个图,subplots=1#只要第一个图
pvalue_table = FALSE, #是否添加 pvalue table
ES_geom = "line") #running enrichment score用先还是用点ES_geom = "dot"