# 参考网站：
# - 《R获取指定GO term和KEGG pathway的gene list基因集》
# - 《使用R进行Gene Ontology (GO) 富集分析过程中如何导出某一特定GO词条下所有的基因》
# 笨方法：也可以去KEGG官网手动复制通路的基因列表，然后分列提取基因名；下面改用KEGGREST在R中直接获取。
library("KEGGREST")
# Download the GENE field of one KEGG pathway and write the extracted gene
# names to a CSV file.
#
# pathway_id: KEGG pathway identifier passed to keggGet(), e.g. "hsa05215".
# file:       path of the CSV file to write.
#
# Returns (invisibly) the character vector that was written.
export_kegg_pathway_genes <- function(pathway_id, file) {
  gene_field <- keggGet(pathway_id)[[1]]$GENE

  # Keep positions 2, 4, 6, ... of the flat GENE vector via logical recycling.
  # NOTE(review): presumably the field alternates gene IDs with
  # "SYMBOL; description" strings -- confirm against the KEGGREST docs.
  symbol_entries <- gene_field[c(FALSE, TRUE)]

  # Drop everything from the first ";" onwards, leaving the leading token.
  symbols <- gsub("\\;.*", "", symbol_entries)

  write.csv(symbols, file = file, quote = FALSE, row.names = FALSE)
  invisible(symbols)
}

export_kegg_pathway_genes("hsa05215", file = "hsa05215")
export_kegg_pathway_genes(
  "hsa04932",
  file = "hsa04932_Non-alcoholic fatty liver disease (NAFLD)"
)
# GO Term例子
# 本例子中输入数据df的结构（df数据结构）为：
# 第1列为GO term的名称（用作结果的列名），第2列为GO ID（用于查询该词条下的基因）。
# Package setup for the GO term example.
# The workspace is intentionally NOT cleared here (no `rm(list = ls())`):
# wiping the global environment mid-script would also delete an input `df`
# created earlier and destroys the caller's workspace when the file is sourced.
# NOTE(review): the attach order below is kept as-is because it decides which
# package's `select()` is found first -- confirm before reordering.
suppressMessages(library(tidyverse))
library(org.Hs.eg.db)
suppressMessages(library(plyr)) # used to combine data of unequal length (rbind.fill)
#' Collect the gene symbols annotated to each GO term in a table.
#'
#' For every row of `rt`, the GO ID in the second column is looked up in
#' `org.Hs.egGO2ALLEGS`, the resulting Entrez IDs are mapped to symbols through
#' `org.Hs.eg.db`, and the de-duplicated symbols become one column of the
#' result.
#'
#' @param rt A data frame (or tibble). Column 1 holds the labels used as
#'   output column names; column 2 holds the GO IDs to look up.
#' @return A character matrix with one column per row of `rt` (named after
#'   column 1) and rows named `V1`, `V2`, ...; shorter gene sets are padded
#'   with `""`, and `NA` symbols are also replaced by `""`. A zero-row `rt`
#'   yields a 0 x 0 character matrix.
get_GO_Gene_list <- function(rt) {
  # `[[` extracts a plain vector for both data.frames and tibbles, whereas
  # `rt[, 1]` / `rt[i, 2]` stay tibbles and break `as.character()`.
  term_names <- as.character(rt[[1]])
  go_ids <- as.character(rt[[2]])

  # One character vector of unique symbols per GO ID. `lapply()` over the IDs
  # does nothing for empty input (unlike `1:nrow(rt)`, which yields 1, 0).
  symbol_sets <- lapply(go_ids, function(go_id) {
    entrez_ids <- mget(go_id, org.Hs.egGO2ALLEGS)[[1]]
    # Namespace-qualified so the call does not depend on whether another
    # attached package's `select()` masks this one.
    genes <- AnnotationDbi::select(
      org.Hs.eg.db,
      keys = as.character(entrez_ids),
      columns = c("SYMBOL", "ENTREZID"),
      keytype = "ENTREZID"
    )
    unique(as.character(genes$SYMBOL))
  })

  # Allocate the padded result once instead of growing it inside the loop.
  n_rows <- max(0L, lengths(symbol_sets))
  row_names <- if (n_rows > 0L) paste0("V", seq_len(n_rows)) else NULL
  col_names <- if (length(term_names) > 0L) term_names else NULL
  gene_list <- matrix(
    "",
    nrow = n_rows,
    ncol = length(symbol_sets),
    dimnames = list(row_names, col_names)
  )

  for (j in seq_along(symbol_sets)) {
    gene_list[seq_along(symbol_sets[[j]]), j] <- symbol_sets[[j]]
  }

  # IDs without a symbol come back as NA; show them as empty cells.
  gene_list[is.na(gene_list)] <- ""
  gene_list
}
# Build the gene table for the GO terms listed in `df`.
# `df` is not created in the visible part of this script; it must already
# exist when this line runs.
df_gene_list <- get_GO_Gene_list(rt = df)