从MSigDB获取指定的基因集
# Show the species supported by msigdbr (auto-printed at top level).
msigdbr_species()

# Load the human gene sets of MSigDB category "C2".
# NOTE(review): this step was previously labelled "hallmarks", but the
# category requested here is "C2", not "H" -- confirm which one is wanted.
m_df <- msigdbr(
  species = "Homo sapiens",
  category = "C2"
)
gs_names <- unique(m_df$gs_name)

# Search the gene-set names by keyword; the matches are reused below.
keyword <- c("KEGG")
add_list <- grep(keyword, gs_names, value = TRUE)
if (TRUE) {  # manual switch for the diagnostic printout
  print(length(add_list))
  print(add_list)
}

# Exploratory look-ups (auto-printed, results are not stored).
grep("ARACHIDONIC", gs_names, value = TRUE)
grep("GLYCEROPHO", gs_names, value = TRUE)

# Final selection: every keyword match plus a hand-picked set of pathways.
# Some hand-picked KEGG_* names repeat keyword matches; duplicates are
# harmless because the vector is only used with %in% below.
# The variable name `list` (which masks base::list as a variable) is kept
# unchanged in case later code refers to it.
list <- c(
  add_list,
  "REACTOME_MITOCHONDRIAL_FATTY_ACID_BETA_OXIDATION_OF_UNSATURATED_FATTY_ACIDS",
  "BIOCARTA_WNT_PATHWAY",
  "KEGG_WNT_SIGNALING_PATHWAY",
  "REACTOME_YAP1_AND_WWTR1_TAZ_STIMULATED_GENE_EXPRESSION",
  "WNT_SIGNALING",
  "REACTOME_HEDGEHOG_LIGAND_BIOGENESIS",
  "KEGG_HEDGEHOG_SIGNALING_PATHWAY",
  "REACTOME_SIGNALING_BY_HIPPO",
  "BIOCARTA_NFKB_PATHWAY",
  "BIOCARTA_NOTCH_PATHWAY",
  "KEGG_NOTCH_SIGNALING_PATHWAY"
)

# Keep only the rows that belong to the selected gene sets.
m_df_re <- m_df[m_df$gs_name %in% list, ]
genes <- m_df_re$gene_symbol

# Named list: gene-set name -> gene symbols.
# split() is called directly instead of through %>%: with the pipe, the data
# frame was also injected as an extra positional argument (ending up in
# `drop`), which only worked because `drop` is never evaluated for a
# character grouping vector.
m_list <- split(m_df_re$gene_symbol, m_df_re$gs_name)
并用GSEABase包将其包装成一个GeneSetCollection(可供GSVA使用)
# Build a named list: gene-set name -> gene symbols.
# NOTE(review): this uses the full m_df, so it replaces the m_list that was
# built earlier from the filtered m_df_re; confirm that wrapping every gene
# set in m_df (rather than only the selected ones) is intended.
# split() is called directly instead of through %>%: with the pipe, the data
# frame was also injected as an extra positional argument (ending up in
# `drop`), which only worked because `drop` is never evaluated for a
# character grouping vector.
m_list <- split(m_df$gene_symbol, m_df$gs_name)

# Quick inspection (auto-printed at top level).
head(m_list)
names(m_list)

# Wrap each entry as a GeneSet with symbol identifiers; gene symbols are
# de-duplicated within each set.
geneSets <- lapply(names(m_list), function(set_name) {
  GeneSet(
    geneIds = unique(m_list[[set_name]]),
    setName = set_name,
    geneIdType = SymbolIdentifier()
  )
})

# Combine the individual sets into one collection and report how many it holds.
Hallmarks <- GeneSetCollection(geneSets)
print(length(Hallmarks))