【R常用代码】MSigDB基因集包装为GeneSet

从MSigDB获取指定的基因集

# Load msigdbr, which provides msigdbr_species() and msigdbr().
library(msigdbr)

# List the species available in MSigDB (auto-prints when run interactively).
msigdbr_species()

# Retrieve the human C2 (curated gene sets) collection. This is not the
# hallmark collection, which is category "H".
# NOTE(review): newer msigdbr releases rename `category` to `collection` --
# confirm against the installed version.
m_df <- msigdbr(
  species = "Homo sapiens",
  category = "C2"
)
gs_names <- unique(m_df$gs_name)

# Search the gene set names by keyword.
keyword <- c("KEGG")
# Collapse the keywords into a single alternation: grep() only uses the first
# element of a multi-element pattern, so extra keywords would be ignored.
keyword_pattern <- paste(keyword, collapse = "|")
keyword_sets <- grep(keyword_pattern, gs_names, value = TRUE)

# Report how many names matched and which ones.
print(length(keyword_sets))
print(keyword_sets)

# Ad-hoc look-ups of other pathway names (auto-print when run interactively).
grep("ARACHIDONIC", gs_names, value = TRUE)
grep("GLYCEROPHO", gs_names, value = TRUE)

# Final selection: every keyword match plus a hand-picked set of pathways.
# unique() removes names that appear both as a keyword match and in the
# hand-picked list (e.g. the KEGG_* entries below).
selected_sets <- unique(c(
  keyword_sets,
  "REACTOME_MITOCHONDRIAL_FATTY_ACID_BETA_OXIDATION_OF_UNSATURATED_FATTY_ACIDS",
  "BIOCARTA_WNT_PATHWAY",
  "KEGG_WNT_SIGNALING_PATHWAY",
  "REACTOME_YAP1_AND_WWTR1_TAZ_STIMULATED_GENE_EXPRESSION",
  "WNT_SIGNALING",
  "REACTOME_HEDGEHOG_LIGAND_BIOGENESIS",
  "KEGG_HEDGEHOG_SIGNALING_PATHWAY",
  "REACTOME_SIGNALING_BY_HIPPO",
  "BIOCARTA_NFKB_PATHWAY",
  "BIOCARTA_NOTCH_PATHWAY",
  "KEGG_NOTCH_SIGNALING_PATHWAY"
))

# Keep only the rows that belong to the selected gene sets.
m_df_re <- m_df[m_df$gs_name %in% selected_sets, ]
genes <- m_df_re$gene_symbol

# Named list: one character vector of gene symbols per gene set. Call split()
# directly rather than through a pipe: with `%>%` the data frame would also be
# inserted as the first positional argument and end up bound to `drop`.
m_list <- split(x = m_df_re$gene_symbol, f = m_df_re$gs_name)

并用GSEABase包(GSVA所依赖的包)将其包装成一个GeneSetCollection

  # GeneSet(), GeneSetCollection() and SymbolIdentifier() come from GSEABase.
  library(GSEABase)

  # Group gene symbols by gene set name, using the filtered table `m_df_re` so
  # that only the selected gene sets are wrapped. Splitting the full `m_df`
  # here would discard the selection and wrap every set in the collection;
  # substitute `m_df` for `m_df_re` if that is really what is wanted.
  # split() is called directly: piping the data frame in would also pass it as
  # an extra positional argument that ends up bound to `drop`.
  m_list <- split(x = m_df_re$gene_symbol, f = m_df_re$gs_name)
  head(m_list)
  names(m_list)

  # Build one GeneSet per entry, de-duplicating the gene symbols and tagging
  # the identifiers as gene symbols.
  geneSets <- lapply(names(m_list), function(name) {
    GeneSet(
      geneIds = unique(m_list[[name]]),
      setName = name,
      geneIdType = SymbolIdentifier()
    )
  })

  # NOTE(review): the variable name `Hallmarks` is kept unchanged, but the
  # collection holds the selected C2 gene sets, not the MSigDB hallmark sets.
  Hallmarks <- GeneSetCollection(geneSets)
  print(length(Hallmarks))
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容