分析目的
- 不同于PCA聚类,geneNMF算法更适合局部聚类,比如epithelial cell做进一步的亚群分类。此外,类似的软件还有Hotspot, NMF等;
- 分出module可根据其代表性gene做功能富集分析,结果更加聚焦;
- 代表性gene也是后续靶向分析的gene,与cell function相关;
- 不同的module可作为不同的cluster,用作后续拟时序等分析。
代码及结果展示
1.#加载数据
load("/data/wanghao/yangjie_project/Project/1.OC_multi_sites/2.analysis/2.2_cell_cluster_merge/scdata1.Rdata")
scdata3 <- scdata1[,scdata1@meta.data$celltype2 == "Meso-like Fibroblast"]
2.#设置默认变量
Idents(scdata3) = scdata3$celltype2
DefaultAssay(scdata3) <- 'RNA'
3.#run NMF
seu.list <- SplitObject(scdata3, split.by = "orig.ident")
geneNMF.programs <- multiNMF(seu.list, assay="RNA",
k=2:10,
min.exp = 0.05,
nfeatures=2000)
4.#merge
geneNMF.metaprograms <- getMetaPrograms(geneNMF.programs,
metric = "cosine",
weight.explained = 0.5,
nMP=9,
max.genes=200)
5.#check
geneNMF.metaprograms$metaprograms.metrics
6.#plot
pdf('./geneNMF_program.pdf')
GeneNMF::plotMetaPrograms(geneNMF.metaprograms,similarity.cutoff = c(0.1,1))
dev.off()
7.#module genes
MP_genes <- unlist(geneNMF.metaprograms$metaprograms.genes)MP <- rep(names(geneNMF.metaprograms$metaprograms.genes), lengths(geneNMF.metaprograms$metaprograms.genes))
MP_genes <- data.frame(gene = MP_genes, MPs = MP)
聚类的module再做功能富集分析
run GO enrichment
1.#GO enrich功能富集
library(clusterProfiler)
Gene_ID <- bitr(MP_genes$gene, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")#构建文件并分析
data <- merge(Gene_ID,MP_genes,by.x='SYMBOL',by.y='gene')
data_GO <- compareCluster(
ENTREZID~MPs,
data=data,
fun="enrichGO",
OrgDb="org.Hs.eg.db",
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05
)
2.#去除冗余terms
data_GO_sim <- simplify(data_GO,
cutoff=0.7,
by="p.adjust",
select_fun=min)
3.#作图及输出表格
p <- dotplot(data_GO_sim, showCategory=5,font.size = 8)
ggsave(p,filename='./MP_enrich.png',height=10,width=8) #需要自己挑选与课题相关的通路
data_GO_sim_fil <- data_GO_sim@compareClusterResult %>% dplyr::filter(p.adjust <=0.05)
library(openxlsx)
write.xlsx(data_GO_sim_fil,'Fibro_MP_GOenrich.xlsx')
run GSEA
library(msigdbr)
library(fgsea)
top_p <- lapply(geneNMF.metaprograms$metaprograms.genes, function(program) {
runGSEA(program, universe=rownames(scdata3), category = "C5", subcategory = "GO:BP")
})