ClusterGVis包是中国药科大学Jun Zhang博士开发的系列可视化工具包之一,前期该包主要设计用于Bluk-seq表达矩阵的处理,可同时绘制聚类+分组表达趋势折线图+功能注释的组合图,通过一张热图可以了解差异基因可以划分成几个cluster,每个cluster的表达随着时间是如何变化,以及这些cluster变化的基因通过GO或者KEGG功能注释了解其功能。后续增加了 prepareDataFromscRNA 函数来整理准备单细胞的数据
Step 1. 鉴定出细胞群的marker基因
# 安装并加载所需的R包
# install.packages("devtools")
# devtools::install_github("junjunlab/ClusterGVis")
library(ClusterGVis)
load("scRNA_harmony.Rdata")
markers <- FindAllMarkers(object = scRNA_harmony, test.use="wilcox" ,
only.pos = TRUE,
logfc.threshold = 0.25)
top10_gene = markers %>% group_by(cluster) %>% top_n(n = 5, wt=avg_log2FC)
head(top10_gene)
## # A tibble: 6 × 7
## # Groups: cluster [1]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 2.81 0.883 0.241 0 CD4 memory T cells IL7R
## 2 0 2.83 0.388 0.085 0 CD4 memory T cells CCR7
## 3 0 2.98 0.304 0.047 0 CD4 memory T cells CD40LG
## 4 0 2.82 0.245 0.057 0 CD4 memory T cells LEF1
## 5 0 2.73 0.228 0.054 0 CD4 memory T cells PASK
## 6 0 2.83 0.194 0.028 0 CD4 memory T cells MAL
Step 2. 准备绘图的输入数据
prepareDataFromscRNA以 Seurat 对象和差异表达结果作为输入。如果showAverage参数设置为 TRUE,则表示该函数将计算并绘制同一亚组细胞中基因的平均表达。如果设置为 FALSE,则将绘制所有单个细胞的数据。默认情况下,该函数使用Seurat 对象中 "RNA" 检测的数据槽
Idents(scRNA_harmony) <- scRNA_harmony$celltype_anno
# [1] 以细胞群的均值作为输入,且默认对基因进行去重
st.data1 <- prepareDataFromscRNA(object = scRNA_harmony,
diffData = top10_gene,
showAverage = TRUE)
# [2] 不对基因进行去重处理,添加后缀
st.data2 <- prepareDataFromscRNA(object = scRNA_harmony,
diffData = top10_gene,
showAverage = TRUE,
keep.uniqGene = FALSE,
sep = "_") #"gene_1","gene_2",....
# [3] 以每个细胞单独的表达作为输入
st.data3 <- prepareDataFromscRNA(object = scRNA_harmony,
diffData = top10_gene,
showAverage = FALSE)
Step 3. 通路富集分析,选取Top通路
enrich.go <- enrichCluster(object = st.data1, # st.data2,st.data3
OrgDb = org.Hs.eg.db,
type = "BP",
organism = "hsa",
pvalueCutoff = 0.5,
topn = 3,
seed = 5201314)
# check
head(enrich.go)
## group Description pvalue ratio
## GO:0050870 C1 positive regulation of T cell activation 2.074642e-06 44.44444
## GO:1903039 C1 positive regulation of leukocyte cell-cell adhesion 3.102708e-06 44.44444
## GO:0022409 C1 positive regulation of cell-cell adhesion 6.151074e-06 44.44444
## GO:0042267 C2 natural killer cell mediated cytotoxicity 3.286714e-08 40.00000
Step 4. 可视化
4.1 以细胞群的均值作为输入,且默认对基因进行去重
# add gene name
markGenes = unique(top10_gene$gene)[sample(1:length(unique(top10_gene$gene)),40, replace = F)]
# 线图
visCluster(object = st.data, plot.type = "line")
# 对集群进行排序
visCluster(object = st.data,
plot.type = "heatmap",
column_names_rot = 45,
markGenes = markGenes,
cluster.order = c(1:14))
# 添加注释
pdf('ClusterGVis_1.pdf', height = 12, width = 14, onefile = F)
visCluster(object = st.data1,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich,
line.side = "left",
cluster.order = c(1:14),
go.col = rep(jjAnno::useMyCol("stallion",n = 14),each = 3),
add.bar = T)
dev.off()
ClusterGVis_1
4.2不对基因进行去重处理,添加后缀
# line plot
visCluster(object = st.data2, plot.type = "line")
# heatmap plot
pdf('ClusterGVis_2.pdf',height = 6, width = 6, onefile = F)
visCluster(object = st.data2,
plot.type = "heatmap",
column_names_rot = 45,
markGenes = c("MS4A1","MS4A1_1","MS4A1_2"),
cluster.order = c(1:14))
dev.off()
ClusterGVis_2
4.3 显示所有细胞的表达
# heatmap plot
visCluster(object = st.data3,
plot.type = "heatmap",
markGenes = unique(top10_gene$gene),
column_title_rot = 45,
cluster.order = 1:14)
# 添加注释
pdf('ClusterGVis_3.pdf',height = 12,width = 16,onefile = F)
visCluster(object = st.data3,
plot.type = "both",
column_title_rot = 45,
markGenes = unique(top10_gene$gene),
markGenes.side = "left",
annoTerm.data = enrich,
line.side = "left",
cluster.order = c(1:14),
add.bar = T)
dev.off()
ClusterGVis_3.png
4.4 同时执行 GO 和 KEGG
enrich.kegg <- enrichCluster(object = st.data1,
OrgDb = org.Hs.eg.db,
type = "KEGG",
organism = "hsa",
pvalueCutoff = 0.05,
topn = 3,
seed = 5201314)
head(enrich.kegg)
## group Description pvalue ratio
## hsa05340...1 C1 Primary immunodeficiency 1.909178e-04 40.0
## hsa04060...2 C1 Cytokine-cytokine receptor interaction 3.984338e-04 60.0
## hsa04934 C1 Cushing syndrome 3.153205e-03 40.0
## hsa04612 C2 Antigen processing and presentation 4.189612e-14 87.5
## hsa04650 C2 Natural killer cell mediated cytotoxicity 1.572798e-12 87.5
## hsa05332 C2 Graft-versus-host disease 4.624360e-08 50.0
visCluster(object = st.data1,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich.go,
go.col = rep(jjAnno::useMyCol("calm",n = 14),each = 3),
annoKegg.data = enrich.kegg,
#kegg.col = rep(jjAnno::useMyCol("stallion",n = 14),each = 3), # 这里我的kegg,有的组富集不到3
line.side = "left",
cluster.order = 1:14,
#sample.group = rep(c("sample1","sample2","sample3"),each = 2) # 这里我没有分组
)
# 格式化注释面板
visCluster(object = st.data1,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich.go,
go.col = rep(jjAnno::useMyCol("calm",n = 14),each = 3),
by.go = "anno_block",
annoKegg.data = enrich.kegg,
#kegg.col = rep(jjAnno::useMyCol("stallion",n = 14),each = 3),
by.kegg = "anno_block",
word_wrap = F,add_new_line = F,
line.side = "left",
cluster.order = 1:14,
#sample.group = rep(c("sample1","sample2","sample3"),each = 2)
)
# 添加 GO 和 KEGG 的条形图
pdf('ClusterGVis_4.pdf',height = 16, width = 22,onefile = F)
visCluster(object = st.data1,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = enrich.go,
go.col = rep(jjAnno::useMyCol("calm",n = 14),each = 3),
add.bar = T,
by.go = "anno_block",
annoKegg.data = enrich.kegg,
# kegg.col = rep(jjAnno::useMyCol("stallion",n = 14),each = 3),
add.kegg.bar = T,
by.kegg = "anno_block",
word_wrap = F, add_new_line = F,
line.side = "left",
cluster.order = 1:14,
# sample.group = rep(c("sample1","sample2","sample3"),each = 2)
)
dev.off()
ClusterGVis_4.png
# 将 GO 和 KEGG 结果合并并绘制在一个面板中,使用不同的颜色区分
library(dplyr)
all <- rbind(enrich.go %>% group_by(group) %>% slice_head(n = 2) %>%
mutate(Description = paste("BP: ",Description,sep = "")),
enrich.kegg %>% group_by(group) %>% slice_head(n = 2) %>%
mutate(Description = paste("KEGG: ",Description,sep = ""))) %>%
arrange(group)
# plot
pdf('ClusterGVis_5.pdf',height = 16,width = 22,onefile = F)
visCluster(object = st.data1,
plot.type = "both",
column_names_rot = 45,
show_row_dend = F,
markGenes = markGenes,
markGenes.side = "left",
annoTerm.data = all,
go.col = c(rep(jjAnno::useMyCol("calm",n = 14),each = 2),
rep(jjAnno::useMyCol("circus",n = 14),each = 2)),
line.side = "left",
word_wrap = F,add_new_line = F,
by.go = "anno_block",
add.bar = T,
cluster.order = 1:14,
#sample.group = rep(c("sample1","sample2","sample3"),each = 2)
)
dev.off()
ClusterGVis_5.png