这里我们要复现和学习的是一篇nature文章的图表,这篇文章提供了figure的代码以及数据,我们要复现的是Figure1b。图很简单,就是两部分组成,左边是聚类树,celltype的聚类,右侧是基因表达气泡图。虽然文章提供了代码,但是按照它的代码是不能复现这个图的,需要手动修饰才能达到效果,这里我们自行作图,参考一部分来完成这个图。
(reference:An atlas of epithelial cell states and plasticity in lung adenocarcinoma)
加载单细胞数据:
setwd('D:\\KS项目\\公众号文章\\复现nature文章-聚类+气泡图')
library(Seurat)
library(dplyr)
library(ggplot2)
library(corrplot)
library(RColorBrewer)
#load data
uterus <- readRDS('./uterus.rds')
首先是左侧聚类树,原文用的是;PCA input data for hierachical clustering,文章中并没有过多解释,看示例数据理解我认为用的是每个celltype PCA的平均值。
#PCA mean
mat.pca = uterus@reductions$pca@cell.embeddings
mat.pca = mat.pca[rownames(uterus@meta.data),]
mat.pca = as.data.frame(mat.pca)
mat.pca$celltype <- uterus@meta.data$celltype
library(tidyverse)
result <- mat.pca %>%
group_by(celltype) %>%
summarise(across(PC_1:PC_40, mean, na.rm = TRUE))
result <- result %>%
column_to_rownames(var = "celltype")
result <- t(result)
library(dendextend)
library(circlize)
hcAirway = hclust(dist(t(as.matrix((result)))),method = 'ward.D')
hcAirwayd = as.dendrogram(hcAirway)
hcAirwayd %>% set("leaves_pch", 19) -> hcAirwayd
pdf(paste0('AAA',Sys.Date(),'.pdf'),useDingbats = F)
par(mar = c(20,2,1,1))
plot(hcAirwayd,type = 'triangle')
colored_bars(colors = rep('black',6),dend = hcAirwayd, sort_by_labels_order = F)
dev.off()
image.png
以上plot代码来源于文章,很显然不适于修饰个性化,以及后面的拼图,所以我们使用ggplot系统作图:这就像样了。需要注意的是:点里面添加文字,我数据没有意义,直接随便添加的数字!
library(ggplot2)
library(ggdendro)
hc = hclust(dist(t(as.matrix((result)))),method = 'ward.D')
hc = as.dendrogram(hc)
hc_dat<-dendro_data(hc,type="triangle")
#plot
ggplot() +
geom_segment(data = hc_dat$segments, aes(x = x, y = -y, xend = xend, yend = -yend),size=1, color='#8c6bb1') +
geom_text(data = hc_dat$labels, aes(x = x, y = -y+5, label = label), hjust = 0, angle = 0) +
geom_point(data = hc_dat$labels, aes(x = x, y = -y, color = label),size=6)+
geom_text(data = hc_dat$labels, aes(x = x, y = -y, label = x))+
theme_dendro()+
coord_flip()+
ylim(-50,10)+
NoLegend()+
scale_color_manual(values = c("#d2981a", "#a53e1f", "#457277", "#8f657d", "#42819F", "#86AA7D", "#CBB396"))
image.png
这个聚类也不一定要按照这篇nature的这样用PC数据聚类,一般情况完全可以使用基因表达数据来做,而且从结果看,数据聚类更加合理:
#也可以使用基因表达数据绘制聚类树
Markers <- FindAllMarkers(uterus,only.pos= T, min.pct = 0.2, logfc.threshold = 0.25,verbose = F,max.cells.per.ident = 2000)
Markers <- subset(Markers[grep("^RP[L|S]",Markers$gene, ignore.case = FALSE,invert=TRUE),],subset=p_val_adj < 0.05)
Markers_av <- AverageExpression(uterus,group.by = "cell_type",features = unique(Markers$gene),assays = "RNA")
Markers_av <- Markers_av$RNA
gene_cell_exp <- t(scale(t(Markers_av),scale = T,center = T))
hc1 = hclust(dist(t(as.matrix((gene_cell_exp)))),method = 'ward.D')
hc1 = as.dendrogram(hc1)
hc_dat1<-dendro_data(hc1,type="triangle")
ggplot() +
geom_segment(data = hc_dat1$segments,
aes(x = x, y = -y, xend = xend, yend = -yend),#plot tree
size=1, color='#8c6bb1') +
geom_text(data = hc_dat1$labels, aes(x = x, y = -y+5, label = label),
hjust = 0, angle = 0)+
coord_flip()
接下来绘制基因表达气泡图,相信大家很熟悉了,我们公众号检索“气泡图”,有很多种个性化的修饰,不甚枚举。这里下方的注释需要自行手动修改,也有注释方式,比较繁琐,不再演示!
Idents(uterus) <- 'cell_type'
DefaultAssay(uterus) <- "RNA"
SMC_genes <- c("ACTA2", "RGS5")
MAC_genes <- c("MS4A6A", "CD68","LYZ")
LY_genes <- c("CCL5", "STK17B","PTPRC")
SF_genes <- c("DCN", "COL6A3", "LUM")
EC_genes <- c("PECAM1","PCDH17", "VWF")
UEP_genes <- c("EPCAM", "CDH1")
EP_genes <- c("FOXJ1","CDHR3","DYDC2")
levels(uterus) <-rev(c("Uepi","Ly","Cepi","Mac", "Sfib", "SMC","Endo"))
features <- list("Uepi" = UEP_genes,
"Ly" = LY_genes,
"Cepi" = EP_genes,
"Mac" = MAC_genes,
"Sfib" = SF_genes,
"SMC" = SMC_genes,
"Endo" = EC_genes)
p <- DotPlot(object = uterus, features=features)#一般plot,获取作图数据
#ggplot
#这里我们没有采取原文的做法,使用分面图
#下方的注释需要最后手动修改
dat <- p$data
p2 = ggplot(p$data, aes(x = features.plot, y = id)) +
geom_point(aes(size = pct.exp, fill = avg.exp.scaled),shape=21) +
facet_grid(facets = ~feature.groups, switch = "x", scales = "free_x", space = "free_x") +
scale_radius(breaks = c(25, 50, 75, 100), range = c(0,6)) +
theme_minimal() +
scale_fill_gradientn(colors = colorRampPalette(c('#fcfbfd', '#efedf5', '#dadaeb', '#bcbddc', '#9e9ac8', '#807dba', '#6a51a3', '#4a1486'))(50))+
theme(axis.text.x = element_text(angle = 90,
face = 1, size = 10,
family = "Arial Narrow",
hjust = 1, vjust = 0.5,
color = "black"),
axis.text.y = element_blank(),#y轴不显示文字,便于拼图
legend.text = element_text(size = 8, face = 1, family = "Arial Narrow"),
legend.title = element_text(size = 10, face = 1, family = "Arial Narrow"),
legend.position = 'top' ,
strip.placement = "outside",
strip.text.x = element_blank(),
axis.title = element_blank(),
axis.line.x = element_line(color = "#d2981a",size=2))+
guides(colour = guide_colourbar(title.vjust = 0.9, title.hjust = 0))+
labs(size = "Expression(%)", fill = "Average Expression")
image.png
组合figure:
library(patchwork)
p1+p2+plot_layout(ncol = 2, widths = c(0.5,1))
image.png