单细胞数据挖掘实战:文献复现(十)完结篇

单细胞数据挖掘实战:文献复现(一)批量读取数据

单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控

单细胞数据挖掘实战:文献复现(三)降维、聚类和细胞注释

单细胞数据挖掘实战:文献复现(四)细胞比例饼图

单细胞数据挖掘实战:文献复现(五)细胞亚群并可视化

单细胞数据挖掘实战:文献复现(六)标记基因及可视化

单细胞数据挖掘实战:文献复现(七)MG 和 Mo/MΦ 评分

单细胞数据挖掘实战:文献复现(八)marker基因在Hom-MG、 Act-MG 和 Mo/MΦ 细胞中的表达情况

单细胞数据挖掘实战:文献复现(九)基因GO分析及cnetplot可视化结果

这篇文献的最后一个结果是介绍MHCII基因小胶质细胞表达的性别相关差异,结果说明MHCII 和Cd74基因的表达在男性神经胶质瘤的小胶质细胞和单核细胞/巨噬细胞中更为丰富,结果通过Fig. 5来展示。

一、加载R包

if (T) {
  if(!require(BiocManager))install.packages("BiocManager")
  if(!require(Seurat))install.packages("Seurat")
  if(!require(Matrix))install.packages("Matrix")
  if(!require(ggplot2))install.packages("ggplot2")
  if(!require(cowplot))install.packages("cowplot")
  if(!require(magrittr))install.packages("magrittr")
  if(!require(dplyr))install.packages("dplyr")
  if(!require(purrr))install.packages("purrr")
  if(!require(ggrepel))install.packages("ggrepel")
  if(!require(ggridges))install.packages("ggridges")
  if(!require(ggpubr))install.packages("ggpubr")
}

二、添加sex_condition

table(seu_object$orig.ident)
#GSM4039241-F-ctrl-1  GSM4039242-F-ctrl-2 GSM4039243-F-tumor-1 
#                5151                 4781                 4878 
#GSM4039244-F-tumor-2  GSM4039245-M-ctrl-1  GSM4039246-M-ctrl-2 
#                5467                 4786                 5218 
#GSM4039247-M-tumor-1 GSM4039248-M-tumor-2 
#                4091                 4891
group <- c(rep("F_ctrl",times = 5151),
           rep("F_ctrl",times = 4781),
           rep("F_tumor",times = 4878),
           rep("F_tumor",times = 5467),
           rep("M_ctrl",times = 4786),
           rep("M_ctrl",times = 5218),
           rep("M_tumor",times = 4091),
           rep("M_tumor",times = 4891))
seu_object$sex_condition <- group
# Figure 5a
DimPlot(seu_object, group.by = "sex_condition")
1.png

三、Act-MG 和 Mo/MΦ 中不同性别的 DEG

table(Idents(seu_object))
#   MG Mo/MΦ   BAM 
#31959  5624  1680
#选出"MG","Mo/MΦ"
seu_object <- subset(seu_object, idents = c("MG","Mo/MΦ"))
table(Idents(seu_object))
#   MG Mo/MΦ 
#31959  5624
#将Microglia细分为"h.Microglia"和"a.Microglia"
Idents(seu_object) <- seu_object$seurat_clusters
seu_object$cell_type_4_groups <- plyr::mapvalues(Idents(seu_object), 
                                                 from=c(0:4,6:11,13,14,16,18,19), 
                                                 to = c("h.Microglia",  "h.Microglia", "a.Microglia",
                                                        "h.Microglia", "Macrophages", "h.Microglia",  
                                                        "h.Microglia", "Macrophages","a.Microglia", 
                                                        "a.Microglia", "Macrophages","a.Microglia", 
                                                        "h.Microglia", "Macrophages" ,"Macrophages", 
                                                        "Macrophages"))
Idents(seu_object) <- seu_object$cell_type_4_groups
table(Idents(seu_object))
#h.Microglia a.Microglia Macrophages 
#      24728        7231        5624
object_tmp <- seu_object
object_tmp$sex <- substr(seu_object$sex_condition,1,1)
object_tmp$sex_cell_type <- paste0(object_tmp$sex, "_", as.character(Idents(seu_object)))
Idents(object_tmp) <- object_tmp$sex_cell_type
table(Idents(object_tmp))
#F_h.Microglia F_a.Microglia F_Macrophages M_h.Microglia M_Macrophages 
#        13273          3496          2691         11455          2933 
#M_a.Microglia 
#         3735 
#找marker基因
markers_male_ActMG <- FindMarkers(object = object_tmp, ident.1 = "M_a.Microglia", 
                                  ident.2 = "F_a.Microglia", only.pos = TRUE, min.pct = 0.25, 
                                  logfc.threshold = 0.0001)

markers_female_ActMG <- FindMarkers(object = object_tmp, ident.1 = "F_a.Microglia", 
                                    ident.2 = "M_a.Microglia", only.pos = TRUE, min.pct = 0.25, 
                                    logfc.threshold = 0.0001)
markers_female_ActMG$avg_log2FC <- markers_female_ActMG$avg_log2FC*-1

markers_male_MoM <- FindMarkers(object = object_tmp, ident.1 = "M_Macrophages", 
                                ident.2 = "F_Macrophages", only.pos = TRUE, min.pct = 0.25, 
                                logfc.threshold = 0.0001)

markers_female_MoM <- FindMarkers(object = object_tmp, ident.1 = "F_Macrophages", 
                                  ident.2 = "M_Macrophages", only.pos = TRUE, min.pct = 0.25, 
                                  logfc.threshold = 0.0001)
markers_female_MoM$avg_log2FC <- markers_female_MoM$avg_log2FC*-1
#放入一个list中循环进行条件筛选
markers_list <- list(markers_male_ActMG, markers_female_ActMG, markers_male_MoM, markers_female_MoM)
names(markers_list) <- c("markers_male_ActMG", "markers_female_ActMG", "markers_male_MoM", "markers_female_MoM")
markers_list <- lapply(markers_list, function(x) {
  x$gene <- rownames(x)
  x$padj_log10<- -log10(x$p_val_adj)
  x[x$padj_log10 == Inf, "padj_log10"]<-284
  x$log2FC <- log2(exp(x$avg_log2FC))
  x <- x %>% mutate(threshold = c(p_val_adj < 0.05 & abs(log2FC) > 0.5 & (pct.1>=0.25 | pct.2 >=0.25)))
  x$color <- x$threshold
  x[x$color == F, "color"] <- "below threshold"
  x[x$color == T, "color"]<-"male"
  x[x$color=="male" & x$log2FC<0, "color"]<-"female"
  x$color<-factor(x$color, levels=c("female", "male", "below threshold"))
  x<-na.omit(x)
  x <-x[rev(order(x$color)),]
  rownames(x)<-seq_along(x$p_val)
  x
})
#按性别进行整合
markers_ActMG_sex <- rbind(cbind(markers_list$markers_male_ActMG, sex="M"),
                           cbind(markers_list$markers_female_ActMG, sex = "F"))

markers_MoM_sex <- rbind(cbind(markers_list$markers_male_MoM, sex="M"),
                         cbind(markers_list$markers_female_MoM, sex = "F"))
markers_MoM_sex[markers_MoM_sex$padj_log10 > 149, "padj_log10"] <- 149

markers_ActMG_sex$color <- factor(markers_ActMG_sex$color, levels=c("female", "male", "below threshold"))
markers_MoM_sex$color <- factor(markers_MoM_sex$color, levels=c("female", "male", "below threshold"))
#画火山图
col_male<- "#4FCAFF"
col_female<- "#FFFF00"

MG<-ggplot(markers_ActMG_sex, aes(x=log2FC, y=padj_log10, fill=color))+
  geom_jitter(size=3, shape=21, color="black", stroke=0.01)+
  geom_text_repel(data=markers_ActMG_sex[markers_ActMG_sex$threshold==T & markers_ActMG_sex$log2FC>0,],
                  aes(label=gene), nudge_x = 2, nudge_y = 2,
                  color="black", size=6, force=5)+
  geom_text_repel(data=markers_ActMG_sex[markers_ActMG_sex$threshold==T & markers_ActMG_sex$log2FC<0,],
                  aes(label=gene), nudge_x = -2, nudge_y = 2,
                  color="black", size=6, force=5)+
  scale_fill_manual(values=c(col_female,col_male, "gray90"))+
  scale_x_continuous(limits=c(-4,4))+
  labs(title= "Act-MG",x="avg log2FC", y="log10 padj")+
  theme_light(base_size=18)+
  theme(panel.grid = element_blank(), legend.title = element_blank(),
        legend.text = element_text(size=18))+
  guides(fill = guide_legend(override.aes = list(size=6)))

MoM<-ggplot(markers_MoM_sex, aes(x=log2FC, y=padj_log10, fill=color))+
  geom_jitter(size=3, shape=21, color="black")+
  geom_text_repel(data=markers_MoM_sex[markers_MoM_sex$threshold==T & markers_MoM_sex$log2FC>0 & markers_MoM_sex$gene != "AB124611",],
                  aes(label=gene), nudge_x = 2, nudge_y = 5,
                  color="black", size=6)+
  geom_text_repel(data=markers_MoM_sex[markers_MoM_sex$threshold==T & markers_MoM_sex$log2FC<0,],
                  aes(label=gene), nudge_x = -2.5, nudge_y = 2,
                  color="black", size=6)+
  scale_fill_manual(values=c(col_female,col_male, "gray90"))+
  scale_x_continuous(limits=c(-4,4))+
  labs(title= "Mo/M ", x="avg log2FC", y="log10 padj")+
  theme_light(base_size=18)+
  theme(panel.grid = element_blank(), legend.title = element_blank(),
        legend.text = element_text(size=18))+
  guides(fill = guide_legend(override.aes = list(size=6)))
#fig5b
pdf("fig5b.pdf",width = 20,height = 20)
ggarrange(MG, MoM, ncol=2, common.legend = T)
dev.off()
2.png
3.png

与文献结果基本一致。

Figure 5c

FeaturePlots for selected MHC II genes(Cd74, H2-Ab1, H2-Eb1, H2-Aa)

gene_to_plot <- "Cd74"
f5c_1 <- FeaturePlot(seu_object, features = gene_to_plot) + 
  scale_color_gradientn(colors=viridis::viridis(n=50)) +
  ggtitle(gene_to_plot)
gene_to_plot <- "H2-Ab1"
f5c_2 <- FeaturePlot(seu_object, features = gene_to_plot) + 
  scale_color_gradientn(colors=viridis::viridis(n=50)) +
  ggtitle(gene_to_plot)
gene_to_plot <- "H2-Eb1"
f5c_3 <- FeaturePlot(seu_object, features = gene_to_plot) + 
  scale_color_gradientn(colors=viridis::viridis(n=50)) +
  ggtitle(gene_to_plot)
gene_to_plot <- "H2-Aa"
f5c_4 <- FeaturePlot(seu_object, features = gene_to_plot) + 
  scale_color_gradientn(colors=viridis::viridis(n=50)) +
  ggtitle(gene_to_plot)
pdf("fig5c.pdf",width = 20,height = 5)
ggarrange(f5c_1, f5c_2,f5c_3,f5c_4, ncol=4, common.legend = T)
dev.off()
4.png

四、MHCII 基因和Cd74高表达的 Act-MG 和 intMoMΦ 群体中的富集情况

Figure 5d

rm(list = ls())
options(stringsAsFactors = F)
seu_object = readRDS("seu_object_merge_prediff.RDS")
seu_object <- ScaleData(seu_object)

#添加性别
table(seu_object$orig.ident)
group <- c(rep("F_ctrl",times = 5151),
           rep("F_ctrl",times = 4781),
           rep("F_tumor",times = 4878),
           rep("F_tumor",times = 5467),
           rep("M_ctrl",times = 4786),
           rep("M_ctrl",times = 5218),
           rep("M_tumor",times = 4091),
           rep("M_tumor",times = 4891))
seu_object$sex_condition <- group

#将Microglia细分为"h.Microglia"和"a.Microglia"
Idents(seu_object) <- seu_object$seurat_clusters
seu_object$cell_type_4_groups <- plyr::mapvalues(Idents(seu_object), 
                                                 from=c(0:11,13,14,16,18,19), 
                                                 to = c("h.Microglia",  "h.Microglia", "a.Microglia","h.Microglia","BAM" ,"Macrophages", "h.Microglia",  
                                                        "h.Microglia", "Macrophages","a.Microglia","a.Microglia", "Macrophages","a.Microglia", 
                                                        "h.Microglia", "Macrophages" ,"Macrophages","Macrophages"))



genes<-c("Cd74", "H2-Ab1", "H2-Eb1", "H2-Aa" )

gene_expression_data <- GetAssayData(object = seu_object, slot = "data")
gene_expression_data <- as.data.frame(t(gene_expression_data[genes,]))
gene_expression_data$cell_types_4_groups <- as.character(Idents(seu_object))
gene_expression_data$clusters_0.6 <- seu_object$RNA_snn_res.0.6

gene_expression_data$cell_types_6_groups <- NULL
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 11] <- "Mo"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 4] <- "intMoM"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(8,16,18,19)] <- "M"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 == 5] <- "BAM"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(0,1,3,6,7,14)] <- "h.Microglia"
gene_expression_data$cell_types_6_groups[gene_expression_data$clusters_0.6 %in% c(2,9,10,13)] <- "a.Microglia"

gene_expression_data$cell_types_6_groups <- factor(gene_expression_data$cell_types_6_groups)
gene_expression_data$cell_types_6_groups <- factor(gene_expression_data$cell_types_6_groups, 
                                                   levels = levels(gene_expression_data$cell_types_6_groups)[c(3,1,6,4,5,2)])

seu_object$sex <- substr(seu_object$sex_condition,1,1)
gene_expression_data$sex <- seu_object$sex

graphs<-list() 
for (i in seq_along(genes)){ 
  plot<-ggplot(gene_expression_data, aes(x="", y=cell_types_6_groups, fill=sex))+
    geom_density_ridges(aes_string(x=as.name(genes[i])), alpha=0.65, scale=1 )+
    scale_fill_manual(values=c( "#ffff00",  "#4FCAFF"))+
    xlab("expression level")+
    ylab("")+
    labs(title=genes[i])+
    theme_classic(base_size=18)+
    theme(axis.text = element_text(size=16), axis.text.y = element_text(face="bold", size=18),
          axis.title=element_text(size=16),
          title = element_text(size = 20), legend.text = element_text(size=18),
          legend.title = element_blank())
  graphs[[i]]<-plot
}
if(!require(ggpubr))install.packages("ggpubr")
pdf("fig_5d2.pdf", width = 20,height = 5)
ggarrange(plotlist=graphs, ncol=4, nrow=1, common.legend = T, legend="top")
dev.off()
5.png

Figure 5f

gene_expression_data$mhc2_score <- rowMeans(gene_expression_data[, c("H2-Ab1", "H2-Eb1", "H2-Aa")])
gene_expression_data <- gene_expression_data  %>% mutate(threshold = mhc2_score > 2)
gene_expression_data[gene_expression_data$threshold==T, "threshold"]<-"MHCII High"
gene_expression_data[gene_expression_data$threshold==F, "threshold"]<-"MHCII Low"
gene_expression_data$threshold <- factor(gene_expression_data$threshold, 
                                         levels = c("MHCII Low", "MHCII High"))

gene_expression_data_all_genes <- GetAssayData(object = seu_object, slot = "data")
gene_expression_data_ciita_mif <- as.data.frame(t(gene_expression_data_all_genes[c("Ciita", "Mif"), ]))
colnames(gene_expression_data_ciita_mif) <- c("Ciita", "Mif")
gene_expression_data <- cbind(gene_expression_data, gene_expression_data_ciita_mif)

gene_expression_data <- gene_expression_data[gene_expression_data$cell_types_6_groups %in% 
                                               c("a.Microglia", "intMoM"), ]
gene_expression_data$cell_types_selected <- factor(gene_expression_data$cell_types_6_groups, 
                                                   levels = c("a.Microglia", "intMoM"))

p5f_1 <- ggplot(gene_expression_data, aes(x=mhc2_score, y=cell_types_selected, fill=sex))+
  geom_density_ridges(alpha=0.65, scale=1 )+
  scale_fill_manual(values=c( "#ffff00",  "#4FCAFF"))+
  geom_vline(xintercept=2, color="red", linetype="dashed", size=1)+
  xlab("MHCII score (H2-Ab1, H2-Eb1, H2-Aa)")+
  ylab("")+
  theme_classic(base_size=18)+
  theme(axis.text.y = element_text(face="bold"),
        title = element_text(size = 20), axis.title.y = element_text(size=16),
        legend.text = element_text(size=18), legend.title = element_blank(),
        axis.text = element_text(size=18), legend.position = "top")

p5f_2 <- ggplot(gene_expression_data, aes(x=cell_types_selected, y=Mif, fill=sex))+
  geom_violin(scale="count")+
  scale_fill_manual(values=c( "#ffff00",  "#4FCAFF"))+
  facet_grid(~threshold)+
  ylab("Mif expression level")+
  xlab("")+
  theme_classic(base_size=18)+
  theme(axis.text = element_text(size=18), legend.position = "none")

pdf("fig_5f.pdf", width = 10,height = 10)
ggarrange(p5f_1,p5f_2, ncol=1, nrow=2)
dev.off()
6.png

本系列单细胞文献复现已经完结,从最开始的读取数据到创建Seurat对象及质控以及降维、聚类和细胞注释并到最后的marker基因等等步骤,按照原文中的5个Fig结果进行复现,总体感觉复现结果还行,自己也学到了不少东西,不过还有很多细节需要仔细琢磨和体会。

往期单细胞数据挖掘实战

单细胞数据挖掘实战:文献复现(一)批量读取数据

单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控

单细胞数据挖掘实战:文献复现(三)降维、聚类和细胞注释

单细胞数据挖掘实战:文献复现(四)细胞比例饼图

单细胞数据挖掘实战:文献复现(五)细胞亚群并可视化

单细胞数据挖掘实战:文献复现(六)标记基因及可视化

单细胞数据挖掘实战:文献复现(七)MG 和 Mo/MΦ 评分

单细胞数据挖掘实战:文献复现(八)marker基因在Hom-MG、 Act-MG 和 Mo/MΦ 细胞中的表达情况

单细胞数据挖掘实战:文献复现(九)基因GO分析及cnetplot可视化结果

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容