单细胞数据挖掘实战:文献复现(二)批量创建Seurat对象及质控
前面得到了samples_objects Seurat对象并进行了质控,下面进行降维、聚类和细胞注释,主要是复现一下文献中的Fig. 1b和Fig. 1c
一、高变基因
samples_objects <- lapply(seq_along(samples_objects), function(i) {
samples_objects[[i]] <- FindVariableFeatures(object = samples_objects[[i]],
selection.method = "vst",
nfeatures = 2000, verbose = FALSE)
})
names(samples_objects) <- names(samples_raw_data)
二、在Seurat对象中添加一些相关信息
- 文献中Fig. 1b和Fig. 1c主要用f_ctrl_1, f_tumor_1, m_ctrl_1, m_tumor_1这四个样本;
- 添加相关基因:用来计算细胞周期评分
#细胞周期相关基因
s_genes <- cc.genes$s.genes
g2m_genes <- cc.genes$g2m.genes
#小胶质细胞和巨噬细胞标志物
microglia_markers <- c("Tmem119", "P2ry12", "Sall1", "Pros1", "Crybb1")
macrophages_markers <- c("Itga4", "Tgfbi", "Cxcl2", "Ccr2", "Il10", "Fgr")
samples_objects <- lapply(seq_along(samples_objects), function(i) {
# 添加shortID
samples_objects[[i]]$shortID <- substr(as.character(samples_objects[[i]]$orig.ident), 15,
nchar(as.character(samples_objects[[i]]$orig.ident)))
# 添加sex和sex_condition
samples_objects[[i]]$sex <- substr(as.character(samples_objects[[i]]$shortID), 12, 12)
samples_objects[[i]]$sex_condition <- paste0(samples_objects[[i]]$sex, "_", samples_objects[[i]]$condition)
# 添加var.features
samples_objects[[i]]@assays$RNA@var.features <- unique(c(samples_objects[[i]]@assays$RNA@var.features,
s_genes, g2m_genes, microglia_markers, macrophages_markers))
samples_objects[[i]]
})
names(samples_objects) <- names(samples_raw_data)
三、按条件整合样本、PCA, t-SNE 及找细胞类群
设置参数
anchor_dims <- 30
pca_dims <- 30
clustering_resolution <- 0.3
n_features <- 2000
下面这个过程耗时较长
sex_condition_objects <- lapply(c(1,3,5,7), function(i) { # f_ctrl_1, f_tumor_1, m_ctrl_1, m_tumor_1
sex_condition <- unique(samples_objects[[i]]$sex_condition)
#按条件整合样本(f_ctrl_1, f_tumor_1, m_ctrl_1, m_tumor_1)
print(paste0("Find Integration Anchors: ", sex_condition))
samples_anchors <- FindIntegrationAnchors(object.list = list(samples_objects[[i]],
samples_objects[[i + 1]]),
dims = 1:anchor_dims,
anchor.features = n_features)
print(paste0("Integrate Data: ", sex_condition))
samples_integrated <- IntegrateData(anchorset = samples_anchors, dims = 1:anchor_dims)
DefaultAssay(object=samples_integrated) <- "integrated"
#细胞周期评分(CellCycleScoring函数)
print(paste0("Cell Cycle Scoring:", sex_condition))
samples_integrated <- CellCycleScoring(object = samples_integrated, s.features = s_genes,g2m.features = g2m_genes, set.ident = TRUE)
print(paste0("Scale Data: ", sex_condition, " regress out: nCount_RNA, percent_mito, CC_difference"))
#计算 G2M 和 S 期分数之间的差异,以分离非周期细胞和周期细胞
# Seurat细胞周期评分和回归
samples_integrated$CC_Difference <- samples_integrated$S.Score - samples_integrated$G2M.Score
samples_integrated <- ScaleData(object = samples_integrated, verbose = FALSE,
vars.to.regress = c("nCount_RNA",
"percent_mito",
"CC_Difference"))
print(paste0("Run PCA: ", sex_condition))
samples_integrated <- RunPCA(object = samples_integrated, npcs = pca_dims, verbose = FALSE)
print(paste0("Run t-SNE:", sex_condition))
samples_integrated <- RunTSNE(object = samples_integrated, reduction = "pca", dims=1:pca_dims)
print(paste0("Find Neighbors: ", sex_condition))
samples_integrated <- FindNeighbors(object = samples_integrated, dims = 1:pca_dims)
print(paste0("Find Clusters: ", sex_condition))
samples_integrated <- FindClusters(object = samples_integrated, resolution = clustering_resolution)
samples_integrated
})
names(sex_condition_objects) <- substring(names(samples_objects), 1, nchar(names(samples_objects))-2)[c(1,3,5,7)]
保存
saveRDS(sex_condition_objects, file = "sex_condition_objects.RDS")
四、细胞注释
- 细胞注释这一步文献中是手工注释的,需要找一些marker,然后根据marker基因在哪些cluster中表达来判断;
- 可以用DotPlot和FeaturePlot可视化marker基因;
- 文献中用到的marker在附件中,可以下载;
先看一下文献中的图
这里只尝试f-ctrl这个样本,其它三个大家可以自己去复现一下
先简单画个图,看下cluster
DimPlot(sex_condition_objects[[1]],label = T)
选取marker并作图观察
##这些marker来自文献中Fig. 1c中所示的基因
ctrl_gene_panel <- c("Cd14", "Tmem119", "P2ry12", "Csf1", "Crybb1", "Mcm5", "Ifit3", "Tgfbi", "Ifitm2", "Ifitm3", "S100a6", "Ly6c2", "Ccr2", "Mrc1", "Cd163", "Cd24a","Ncam1")
########################FeaturePlot###########
for (i in seq_along(ctrl_gene_panel)) {
FeaturePlot(sex_condition_objects[[1]], features = ctrl_gene_panel[i], coord.fixed = T, order = T)
ggsave2(paste0("FeaturePlot_tumor_gene_panel_", tumor_gene_panel[i], ".png"), path = "./annotation/f_ctrl", width = 10, height = 10, units = "cm")
}
#######################
#############################DotPlot#####################
p <- DotPlot(sex_condition_objects[[1]], features = ctrl_gene_panel,
assay='RNA' ) + coord_flip()
p
###############################################
需要对这些marker进行观察判断,然后根据marker对细胞进行注释,这里就按照文献里的注释进行下一步;
注释:Fig. 1b
#################################################
anno_cells <- c("MG1","MG2","MG3","BAM","MG4","MG5","pre-MG",
"pre-MG","pre-MG","pre-MG","MG6","pre-MG")
names(anno_cells) <- levels(sex_condition_objects[[1]])
sex_condition_objects[[1]] <- RenameIdents(sex_condition_objects[[1]],anno_cells)
DimPlot(sex_condition_objects[[1]],label = T,pt.size = 0.5,
cols = c("#9AEAFF","#41589C","#64A7D4","#0FD1AE",
"#3A2985","#427DB0","#29A5C8","#518FC3"))+NoLegend()
#################################################
和文献的图比较一下
总体感觉差不多,MG4位置有点不一样,不知道是不是因为R包版本不一致引起的?
Fig. 1c
DotPlot(sex_condition_objects[[1]], features = ctrl_gene_panel,
cols = c("#92cee9","#00ed01")) + RotatedAxis() +
theme(legend.position="top")
和文献的图比较一下
颜色需要调一下,字体什么的可以去AI里修改,这里只做了f_ctrl_1一个样本,还有f_tumor_1, m_ctrl_1, m_tumor_1三个样本,感兴趣的话可以去试一下。
往期单细胞数据挖掘实战: