具体改进:
- 使用了Seurat官网更新的最新标准化方法 SCTransform v2,此方法在下游注释中可更加清晰地分辨细胞亚群。
- 添加了 Harmony 算法以对样本间批次效应进行去除。
- 集成了技能树jimmy的最佳PC数量选定功能。
# R package installation (one-off; uncomment the lines you need)
# devtools::install_github("satijalab/seurat", ref = "develop")
# BiocManager::install("glmGamPoi")
# devtools::install_github("satijalab/sctransform", ref = "develop")
# devtools::install_github("immunogenomics/harmony")
######################
library(Seurat)
library(SeuratData)
library(patchwork)
library(dplyr)
library(ggplot2)
library(harmony)
library(sctransform)
library(future)
library(glmGamPoi)
# ---- Parallel backend and input data ----
# Use up to 16 workers, but never more than the machine actually provides;
# a hard-coded 16 oversubscribes smaller machines.
n_workers <- min(16L, future::availableCores())
plan("multisession", workers = n_workers)
# Raise the size limit for globals exported to the workers (1024^4 bytes).
options(future.globals.maxSize = 1024^4)
plan()
# NOTE(review): wiping the global environment inside a script is generally
# discouraged; it is kept here so the objects visible to the rest of the
# script stay exactly as before.
rm(list = ls())
# Load the previously created Seurat object.
load(file = "data/0.raw_seurat_obj.rdata")
# Everything below operates on `sce`; fail early with a clear message if the
# .rdata file did not provide it.
if (!exists("sce")) {
  stop(
    "data/0.raw_seurat_obj.rdata did not provide an object named `sce`.",
    call. = FALSE
  )
}
#################### Keep only the required samples (optional) ####################
# Export the identity levels of `sce` so the list can be edited by hand.
samplename <- data.frame(samplename = levels(sce))
write.table(
  samplename, "result/sample.txt",
  sep = "\t", col.names = TRUE, row.names = FALSE, quote = FALSE
)
# After editing the exported list down to the wanted samples, read it back
# and subset the object with it.
subsamplename <- read.table(
  "data/sub-sample.txt",
  sep = "\t", header = TRUE, quote = "", check.names = FALSE
)
subsamplename <- subsamplename[, 1]
# Subset the loaded object `sce` (the original referenced an undefined `pbmc`).
keep_cells <- Idents(sce) %in% subsamplename
if (!any(keep_cells)) {
  stop(
    "None of the samples in data/sub-sample.txt match the identities of `sce`.",
    call. = FALSE
  )
}
sce <- sce[, keep_cells]
# Rebuild the factor so levels of removed samples are dropped.
sce$orig.ident <- as.factor(as.character(sce$orig.ident))
############# SCTransform v2: QC, normalization, dimensional reduction #############
# Percentage of counts from features matching "^MT-", stored as metadata
# column "percent.mt".
sce <- PercentageFeatureSet(sce, pattern = "^MT-", col.name = "percent.mt")

# Per-cell QC metrics as violin plots.
VlnPlot(
  sce,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  ncol = 3
)

# Scatter plots of total counts against the two other QC metrics.
plot1 <- FeatureScatter(sce, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(sce, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot1 + plot2

# Keep cells with more than 200 detected features and less than 2 in
# percent.mt.
# NOTE(review): a cutoff of 2 for percent.mt looks strict -- confirm it is
# intended for this dataset.
sce <- subset(
  sce,
  subset = nFeature_RNA > 200 & percent.mt < 2
)
###### Normalize, reduce, then remove batch effects with Harmony ######
# SCTransform (v2 flavor, glmGamPoi backend) regressing out percent.mt,
# followed by PCA on the SCT assay and Harmony integration over the
# "patient" metadata column.
sce <- sce %>%
  SCTransform(
    vst.flavor = "v2",
    verbose = FALSE,
    method = "glmGamPoi",
    vars.to.regress = "percent.mt"
  ) %>%
  RunPCA(assay = "SCT", verbose = FALSE) %>%
  RunHarmony(
    group.by.vars = "patient",
    assay.use = "SCT",
    plot_convergence = TRUE,
    max.iter.harmony = 50
  )
####### Choose the number of dimensions to keep #######
# Share (in percent) of the summed standard deviations contributed by each
# harmony dimension, and its running total.
# NOTE(review): this elbow heuristic is normally described for PCA standard
# deviations; confirm it is meaningful on the harmony embedding.
harmony_sd <- sce[["harmony"]]@stdev
pct <- harmony_sd / sum(harmony_sd) * 100
cumu <- cumsum(pct)
# Criterion 1: first dimension where the cumulative share exceeds 90% and the
# dimension itself contributes less than 5%.
co1 <- which(cumu > 90 & pct < 5)[1]
co1
# Criterion 2: last dimension whose drop to the next one exceeds 0.1
# percentage points, plus one. -diff(pct) is pct[i] - pct[i + 1]; this
# replaces `pct[1:length(pct) - 1]`, which only worked because the resulting
# 0 index is silently dropped.
co2 <- sort(which(-diff(pct) > 0.1), decreasing = TRUE)[1] + 1
co2
# Take the smaller criterion. If one of them found nothing (NA) use the other;
# if both are NA fall back to all available dimensions so `1:pcs` stays valid.
pc_candidates <- c(co1, co2)
pc_candidates <- pc_candidates[!is.na(pc_candidates)]
pcs <- if (length(pc_candidates) > 0) min(pc_candidates) else length(pct)
pcs
# Dimensions used by RunUMAP and FindNeighbors below.
bestpc <- seq_len(pcs)
# UMAP embedding and neighbor graph on the selected harmony dimensions.
sce <- RunUMAP(sce, reduction = "harmony", dims = bestpc)
sce <- FindNeighbors(sce, reduction = "harmony", dims = bestpc)
# Cluster the cells; the resolution controls granularity and should be tuned.
sce <- FindClusters(sce, resolution = 0.5)
# ---- Save results ----
# Make sure the output directory exists; pdf() and save() fail otherwise.
dir.create("new.result", showWarnings = FALSE, recursive = TRUE)

# UMAP colored by patient. print() is explicit because ggplot objects are not
# auto-printed when the script is run through source(), which would leave the
# PDF empty.
pdf("new.result/01.main.cluster.sample.pdf", width = 10, height = 6)
print(
  DimPlot(
    sce,
    reduction = "umap", group.by = "patient",
    label = TRUE, repel = TRUE, pt.size = .1
  )
)
dev.off()

# UMAP colored by the current identities (the clusters found above).
pdf("new.result/01.main.cluster.pdf", width = 8, height = 6)
print(
  DimPlot(
    sce,
    reduction = "umap",
    label = TRUE, repel = TRUE, pt.size = .1
  )
)
dev.off()

# Persist the clustered, not yet annotated object.
save(sce, file = "new.result/01.main.noann.rdata")
注意:
1. 对亚群注释寻找marker之前需要使用PrepSCTFindMarkers()
2. 取项目子集后需要重新标准化-去批次-降维聚类
3. 后续分析中如果要使用非SCT数据,注意要进行标准化
参考来源:
https://github.com/immunogenomics/harmony/blob/master/docs/SeuratV3.html
https://www.jianshu.com/p/fb2e43905559
https://satijalab.org/seurat/articles/sctransform_v2_vignette.html
鸣谢:
I thank Dr. Jianming Zeng (University of Macau) and all the members of his bioinformatics team, biotrainee, for generously sharing their experience and code.
问题交流:
Email: xuran@hrbmu.edu.cn