单细胞转录组测序分析--再说Seurat

上期我们讲了使用Seurat分析单个样本，那么2个或多个样本的分析会有哪些不同呢？单细胞转录组数据中一个重要的问题就在于批次效应，任何一个试验步骤都容易带来批次效应，多个样本的比较分析更甚。Seurat专门开发了一种能够比较多个样本的分析方法，我们一起来看看。
假设有2个样本，分别对2个样本进行数据的预处理及标准化，类似于单个样本的分析：

#样本1
matrix <- Read10X_h5("filtered_gene_bc_matrices_h5.h5")
filter_10x_object <-CreateSeuratObject(raw.data = matrix,min.cells = 3)
filter_10x_object@meta.data$group <- "sample_1"
mito.genes <- grep(pattern = "^MT-", x = rownames(x = filter_10x_object@data), 
value = TRUE)
percent.mito <- Matrix::colSums(filter_10x_object@raw.data[mito.genes, ])/
Matrix::colSums(filter_10x_object@raw.data)
filter_10x_object <- AddMetaData(object = filter_10x_object, metadata 
= percent.mito,col.name = "percent.mito")
filter_10x_object <- FilterCells(object = filter_10x_object, subset.names =
 c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), 
high.thresholds = c(2500, 0.05))
filter_10x_object <- NormalizeData(object = filter_10x_object, 
normalization.method = "LogNormalize", scale.factor = 10000)
filter_10x_object<- ScaleData(filter_10x_object, vars.to.regress
 = c("nUMI", "percent.mito"), display.progress = F)
filter_10x_object <- FindVariableGenes(filter_10x_object, do.plot = F)
g.1 <- head(rownames(filter_10x_object@hvg.info), 1000)

#样本2
matrix2 <- Read10X_h5("filtered_gene_bc_matrices_h5.h5")
filter_10x_object_2<-CreateSeuratObject(raw.data = matrix2,min.cells = 3)
filter_10x_object_2@meta.data$group <- "sample_2"
mito.genes <- grep(pattern = "^MT-", x = rownames(x = filter_10x_object_2@data), 
value = TRUE)
percent.mito <- Matrix::colSums(filter_10x_object_2@raw.data[mito.genes, ])/
Matrix::colSums(filter_10x_object_2@raw.data)
filter_10x_object_2 <- AddMetaData(object = filter_10x_object_2, metadata 
= percent.mito,col.name = "percent.mito")
filter_10x_object_2<- FilterCells(object = filter_10x_object_2, subset.names =
 c("nGene", "percent.mito"), low.thresholds = c(200, -Inf), 
high.thresholds = c(2500, 0.05))
filter_10x_object_2<- NormalizeData(object = filter_10x_object_2, 
normalization.method = "LogNormalize", scale.factor = 10000)
filter_10x_object_2<- ScaleData(filter_10x_object_2, vars.to.regress
 = c("nUMI", "percent.mito"), display.progress = F)
filter_10x_object_2<- FindVariableGenes(filter_10x_object_2, do.plot = F)
g.2 <- head(rownames(filter_10x_object_2@hvg.info), 1000)

#选取2个样本高可变基因的交集作为后续分析的基因
genes.use <- unique(c(g.1, g.2))
genes.use <- intersect(genes.use, rownames(filter_10x_object@scale.data))
genes.use <- intersect(genes.use, rownames(filter_10x_object_2@scale.data))
#执行RunCCA函数，主要目的是鉴定2个数据集共有的变异源，同时也会将2个对象合并成一个对象，
#并进行数据的标准化和归一化（scale）
combined <- RunCCA(filter_10x_object, filter_10x_object_2, 
genes.use = genes.use, num.cc = 30,
add.cell.id1="sample_1",add.cell.id2="sample_2")
#如果是2个以上的样本，则需要使用RunMultiCCA函数

#类似于PCA分析的PC选择，执行RunCCA后也需要选择合适的CC个数进行后续分析
p3 <- MetageneBicorPlot(combined, grouping.var = "group", dims.eval = 1:30, 
display.progress = FALSE)

8.jpg

执行完上述步骤后进行align the CCA subspaces

combined <- AlignSubspace(combined, reduction.type = "cca", 
grouping.var = "group", dims.align = 1:20)
#执行完此步骤后会返回一个新的降维数据用于后续聚类分析
combined <- RunTSNE(combined, reduction.use = "cca.aligned", 
dims.use = 1:20, do.fast = T)
combined <- FindClusters(combined, reduction.type = "cca.aligned", 
resolution = 0.6, dims.use = 1:20)
#接下去就根据自己的分析需求进行多组数据比较分析了，这里就不介绍了

注：Seurat的多个样本合并分析，与10x官网的cellranger aggr分析相差很大。

单细胞转录组测序分析--再说Seurat

推荐阅读更多精彩内容