参考公众号:生信会客厅
rm(list=ls())
options(stringsAsFactors = FALSE)
Sys.setenv(language='en')
#===多样本合并与批次校正===#
library(Seurat)
library(tidyverse)
library(patchwork)
dir.create('cluster1')
dir.create('cluster2')
dir.create('cluster3')
set.seed(123) #设置随机数种子,使结果可重复
##==合并数据集==##
使用目录向量合并
dir = c('Data/GSE139324/GSE139324_SUB/GSM4138110',
'Data/GSE139324/GSE139324_SUB/GSM4138111',
'Data/GSE139324/GSE139324_SUB/GSM4138128',
'Data/GSE139324/GSE139324_SUB/GSM4138129',
'Data/GSE139324/GSE139324_SUB/GSM4138148',
'Data/GSE139324/GSE139324_SUB/GSM4138149',
'Data/GSE139324/GSE139324_SUB/GSM4138162',
'Data/GSE139324/GSE139324_SUB/GSM4138163',
'Data/GSE139324/GSE139324_SUB/GSM4138168',
'Data/GSE139324/GSE139324_SUB/GSM4138169')
names(dir) = c('HNC01PBMC', 'HNC01TIL', 'HNC10PBMC', 'HNC10TIL', 'HNC20PBMC',
'HNC20TIL', 'PBMC1', 'PBMC2', 'Tonsil1', 'Tonsil2')
counts <- Read10X(data.dir = dir)
scRNA1 = CreateSeuratObject(counts, min.cells=1)
#查看样本信息
dim(scRNA1) #查看基因数和细胞总数
[1] 23603 19750
table(scRNA1@meta.data$orig.ident)
#HNC01PBMC HNC01TIL HNC10PBMC HNC10TIL HNC20PBMC HNC20TIL PBMC1
#1725 1298 1750 1384 1530 1148 2445
#PBMC2 Tonsil1 Tonsil2
#2436 3325 2709
使用merge函数合并seurat对象
scRNAlist <- list()
#以下代码会把每个样本的数据创建一个seurat对象,并存放到列表scRNAlist里
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i])
scRNAlist[[i]] <- CreateSeuratObject(counts, min.cells=1)
}
#使用merge函数将10个seurat对象合并成一个seurat对象
scRNA2 <- merge(scRNAlist[[1]], y=c(scRNAlist[[2]], scRNAlist[[3]],
scRNAlist[[4]], scRNAlist[[5]], scRNAlist[[6]], scRNAlist[[7]],
scRNAlist[[8]], scRNAlist[[9]], scRNAlist[[10]]))
#查看样本信息
dim(scRNA2)
#23603 19750
table(scRNA2@meta.data$orig.ident)
#HNC01PBMC HNC01TIL HNC10PBMC HNC10TIL HNC20PBMC HNC20TIL PBMC1
#1725 1298 1750 1384 1530 1148 2445
#PBMC2 Tonsil1 Tonsil2
#2436 3325 2709
通过最后的dim和table函数查看数据,可以发现两种方法得到的基因数和细胞数完全一样。下面我们降维聚类看看有没有差异:
scRNA1降维聚类
scRNA1 <- NormalizeData(scRNA1)
scRNA1 <- FindVariableFeatures(scRNA1, selection.method = "vst")
scRNA1 <- ScaleData(scRNA1, features = VariableFeatures(scRNA1))
scRNA1 <- RunPCA(scRNA1, features = VariableFeatures(scRNA1))
plot1 <- DimPlot(scRNA1, reduction = "pca", group.by="orig.ident")
plot2 <- ElbowPlot(scRNA1, ndims=30, reduction="pca")
plotc <- plot1+plot2
plotc
pc.num=1:30 #选取前30个主成分
scRNA1 <- FindNeighbors(scRNA1, dims = pc.num)
scRNA1 <- FindClusters(scRNA1, resolution = 0.5)
table(scRNA1@meta.data$seurat_clusters)
#0 1 2 3 4 5 6 7 8 9 10 11 12 13
#2592 2111 1917 1729 1365 1102 888 885 878 827 640 626 603 544
#14 15 16 17 18 19 20 21 22
#532 521 484 405 334 307 205 205 50
metadata <- scRNA1@meta.data
cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
降维
tsne
scRNA1 = RunTSNE(scRNA1, dims = pc.num)
#group_by_cluster
plot1 = DimPlot(scRNA1, reduction = "tsne", label=T)
#group_by_sample
plot2 = DimPlot(scRNA1, reduction = "tsne", group.by='orig.ident')
#combinate
plotc <- plot1+plot2
plotc
UMAP
scRNA1 <- RunUMAP(scRNA1, dims = pc.num)
#group_by_cluster
plot3 = DimPlot(scRNA1, reduction = "umap", label=T)
#group_by_sample
plot4 = DimPlot(scRNA1, reduction = "umap", group.by='orig.ident')
#combinate
plotc <- plot3+plot4
plotc
合并tSNE与UMAP
plotc <- plot2+plot4+ plot_layout(guides = 'collect')
scRNA2对象的降维聚类
scRNA2 <- NormalizeData(scRNA2)
scRNA2 <- FindVariableFeatures(scRNA2, selection.method = "vst")
scRNA2 <- ScaleData(scRNA2, features = VariableFeatures(scRNA2))
scRNA2 <- RunPCA(scRNA2, features = VariableFeatures(scRNA2))
#memory.limit(size=560000)
plot1 <- DimPlot(scRNA2, reduction = "pca", group.by="orig.ident")
plot2 <- ElbowPlot(scRNA2, ndims=30, reduction="pca")
plotc <- plot1+plot2
pc.num=1:30 #选取前30个主成分
scRNA2 <- FindNeighbors(scRNA2, dims = pc.num)
scRNA2 <- FindClusters(scRNA2, resolution = 0.5)
table(scRNA2@meta.data$seurat_clusters)
#0 1 2 3 4 5 6 7 8 9 10 11 12 13
#2592 2111 1917 1729 1365 1102 888 885 878 827 640 626 603 544
#14 15 16 17 18 19 20 21 22
#532 521 484 405 334 307 205 205 50
metadata <- scRNA2@meta.data
cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
tSNE
scRNA2 = RunTSNE(scRNA2, dims = pc.num)
#group_by_cluster
plot1 = DimPlot(scRNA2, reduction = "tsne", label=T)
#group_by_sample
plot2 = DimPlot(scRNA2, reduction = "tsne", group.by='orig.ident')
#combinate
plotc <- plot1+plot2
plotc
UMAP
scRNA2 <- RunUMAP(scRNA2, dims = pc.num)
#group_by_cluster
plot3 = DimPlot(scRNA2, reduction = "umap", label=T)
#group_by_sample
plot4 = DimPlot(scRNA2, reduction = "umap", group.by='orig.ident')
#combinate
plotc <- plot3+plot4
合并tSNE与UMAP
plotc <- plot2+plot4+ plot_layout(guides = 'collect')
通过降维图可以看出两种方法的结果完全一致。这两种方法真的没有一点差异吗,可以用GSE125449的数据集试试。
数据集整合
#scRNAlist是之前代码运行保存好的seurat对象列表,保存了10个样本的独立数据
##数据整合
#数据整合使用的是标准化之后的高变基因,因此要对每个样本进行数据标准化和高变基因选择
for (i in 1:length(scRNAlist)) {
scRNAlist[[i]] <- NormalizeData(scRNAlist[[i]])
scRNAlist[[i]] <- FindVariableFeatures(scRNAlist[[i]], selection.method = "vst")
}
##以VariableFeatures为基础寻找锚点,运行时间较长
(我花了1个小时。。。)
scRNA.anchors <- FindIntegrationAnchors(object.list = scRNAlist)
Computing 2000 integration features
No variable features found for object1 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object2 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object3 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object4 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object5 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object6 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object7 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object8 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object9 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
No variable features found for object10 in the object.list. Running FindVariableFeatures ...
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Scaling features for provided objects
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=20s
Finding all pairwise anchors
| | 0 % ~calculating Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3443 anchors
Filtering anchors
Retained 1740 anchors
|++ | 2 % ~30m 21s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5015 anchors
Filtering anchors
Retained 2719 anchors
|+++ | 4 % ~36m 53s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3174 anchors
Filtering anchors
Retained 1614 anchors
|++++ | 7 % ~32m 40s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3771 anchors
Filtering anchors
Retained 1543 anchors
|+++++ | 9 % ~36m 15s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3118 anchors
Filtering anchors
Retained 1964 anchors
|++++++ | 11% ~34m 25s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3610 anchors
Filtering anchors
Retained 1765 anchors
|+++++++ | 13% ~32m 57s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4164 anchors
Filtering anchors
Retained 2169 anchors
|++++++++ | 16% ~34m 56s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3191 anchors
Filtering anchors
Retained 1478 anchors
|+++++++++ | 18% ~36m 52s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4097 anchors
Filtering anchors
Retained 1886 anchors
|++++++++++ | 20% ~37m 51s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3681 anchors
Filtering anchors
Retained 1534 anchors
|++++++++++++ | 22% ~38m 10s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3785 anchors
Filtering anchors
Retained 1381 anchors
|+++++++++++++ | 24% ~37m 30s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 2812 anchors
Filtering anchors
Retained 1841 anchors
|++++++++++++++ | 27% ~35m 52s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3546 anchors
Filtering anchors
Retained 1198 anchors
|+++++++++++++++ | 29% ~35m 08s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3132 anchors
Filtering anchors
Retained 1991 anchors
|++++++++++++++++ | 31% ~33m 19s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3404 anchors
Filtering anchors
Retained 922 anchors
|+++++++++++++++++ | 33% ~31m 24s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5125 anchors
Filtering anchors
Retained 2419 anchors
|++++++++++++++++++ | 36% ~30m 30s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3659 anchors
Filtering anchors
Retained 1353 anchors
|+++++++++++++++++++ | 38% ~28m 58s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5046 anchors
Filtering anchors
Retained 2094 anchors
|++++++++++++++++++++ | 40% ~27m 41s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4041 anchors
Filtering anchors
Retained 1471 anchors
|++++++++++++++++++++++ | 42% ~26m 15s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5071 anchors
Filtering anchors
Retained 2004 anchors
|+++++++++++++++++++++++ | 44% ~24m 51s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3759 anchors
Filtering anchors
Retained 809 anchors
|++++++++++++++++++++++++ | 47% ~23m 20s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5115 anchors
Filtering anchors
Retained 2196 anchors
|+++++++++++++++++++++++++ | 49% ~22m 28s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3518 anchors
Filtering anchors
Retained 1241 anchors
|++++++++++++++++++++++++++ | 51% ~21m 18s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5092 anchors
Filtering anchors
Retained 1915 anchors
|+++++++++++++++++++++++++++ | 53% ~20m 09s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4115 anchors
Filtering anchors
Retained 1445 anchors
|++++++++++++++++++++++++++++ | 56% ~19m 01s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5230 anchors
Filtering anchors
Retained 2114 anchors
|+++++++++++++++++++++++++++++ | 58% ~18m 40s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3762 anchors
Filtering anchors
Retained 838 anchors
|++++++++++++++++++++++++++++++ | 60% ~18m 19s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 6433 anchors
Filtering anchors
Retained 2451 anchors
|++++++++++++++++++++++++++++++++ | 62% ~17m 56s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5834 anchors
Filtering anchors
Retained 885 anchors
|+++++++++++++++++++++++++++++++++ | 64% ~17m 12s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4037 anchors
Filtering anchors
Retained 1044 anchors
|++++++++++++++++++++++++++++++++++ | 67% ~16m 07s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5664 anchors
Filtering anchors
Retained 861 anchors
|+++++++++++++++++++++++++++++++++++ | 69% ~15m 05s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4308 anchors
Filtering anchors
Retained 1022 anchors
|++++++++++++++++++++++++++++++++++++ | 71% ~13m 55s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4926 anchors
Filtering anchors
Retained 882 anchors
|+++++++++++++++++++++++++++++++++++++ | 73% ~12m 51s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4051 anchors
Filtering anchors
Retained 1119 anchors
|++++++++++++++++++++++++++++++++++++++ | 76% ~11m 41s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 6298 anchors
Filtering anchors
Retained 1089 anchors
|+++++++++++++++++++++++++++++++++++++++ | 78% ~10m 45s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5990 anchors
Filtering anchors
Retained 1099 anchors
|++++++++++++++++++++++++++++++++++++++++ | 80% ~09m 47s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5449 anchors
Filtering anchors
Retained 937 anchors
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~08m 39s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3741 anchors
Filtering anchors
Retained 1295 anchors
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~07m 29s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5303 anchors
Filtering anchors
Retained 963 anchors
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~06m 31s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4090 anchors
Filtering anchors
Retained 1192 anchors
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~05m 27s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 4888 anchors
Filtering anchors
Retained 1028 anchors
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~04m 21s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 3724 anchors
Filtering anchors
Retained 1523 anchors
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~03m 15s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5930 anchors
Filtering anchors
Retained 1057 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~02m 11s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 5974 anchors
Filtering anchors
Retained 1168 anchors
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~01m 06s Running CCA
Merging objects
Finding neighborhoods
Finding anchors
Found 6395 anchors
Filtering anchors
Retained 2384 anchors
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=50m 49s
##利用锚点整合数据,运行时间较长
##电脑又卡了。。。
scRNA3 <- IntegrateData(anchorset = scRNA.anchors)
#查看数据集情况
DefaultAssay(scRNA)
dim(scRNA3)
#可以发现当前默认数据集是"integrated" ,基因数量只有2000个了。这是因为seurat整合数据时只用2000个高变基因。
scRNA3对象的降维聚类
#scRNA3在整合前已经进行了标准化和筛选高变基因,因此这里不必再重复了
scRNA3 <- ScaleData(scRNA3, features = VariableFeatures(scRNA3))
scRNA3 <- RunPCA(scRNA3, features = VariableFeatures(scRNA3))
plot1 <- DimPlot(scRNA3, reduction = "pca", group.by="orig.ident")
plot2 <- ElbowPlot(scRNA3, ndims=30, reduction="pca")
plotc <- plot1+plot2
print(c("请选择哪些pc轴用于后续分析?示例如下:","pc.num=1:15"))
pc.num=1:30 #选取前30个主成分
scRNA3 <- FindNeighbors(scRNA3, dims = pc.num)
scRNA3 <- FindClusters(scRNA3, resolution = 0.5)
table(scRNA3@meta.data$seurat_clusters)
metadata <- scRNA3@meta.data
cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
#tSNE
scRNA3 = RunTSNE(scRNA3, dims = pc.num)
#group_by_cluster
plot1 = DimPlot(scRNA3, reduction = "tsne", label=T)
ggsave("cluster3/tSNE.png", plot = plot1, width = 8, height = 7)
#group_by_sample
plot2 = DimPlot(scRNA3, reduction = "tsne", group.by='orig.ident')
ggsave("cluster3/tSNE_sample.png", plot = plot2, width = 8, height = 7)
#combinate
plotc <- plot1+plot2
#UMAP
scRNA3 <- RunUMAP(scRNA3, dims = pc.num)
#group_by_cluster
plot3 = DimPlot(scRNA3, reduction = "umap", label=T)
#group_by_sample
plot4 = DimPlot(scRNA3, reduction = "umap", group.by='orig.ident')
#combinate
plotc <- plot3+plot4
#合并tSNE与UMAP
plotc <- plot2+plot4+ plot_layout(guides = 'collect')
数据质控
##数据质控
scRNA <- scRNA3 #以后的分析使用整合的数据进行
#meta.data添加信息
proj_name <- data.frame(proj_name=rep("demo2",ncol(scRNA)))
rownames(proj_name) <- row.names(scRNA@meta.data)
scRNA <- AddMetaData(scRNA, proj_name)
#切换数据集
DefaultAssay(scRNA) <- "RNA"
#计算线粒体和红细胞基因比例
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^MT-")
#计算红细胞比例
HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
HB_m <- match(HB.genes, rownames(scRNA@assays$RNA))
HB.genes <- rownames(scRNA@assays$RNA)[HB_m]
HB.genes <- HB.genes[!is.na(HB.genes)]
scRNA[["percent.HB"]]<-PercentageFeatureSet(scRNA, features=HB.genes)
#head(scRNA@meta.data)
col.num <- length(levels(as.factor(scRNA@meta.data$orig.ident)))
#绘制小提琴图
#所有样本一个小提琴图用group.by="proj_name",每个样本一个小提琴图用group.by="orig.ident"
violin <-VlnPlot(scRNA, group.by = "proj_name",
features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.HB"),
cols =rainbow(col.num),
pt.size = 0.01, #不需要显示点,可以设置pt.size = 0
ncol = 4) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
plot1 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot3 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.HB")
pearplot <- CombinePlots(plots = list(plot1, plot2, plot3), nrow=1, legend="none")
设置质控标准
minGene=500
maxGene=3000
pctMT=10
数据质控
scRNA <- subset(scRNA, subset = nFeature_RNA > minGene & nFeature_RNA < maxGene & percent.mt < pctMT)
col.num <- length(levels(as.factor(scRNA@meta.data$orig.ident)))
violin <-VlnPlot(scRNA, group.by = "proj_name",
features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.HB"),
cols =rainbow(col.num),
pt.size = 0.1,
ncol = 4) +
theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
细胞类型鉴定
library(SingleR)
dir.create("CellType")
#BiocManager::install('celldex')
refdata <- MonacoImmuneData()
testdata <- GetAssayData(scRNA, slot="data")
clusters <- scRNA@meta.data$seurat_clusters
##使用Monaco参考数据库鉴定
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.main,
method = "cluster", clusters = clusters,
assay.type.test = "logcounts", assay.type.ref = "logcounts")
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)
write.csv(celltype,"CellType/celltype_Monaco.csv",row.names = F)
#将scRNA的注释写到metadata中
scRNA@meta.data$celltype_Monaco = "NA"
for(i in 1:nrow(celltype)){
scRNA@meta.data[which(scRNA@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype_Monaco'] <- celltype$celltype[i]}
table(scRNA@meta.data$celltype_Monaco)
B cells CD4+ T cells Dendritic cells Monocytes NK cells
3777 3449 205 2852 822
Progenitors T cells
106 7617
p1 = DimPlot(scRNA, group.by="celltype_Monaco", repel=T, label=T, label.size=5, reduction='tsne')
p2 = DimPlot(scRNA, group.by="celltype_Monaco", repel=T, label=T, label.size=5, reduction='umap')
p3 = p1+p2+ plot_layout(guides = 'collect')
使用DICE参考数据库鉴定
refdata <- DatabaseImmuneCellExpressionData()
testdata <- GetAssayData(scRNA, slot="data")
clusters <- scRNA@meta.data$seurat_clusters
cellpred <- SingleR(test = testdata, ref = refdata, labels = refdata$label.main,
method = "cluster", clusters = clusters,
assay.type.test = "logcounts", assay.type.ref = "logcounts")
celltype = data.frame(ClusterID=rownames(cellpred), celltype=cellpred$labels, stringsAsFactors = F)
write.csv(celltype,"CellType/celltype_DICE.csv",row.names = F)
scRNA@meta.data$celltype_DICE = "NA"
for(i in 1:nrow(celltype)){
scRNA@meta.data[which(scRNA@meta.data$seurat_clusters == celltype$ClusterID[i]),'celltype_DICE'] <- celltype$celltype[i]}
p4 = DimPlot(scRNA, group.by="celltype_DICE", repel=T, label=T, label.size=5, reduction='tsne')
p5 = DimPlot(scRNA, group.by="celltype_DICE", repel=T, label=T, label.size=5, reduction='umap')
p6 = p3+p4+ plot_layout(guides = 'collect')
对比两种数据库鉴定的结果
p8 = p1+p4
不知道为什么我跑出来的是一样的。。