1.有直接的标准10X数据(喜大普奔)
解压缩后可以得到三个文件(barcodes.tsv/genes.tsv/matrix.mtx),文件名必须改成与这三个名称完全一致
例如:GSE106273
下载后得到三个文件,解压缩后将文件名分别改为barcodes.tsv、genes.tsv、matrix.mtx(一个字也不能差)
# Load the 10X count matrix from the directory holding
# barcodes.tsv / genes.tsv / matrix.mtx.
pbmc.data <- Read10X(data.dir = "C:/Users/fhche/Desktop/GSE106273")

# Wrap the counts in a Seurat object. Per the Seurat docs, min.cells keeps
# features detected in at least that many cells and min.features keeps cells
# with at least that many detected features.
pbmc <- CreateSeuratObject(
  counts = pbmc.data,
  project = "pbmc",
  min.cells = 3,
  min.features = 200
)

# Preview the per-cell metadata table.
head(pbmc@meta.data)
2.多个10X数据可以用merge函数合并
例如:GSE135927,只有一个RAW数据压缩包可以下载
下载后整理成GSM4038043、GSM4038044两个文件夹,分别含有barcodes.tsv、genes.tsv、matrix.mtx三个文件
# Read each sample's 10X directory and wrap it in its own Seurat object.
# `project` is set to the sample ID so that orig.ident distinguishes the two
# samples after merging; without it both objects carry the same default
# project label and the sample of origin is only recoverable from cell names.
GSM4038043 <- Read10X(data.dir = "C:/Users/fhche/Desktop/GSE135927/GSM4038043")
pbmc1 <- CreateSeuratObject(
  counts = GSM4038043,
  project = "GSM4038043",
  min.cells = 3,
  min.features = 200
)

GSM4038044 <- Read10X(data.dir = "C:/Users/fhche/Desktop/GSE135927/GSM4038044")
pbmc2 <- CreateSeuratObject(
  counts = GSM4038044,
  project = "GSM4038044",
  min.cells = 3,
  min.features = 200
)
head(pbmc2@meta.data)

# Merge the two objects. add.cell.ids prefixes every cell name with its
# sample ID so barcodes shared between samples stay unique.
pbmc <- merge(
  x = pbmc1,
  y = pbmc2,
  add.cell.ids = c("GSM4038043", "GSM4038044"),
  merge.data = TRUE
)

# Spot-check a corner of the merged count matrix and the metadata.
# NOTE(review): direct @counts slot access presumably targets Seurat v3/v4
# Assay objects; confirm against the installed Seurat version.
as.data.frame(pbmc@assays$RNA@counts[1:10, 1:2])
head(pbmc@meta.data)
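实际操作中,如果数据是CellRanger 3及以上版本的输出(基因文件名为features.tsv.gz),目录下应直接存放gz压缩文件(barcodes.tsv.gz、features.tsv.gz、matrix.mtx.gz),不需要解压。两个以上数据的合并代码如下: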
## ---- Merge several samples ----
# Sample IDs, used as project labels (orig.ident) and as cell-name prefixes.
sample_ids <- c("GSM5261695", "GSM5261696", "GSM5261699", "GSM5261700")

# List the per-sample directories (each is expected to hold the three 10X
# files). recursive = FALSE returns only the immediate subdirectories, so the
# root directory itself is not part of the result and no index offset is needed.
dirs <- list.dirs("./GSE173193_RAW", recursive = FALSE)
dir_sample <- dirs[seq_along(sample_ids)]

# Fail early with a clear message instead of passing NA paths to Read10X.
if (anyNA(dir_sample)) {
  stop(
    "Expected ", length(sample_ids), " sample directories under ",
    "./GSE173193_RAW but found ", length(dirs),
    call. = FALSE
  )
}
# NOTE(review): assumes the first four subdirectories, in the sorted order
# list.dirs returns, correspond to sample_ids in the same order - verify.
dir_sample
dir_sample[3]

# Read the four samples.
GSM5261695 <- Read10X(data.dir = dir_sample[1])
GSM5261696 <- Read10X(data.dir = dir_sample[2])
GSM5261699 <- Read10X(data.dir = dir_sample[3])
GSM5261700 <- Read10X(data.dir = dir_sample[4])

# Build one Seurat object per sample. `project` is set to the sample ID so
# orig.ident distinguishes the samples after merging; other arguments keep
# their defaults.
pbmc1 <- CreateSeuratObject(
  counts = GSM5261695, project = sample_ids[1],
  min.cells = 3, min.features = 200
)
pbmc2 <- CreateSeuratObject(
  counts = GSM5261696, project = sample_ids[2],
  min.cells = 3, min.features = 200
)
pbmc3 <- CreateSeuratObject(
  counts = GSM5261699, project = sample_ids[3],
  min.cells = 3, min.features = 200
)
pbmc4 <- CreateSeuratObject(
  counts = GSM5261700, project = sample_ids[4],
  min.cells = 3, min.features = 200
)
head(pbmc2@meta.data)

# Merge the objects. add.cell.ids prefixes every cell name with its sample ID
# so barcodes shared between samples stay unique.
pbmc <- merge(
  x = pbmc1,
  y = list(pbmc2, pbmc3, pbmc4),
  add.cell.ids = sample_ids,
  merge.data = TRUE
)

# Spot-check a corner of the merged count matrix and the metadata.
# NOTE(review): direct @counts slot access presumably targets Seurat v3/v4
# Assay objects; confirm against the installed Seurat version.
as.data.frame(pbmc@assays$RNA@counts[1:10, 1:15])
head(pbmc@meta.data)

# Persist the merged object for later sessions.
saveRDS(pbmc, "pe_and_control.rds")