将创建对象的过程记录
setwd("~/pyc/07-cellranger/01-Fudan_EEC_Normal/")
# library(scater)
library(dplyr)
library(Seurat)
library(patchwork)
library(SingleCellExperiment)
library(ComplexHeatmap)
library(ConsensusClusterPlus)
library(msigdbr)
library(fgsea)
library(dplyr)
# library(tibble)
library(DoubletFinder)
library(Signac)
library(ggplot2)
library(stringr)
library(SingleR)
library(psych)
library(clustree)
library(patchwork)
library(ggplot2)
library(reshape2)
library(RColorBrewer)
library(paletteer)
library(harmony)
library(RColorBrewer)
library(viridis)
library(wesanderson)
##### 01-1-设置文件读取目录 #####
## 批量读取数据
### 设置数据路径与样本名称
pwd <- getwd()
assays <- dir("./")
# (dir <- paste0(pwd, "/", assays,"/filtered_feature_bc_matrix/"))
(dir <- paste0(pwd, "/", assays,"/","outs/filtered_feature_bc_matrix/"))
# 按文件顺序给样本命名,名称不要以数字开头,中间不能有空格
samples_name = c('T_223', 'T_224', 'N_227', 'T_229', 'N_230', 'N_231',
'T_866', 'T_869', 'N_870', 'N_871')
##### 01-2-创建单细胞对象 #####
scRNAlist <- list()
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i])
#不设置min.cells过滤基因会导致CellCycleScoring报错:
#Insufficient data values to produce 24 bins.
scRNAlist[[i]] <- CreateSeuratObject(counts, project=samples_name[i],
min.cells=3, min.features = 200)
#给细胞barcode加个前缀,防止合并后barcode重名
scRNAlist[[i]] <- RenameCells(scRNAlist[[i]], add.cell.id = samples_name[i])
#计算线粒体基因比例
if(T){
scRNAlist[[i]][["percent.mt"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^MT-")
}
#计算核糖体基因比例
if(T){
scRNAlist[[i]][["percent.rb"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^RP[SL]")
}
#计算红细胞基因比例
if(T){
HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
HB.genes <- CaseMatch(HB.genes, rownames(scRNAlist[[i]]))
scRNAlist[[i]][["percent.HB"]]<-PercentageFeatureSet(scRNAlist[[i]], features=HB.genes)
}
}
### 给列表命名并保存数据
dir.create("../02_Integrate")
setwd("../02_Integrate/")
names(scRNAlist) <- samples_name
#system.time(save(scRNAlist, file = "Integrate/scRNAlist0.Rdata"))
system.time(saveRDS(scRNAlist, file = "scRNAlist0.rds"))
# "2023-11-08 15:07:35 CST" 在11月8日,到这一步。
#### 现在是2024-4-7,已经过去好久了! ####
pwd <- getwd()
assays <- dir("./")
# (dir <- paste0(pwd, "/", assays,"/filtered_feature_bc_matrix/"))
(dir <- paste0(pwd, "/", assays,"/","filtered_feature_bc_matrix/"))
(dir <- dir[1:5])
# 按文件顺序给样本命名,名称不要以数字开头,中间不能有空格
samples_name = c('A_225', 'A_226', 'A_228', 'A_867', 'A_868')
scRNAlist <- list()
for(i in 1:length(dir)){
counts <- Read10X(data.dir = dir[i])
#不设置min.cells过滤基因会导致CellCycleScoring报错:
#Insufficient data values to produce 24 bins.
scRNAlist[[i]] <- CreateSeuratObject(counts, project=samples_name[i],
min.cells=3, min.features = 200)
#给细胞barcode加个前缀,防止合并后barcode重名
scRNAlist[[i]] <- RenameCells(scRNAlist[[i]], add.cell.id = samples_name[i])
#计算线粒体基因比例
if(T){
scRNAlist[[i]][["percent.mt"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^MT-")
}
#计算核糖体基因比例
if(T){
scRNAlist[[i]][["percent.rb"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^RP[SL]")
}
#计算红细胞基因比例
if(T){
HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
HB.genes <- CaseMatch(HB.genes, rownames(scRNAlist[[i]]))
scRNAlist[[i]][["percent.HB"]]<-PercentageFeatureSet(scRNAlist[[i]], features=HB.genes)
}
}
names(scRNAlist) <- samples_name
scRNAlist1 <- c(scRNAlist0, scRNAlist)
# 最后merge
##### 01-3-Merge #####
scRNA <- merge(scRNAlist1[[1]], scRNAlist1[2:length(scRNAlist1)])
scRNA
# An object of class Seurat
# 43055 features across 142654 samples within 1 assay
# Active assay: RNA (43055 features, 0 variable features)
table(scRNA$orig.ident)
# A_225 A_226 A_228 A_867 A_868 N_227 N_230 N_231 N_870 N_871 T_223 T_224 T_229 T_866 T_869
# 7570 11054 6112 10677 8117 8126 8001 11993 7518 8281 9914 4876 12560 13611 14244
saveRDS(scRNA,file = 'scRNA_orig.rds')
# file = "/home/newdisk/pyc/07-cellranger/01-Fudan_EEC_Normal/scRNA_orig.rds"
rm(scRNAlist1)
# 拆分seurat对象
ifnb.list <- SplitObject(scRNA, split.by = "orig.ident")
最后的结果
比较有用的代码就是
Seurat V4版本拆分对象
参考
Seurat 4.0 | 单细胞转录组数据整合(scRNA-seq integration)
多个10x单细胞对象的合并和批次校正--seurat锚点整合+Harmony