2024-04-08-创建单细胞对象

将创建对象的过程记录

setwd("~/pyc/07-cellranger/01-Fudan_EEC_Normal/")

# library(scater)
library(dplyr)
library(Seurat)
library(patchwork)
library(SingleCellExperiment)

library(ComplexHeatmap)
library(ConsensusClusterPlus)
library(msigdbr)
library(fgsea)
library(dplyr)
# library(tibble)
library(DoubletFinder)
library(Signac)
library(ggplot2)
library(stringr)
library(SingleR)
library(psych)
library(clustree)
library(patchwork)
library(ggplot2)
library(reshape2)
library(RColorBrewer)
library(paletteer)
library(harmony)
library(RColorBrewer) 
library(viridis)
library(wesanderson)


##### 01-1-设置文件读取目录  #####
## 批量读取数据
### 设置数据路径与样本名称
pwd <- getwd()
assays <- dir("./")
# (dir <- paste0(pwd, "/", assays,"/filtered_feature_bc_matrix/"))
(dir <- paste0(pwd, "/", assays,"/","outs/filtered_feature_bc_matrix/"))
# 按文件顺序给样本命名,名称不要以数字开头,中间不能有空格 
samples_name = c('T_223', 'T_224', 'N_227', 'T_229', 'N_230', 'N_231',
                 
                 'T_866', 'T_869', 'N_870', 'N_871')

##### 01-2-创建单细胞对象  #####
scRNAlist <- list()
for(i in 1:length(dir)){
  counts <- Read10X(data.dir = dir[i])
  #不设置min.cells过滤基因会导致CellCycleScoring报错:
  #Insufficient data values to produce 24 bins.  
  scRNAlist[[i]] <- CreateSeuratObject(counts, project=samples_name[i],
                                       min.cells=3, min.features = 200)
  #给细胞barcode加个前缀,防止合并后barcode重名
  scRNAlist[[i]] <- RenameCells(scRNAlist[[i]], add.cell.id = samples_name[i])   
  #计算线粒体基因比例
  if(T){    
    scRNAlist[[i]][["percent.mt"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^MT-") 
  }
  #计算核糖体基因比例
  if(T){
    scRNAlist[[i]][["percent.rb"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^RP[SL]")
  }
  #计算红细胞基因比例
  if(T){
    HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
    HB.genes <- CaseMatch(HB.genes, rownames(scRNAlist[[i]]))
    scRNAlist[[i]][["percent.HB"]]<-PercentageFeatureSet(scRNAlist[[i]], features=HB.genes) 
  }
}

### 给列表命名并保存数据
dir.create("../02_Integrate")
setwd("../02_Integrate/")

names(scRNAlist) <- samples_name
#system.time(save(scRNAlist, file = "Integrate/scRNAlist0.Rdata")) 
system.time(saveRDS(scRNAlist, file = "scRNAlist0.rds")) 
# "2023-11-08 15:07:35 CST" 在11月8日,到这一步。




#### 现在是2024-4-7,已经过去好久了! ####
pwd <- getwd()
assays <- dir("./")
# (dir <- paste0(pwd, "/", assays,"/filtered_feature_bc_matrix/"))
(dir <- paste0(pwd, "/", assays,"/","filtered_feature_bc_matrix/"))
(dir <- dir[1:5])
# 按文件顺序给样本命名,名称不要以数字开头,中间不能有空格 
samples_name = c('A_225', 'A_226', 'A_228', 'A_867', 'A_868')


scRNAlist <- list()
for(i in 1:length(dir)){
  counts <- Read10X(data.dir = dir[i])
  #不设置min.cells过滤基因会导致CellCycleScoring报错:
  #Insufficient data values to produce 24 bins.  
  scRNAlist[[i]] <- CreateSeuratObject(counts, project=samples_name[i],
                                       min.cells=3, min.features = 200)
  #给细胞barcode加个前缀,防止合并后barcode重名
  scRNAlist[[i]] <- RenameCells(scRNAlist[[i]], add.cell.id = samples_name[i])   
  #计算线粒体基因比例
  if(T){    
    scRNAlist[[i]][["percent.mt"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^MT-") 
  }
  #计算核糖体基因比例
  if(T){
    scRNAlist[[i]][["percent.rb"]] <- PercentageFeatureSet(scRNAlist[[i]], pattern = "^RP[SL]")
  }
  #计算红细胞基因比例
  if(T){
    HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
    HB.genes <- CaseMatch(HB.genes, rownames(scRNAlist[[i]]))
    scRNAlist[[i]][["percent.HB"]]<-PercentageFeatureSet(scRNAlist[[i]], features=HB.genes) 
  }
}
names(scRNAlist) <- samples_name

scRNAlist1 <- c(scRNAlist0, scRNAlist)

# 最后merge
##### 01-3-Merge #####
scRNA <- merge(scRNAlist1[[1]], scRNAlist1[2:length(scRNAlist1)])
scRNA 
# An object of class Seurat 
# 43055 features across 142654 samples within 1 assay 
# Active assay: RNA (43055 features, 0 variable features)


table(scRNA$orig.ident)
# A_225 A_226 A_228 A_867 A_868 N_227 N_230 N_231 N_870 N_871 T_223 T_224 T_229 T_866 T_869 
# 7570 11054  6112 10677  8117  8126  8001 11993  7518  8281  9914  4876 12560 13611 14244 
saveRDS(scRNA,file = 'scRNA_orig.rds')

# file = "/home/newdisk/pyc/07-cellranger/01-Fudan_EEC_Normal/scRNA_orig.rds"

rm(scRNAlist1)


# 拆分seurat对象
ifnb.list <- SplitObject(scRNA, split.by = "orig.ident")
最后的结果

比较有用的代码就是

Seurat V4版本拆分对象

参考

Seurat 4.0 | 单细胞转录组数据整合(scRNA-seq integration)
多个10x单细胞对象的合并和批次校正--seurat锚点整合+Harmony

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容