一、数据来源
今天要分析的单细胞数据来源于,A transcriptional metastatic signature predicts survival in clear cell renal cell carcinoma这篇文章,以下我将以一个样本为例进行演示。
二、数据处理
1、从csv文件读入count表达矩阵,并创建seurat object
library(Seurat)
library(Matrix)
library(dplyr)
library(scDblFinder)
library(ggplot2)
library(SingleCellExperiment)
i <- "GSM5392399"
tmp<-read.csv(file="GSM5392399_RCC-PR6-PTumor.count.csv"
,header=TRUE,row.names=NULL,check.names=FALSE)
row_name<-tmp[,1,drop=TRUE]
tmp<-tmp[,-1]
tmp<-as(tmp,"matrix")
rownames(tmp)<-row_name
tmp<-as(tmp,"sparseMatrix")
tmp<-CreateSeuratObject(tmp,min.cells=3,min.features=0,project=i)
RCC.seurat <- tmp
rm(tmp)
2、单细胞数据质控、如线粒体比率
RCC.seurat[["percent.mito"]] <- PercentageFeatureSet(RCC.seurat, pattern = "^MT-")
print(dim(RCC.seurat))
HB.genes_total=c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
HB_m=match(HB.genes_total,rownames(RCC.seurat@assays$RNA))
HB.genes=rownames(RCC.seurat@assays$RNA)[HB_m]
HB.genes=HB.genes[!is.na(HB.genes)]
RCC.seurat[["percent.HB"]]=PercentageFeatureSet(RCC.seurat,features=HB.genes)
head(RCC.seurat@meta.data)[,c(2,3,4,5)]
rb.genes <- rownames(RCC.seurat)[grep("^RP[SL]",rownames(RCC.seurat))]
C<-GetAssayData(object = RCC.seurat, slot = "counts")
percent.ribo <- Matrix::colSums(C[rb.genes,])/Matrix::colSums(C)*100
RCC.seurat <- AddMetaData(RCC.seurat, percent.ribo, col.name = "percent.ribo")
RCC.seurat <- subset(RCC.seurat, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mito < 30)
rb.genes <- rownames(RCC.seurat)[grep("^RP[SL]",rownames(RCC.seurat))]
mt.genes <- rownames(RCC.seurat)[grep("^MT-",rownames(RCC.seurat))]
rbmt <- c(rb.genes, mt.genes,"MALAT1","NEAT1")
filtergene <- setdiff(rownames(RCC.seurat), rbmt)
RCC.seurat <- RCC.seurat[filtergene,]
# Visualize QC metrics as a violin plot
p1 <- VlnPlot(RCC.seurat, features =c("nFeature_RNA","nCount_RNA","percent.mito"), ncol =3)
ggsave(p1, filename="QC.png", height=4, width=8)
3、使用scDblFinder R包 去除doublet
set.seed(123)
sce <- as.SingleCellExperiment(RCC.seurat)
sce <- scDblFinder(sce, dbr=0.1)
table(sce$scDblFinder.class)
scDblObj <- colData(sce)
scDblObj <- as.data.frame(scDblObj)
identical(rownames(scDblObj), rownames(RCC.seurat@meta.data)) #TRUE
RCC.seurat@meta.data$scDblFinder.class <- scDblObj$scDblFinder.class
RCC.seurat=subset(RCC.seurat, subset = scDblFinder.class == "singlet")
4、SCTransform:单细胞样本的标准化以及降维聚类
RCC.seurat <- SCTransform(RCC.seurat, verbose = TRUE
,vars.to.regress = c("percent.mito","percent.HB"
,"percent.ribo"))
RCC.seurat <- RunPCA(RCC.seurat, npcs = 30, verbose = FALSE)
RCC.seurat <- RunUMAP(RCC.seurat, reduction = "pca", dims = 1:30, verbose = FALSE)
RCC.seurat <- FindNeighbors(RCC.seurat, reduction = "pca", dims = 1:30, verbose = FALSE)
RCC.seurat <- FindClusters(RCC.seurat, resolution = 0.6, verbose = FALSE)
p1 <- DimPlot(RCC.seurat, label = T, repel = T) + ggtitle("Unsupervised clustering")
ggsave(p1, filename="clustering.png", height=4, width=4.5)
5、基于文献提供的Marker gene进行细胞注释
marklist=list(
ccRCC=c("CA9","CA12","NNMT","VCAM1","VEGFA","REG1A", "CP"),
Fibroblast=c("DCN","RGS5","MYH11","COL1A1","COL3A1"),
Endothelial=c("PECAM1","VWF","ENG","RAMP2"),
Immune=c("PTPRC"),
Monocytic=c("LYZ","FCN1","S100A9","CD68","CD163"
,"C1QA","C1QB",'C1QC',"CD1C","CLEC9A"),
Mast=c("CPA3","MS4A2","TPSAB1","TPSB2"),
Tcell=c("CD3G","CD3E","CD3D","CD8A","CD8B","CD4","IL7R"),
NK=c("KLRD1", "XCL1"),
Bcell=c("CD19","CD79A","CD79B","MS4A1")
)
pp = DotPlot(RCC.seurat,features = marklist, assay = "SCT",dot.scale = 10)
pp = pp + theme(axis.text.x = element_text(size = 12,angle=90,vjust=0.5,hjust=1)
,axis.text.y = element_text(size = 12)) + labs(x='',y='') +
guides(color = guide_colorbar(title = 'Scale expression'),size = guide_legend(title = 'Percent expressed')) +
theme(axis.line = element_line(size = 0.6),panel.border = element_rect(colour = "black", fill=NA, size=0.6))
ggsave(pp,filename = "Annotation_Marker.png",height = 8,width =18)
今天就到这里啦