setwd("/data/pyc/44_PBMC33k/")
library(SeuratData)
library(qs)
library(Seurat)
devtools::install_github('satijalab/seurat-data')
BiocManager::install('TENxPBMCData')
library(TENxPBMCData)
args(TENxPBMCData)
tenx_pbmc33k <- TENxPBMCData(dataset = "pbmc33k")
tenx_pbmc33k
counts(tenx_pbmc33k)
#转换
ct = as.data.frame( assay(tenx_pbmc33k))
ct[1:4,1:4]
colnames(ct) = 1:ncol(ct)
install.packages('AnnoProbe')
library(AnnoProbe)
ag=annoGene(rownames(ct),
ID_type = 'ENSEMBL',species = 'human'
)
head(ag)
ag=ag[!duplicated(ag$SYMBOL),]
ag=ag[!duplicated(ag$ENSEMBL),]
pos = match(ag$ENSEMBL,rownames(ct))
ct = ct[pos,]
rownames(ct) = ag$SYMBOL
ct[1:4,1:4]
library(scRNAstat)
library(Seurat)
library(ggplot2)
library(clustree)
library(cowplot)
library(dplyr)
sce = CreateSeuratObject(counts = ct)
sce@assays$RNA@counts[1:4,1:4]
sce<-RenameCells(sce,add.cell.id="Pbmc33k")
# An object of class Seurat
# 30977 features across 33148 samples within 1 assay
# Active assay: RNA (30977 features, 0 variable features)
# 2 layers present: counts, data
sce <- NormalizeData(sce, normalization.method = "LogNormalize", scale.factor = 10000)
sce <- FindVariableFeatures(sce, selection.method = "vst", nfeatures = 3000)
sce <- ScaleData(sce)
sce <- RunPCA(sce, features = VariableFeatures(object = sce))
system.time(qsave(sce,file = './03_sce_afterPCA.qs'))
unique(sce$orig.ident)
ElbowPlot(sce, ndims = 50, reduction = "pca")
library(findPC)
stdev_sorted <- sort(sce@reductions$pca@stdev, decreasing = TRUE)
stdev_sorted
findPC(sdev = stdev_sorted, # 主成分降序排列
number = 50, # number数量不需要
method = "all", # 所有的方法都看一下,默认方法是perpendicular line
figure = T)
sce@reductions$pca@stdev
pc.num=1:11
sce <- RunTSNE(sce, dims=pc.num, seed.use = 1013) %>% RunUMAP( dims=pc.num, seed.use = 1013)
sce <- FindNeighbors(sce, reduction = "pca", dims = 1:11)
sce <- FindClusters(sce, resolution = c(0.1,0.2,0.3, 0.5, 0.8, 1))
clustree(sce@meta.data, prefix = "RNA_snn_res.",node_label = "RNA_snn_res.")
colnames(sce@meta.data)
Idents(sce) <- "RNA_snn_res.0.5"
DimPlot(sce, label = T, label.size = 10)
DimPlot(sce, label = T, label.size = 10, reduction = "tsne")
qsave(sce, file = "/data/pyc/44_PBMC33k/sce_pbmc33k_2025_9_5_final_annotation.qs")
#
library(wesanderson)
pal <- wes_palette("Zissou1", 10, type = "continuous")
markers_list <- list(Epi = c('EPCAM', 'CDH1', 'KRT19','CLDN7'),
Endo = c('PECAM1', 'VWF', 'ENG', 'KDR', 'CDH5'),
Fib = c('FAP', 'COL1A1', 'COL1A2','DCN', "COL3A1", "COL6A1", "COL6A3", "LUM"),
Pericytes = c('RGS5', 'ACTA2', 'MYH11', 'MCAM'),
Bcells = c('MS4A1', 'CD79A','CD79B'),
Plasma_B = c('IGHA1', 'JCHAIN'),
NK = c('NKG7', 'GNLY', 'NCAM1') ,
Tcells = c('CD3D', 'CD3E','CD4','CD8A'),
Myeloid = c('CD68','CD14', 'MS4A6A', 'MRC1', 'CD163', 'MSR1'),
DC = c('ITGAX','XCR1', 'CD1C', 'CLEC9A', 'CLEC10A'),
cycling = c('MKI67', 'TOP2A'),
Neutro = c("S100A8", "S100A9","CSF3R","CXCR2"),
Mast = c('TPSAB1','TPSB2','CPA3','HPGDS'),
Immune = c("PTPRC"))
DotPlot(sce, features = markers_list,assay='RNA')+
theme(axis.text.x = element_text(angle = 45,
face="italic", hjust = 0.5, vjust = 0.5))+
scale_colour_gradientn(colours = pal)+
theme(legend.position="bottom") + labs(title = "cluster markers", y = "", x="")
# 发现第六群是NK
sce_NK <- subset(sce, idents = 6)
NK = c('NKG7', 'GNLY', 'NCAM1','GZMB','IL7R','CXCR6','CTLA4','CD3D','CD3E','CD8A','CD8B','CD4','GZMK','CD1D','CD161','ZNF683')
FeaturePlot(sce_NK, features = NK)
VlnPlot(sce_NK, features = NK)
qsave(sce_NK, file = "/data/pyc/44_PBMC33k/sce_NK_pbmc33k_2025_9_5_02.qs")
#
# part1 <- sce_NK
# part2 <-
#
FeaturePlot(sce_ZCPTBRCA_NK, features = NK)
VlnPlot(sce_ZCPTBRCA_NK, features = NK)
# 开始细分NKT项目!启动!
library(Seurat)
library(harmony)
library(qs)
library(ggplot2)
library(patchwork)
library(dplyr)
library(future)
future::plan(multisession, workers=6)
#### 重新开始跑 ####
scRNA <- sce_ZCPTBRCA_NK
colnames(scRNA@meta.data)
cellinfo <- subset(scRNA@meta.data, select = c("orig.ident", "percent.mt", "percent.ribo", "percent.hb",'sampleinf'))
dim(cellinfo)
View(cellinfo)
scRNA <- CreateSeuratObject(scRNA@assays$RNA@counts, meta.data = cellinfo)
# setwd("/data/pyc/11_HE_EC/")
library(harmony)
library(Seurat)
scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA, selection.method = "vst", nfeatures = 3000)
scRNA <- ScaleData(scRNA)
scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA))
# system.time(qsave(scRNA,file = '/data/pyc/11_HE_EC/14_TRC_FRC/scRNA_afterPCA.qs'))
gc()
#### scRNA2非线性降维 ####
scRNA2 <- RunHarmony(scRNA, group.by.vars="orig.ident", assay.use="RNA", plot_convergence = TRUE)
system.time(qsave(scRNA2,file = '/data/pyc/11_HE_EC/14_TRC_FRC/scRNA2_afterHarmony.qs'))
gc()
p_scRNA2 <- ElbowPlot(scRNA2, ndims = 50, reduction = "harmony")
ggsave(filename = "ElbowPlot_scRNA2.pdf", p_scRNA2, width = 8, height = 6)
pc.num=1:22
pc.num=1:10
gc()
scRNA2 <- RunTSNE(scRNA2, reduction="harmony", dims=pc.num,seed.use = 1013) %>% RunUMAP(reduction="harmony", dims=pc.num, seed.use = 1013)
scRNA2 <- FindNeighbors(scRNA2, dims = 1:22)
gc()
system.time(qsave(scRNA2,file = './scRNA2_afterUMAPTSNE.qs'))
gc()

image.png

image.png

image.png

image.png

RES 2

RES1.8

RES1.6