library(hdf5r)
library(Seurat)
library(dplyr)
library(patchwork)
data_sample <- Read10X_h5("D:\\test\\TISCHdata\\AEL_GSE142213\\AEL_GSE142213_expression.h5") #导入数据
pbmc <- CreateSeuratObject(data_sample,project = "test") #后面就可以单细胞处理的标准流程啦
meta <- read_tsv("D:\\test\\TISCHdata\\AEL_GSE142213\\AEL_GSE142213_CellMetainfo_table.tsv")
meta <- as.data.frame(meta)
rownames(meta) <- meta$Cell
pbmc <- AddMetaData(pbmc,meta)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
# Visualize QC metrics as a violin plot
VlnPlot(pbmc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
# FeatureScatter is typically used to visualize feature-feature relationships, but can be used
# for anything calculated by the object, i.e. columns in object metadata, PC scores etc.
pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
# Identify the 10 most highly variable genes
top10 <- head(VariableFeatures(pbmc), 10)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(pbmc)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
#plot1 + plot2
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
# Examine and visualize PCA results a few different ways
print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")
DimPlot(pbmc, reduction = "pca")
DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)
DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)
# NOTE: This process can take a long time for big datasets, comment out for expediency. More
# approximate techniques such as those implemented in ElbowPlot() can be used to reduce
# computation time
pbmc <- JackStraw(pbmc, num.replicate = 100)
pbmc <- ScoreJackStraw(pbmc, dims = 1:20)
JackStrawPlot(pbmc, dims = 1:15)
ElbowPlot(pbmc)
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.5)
# If you haven't installed UMAP, you can do so via reticulate::py_install(packages =
# 'umap-learn')
pbmc <- RunUMAP(pbmc, dims = 1:10)
# note that you can set `label = TRUE` or use the LabelClusters function to help label
# individual clusters
DimPlot(pbmc, reduction = "umap")
umap=as.data.frame(pbmc@reductions$umap@cell.embeddings)
#将原始降维信息覆盖本次降维信息
umap$UMAP_1 <- pbmc@meta.data$UMAP_1
umap$UMAP_2 <- pbmc@meta.data$UMAP_2
pbmc@reductions$umap@cell.embeddings <- as.matrix(umap)
#pbmc@reductions$umap@cell.embeddings = as.matrix(cbind(UMAP_1=pbmc@meta.data$UMAP_1,UMAP_2=pbmc@meta.data$UMAP_2))
saveRDS(pbmc, file = "TISCH.rds")
TISCHdata数据预处理
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
推荐阅读更多精彩内容
- Python主要数据预处理函数 要介绍的Python中的插值、数据归一化、主成分分析等与数据预处理相关的函数。 P...
- 预处理数预处理数据的方法总结(使用sklearn-preprocessing) https://blog.csdn...
- 前言:在我的上一份文章中,介绍到了数据清洗的常用方法及其Python实现。这篇文章主要是在数据清洗后的特征工程的实...