作者,Evil Genius
今日目标,单细胞轨迹分析
关于单细胞轨迹分析,虽然大家觉得已经烂大街了,但是分析的很好的人,屈指可数。
关于轨迹分析,有几个难点需要优先解决
1、基因的选择,如何选择合适的用于计算发育的基因对结果影响巨大
2、细胞注释,错误的注释结果带来的就是错误的发育结果
3、方法的选择,传统的依据基因推断还是velocity方式,现在通常二者都有
4、关键性的基因轨迹变化,也就是我们课上常说的基因“开关”
今日我们分享的方法,SCP,参考地址在Single Cell Pipeline • SCP,这个地方是一系列软件的合成,当然,也有创新。
大家同时也可以参考老俊俊的方法,也非常不错,https://github.com/junjunlab/ClusterGVis。
SCP需要R和python的联合环境使用
if (!require("devtools", quietly = TRUE)) {
install.packages("devtools")
}
devtools::install_github("zhanghao-njmu/SCP")
SCP::PrepareEnv()
options(reticulate.conda_binary = "/path/to/conda")
SCP::PrepareEnv()
或者
SCP::PrepareEnv(
miniconda_repo = "https://mirrors.bfsu.edu.cn/anaconda/miniconda",
pip_options = "-i https://pypi.tuna.tsinghua.edu.cn/simple"
)
我们来看看示例代码,示例数据在 mouse pancreas data,来看看关键的核心步骤。
library(SCP)
library(BiocParallel)
register(MulticoreParam(workers = 8, progressbar = TRUE))
data("pancreas_sub")
print(pancreas_sub)
#> An object of class Seurat
#> 47874 features across 1000 samples within 3 assays
#> Active assay: RNA (15958 features, 3467 variable features)
#> 2 other assays present: spliced, unspliced
#> 2 dimensional reductions calculated: PCA, UMAP
看的出来,V4结构,而且基础分析 + 可变剪切都已经做过了。
ht <- CellCorHeatmap(
srt_query = pancreas_sub, srt_ref = panc8_rename,
query_group = "SubCellType", ref_group = "celltype",
nlabel = 3, label_by = "row",
show_row_names = TRUE, show_column_names = TRUE
)
print(ht$plot)
PAGA analysis
pancreas_sub <- RunPAGA(
srt = pancreas_sub, group_by = "SubCellType",
linear_reduction = "PCA", nonlinear_reduction = "UMAP"
)
PAGAPlot(srt = pancreas_sub, reduction = "UMAP", label = TRUE, label_insitu = TRUE, label_repel = TRUE)
Velocity analysis
pancreas_sub <- RunSCVELO(
srt = pancreas_sub, group_by = "SubCellType",
linear_reduction = "PCA", nonlinear_reduction = "UMAP"
)
VelocityPlot(srt = pancreas_sub, reduction = "UMAP", group_by = "SubCellType")
VelocityPlot(srt = pancreas_sub, reduction = "UMAP", plot_type = "stream")
Differential expression analysis
pancreas_sub <- RunDEtest(srt = pancreas_sub, group_by = "CellType", fc.threshold = 1, only.pos = FALSE)
VolcanoPlot(srt = pancreas_sub, group_by = "CellType")
DEGs <- pancreas_sub@tools$DEtest_CellType$AllMarkers_wilcox
DEGs <- DEGs[with(DEGs, avg_log2FC > 1 & p_val_adj < 0.05), ]
# Annotate features with transcription factors and surface proteins
pancreas_sub <- AnnotateFeatures(pancreas_sub, species = "Mus_musculus", db = c("TF", "CSPA"))
ht <- FeatureHeatmap(
srt = pancreas_sub, group.by = "CellType", features = DEGs$gene, feature_split = DEGs$group1,
species = "Mus_musculus", db = c("GO_BP", "KEGG", "WikiPathway"), anno_terms = TRUE,
feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
height = 5, width = 4
)
print(ht$plot)
Enrichment analysis(over-representation)
EnrichmentPlot(
srt = pancreas_sub, group_by = "CellType", group_use = "Ductal",
plot_type = "enrichmap"
)
Enrichment analysis(GSEA)
pancreas_sub <- RunGSEA(
srt = pancreas_sub, group_by = "CellType", db = "GO_BP", species = "Mus_musculus",
DE_threshold = "p_val_adj < 0.05"
)
GSEAPlot(srt = pancreas_sub, group_by = "CellType", group_use = "Endocrine", id_use = "GO:0007186")
Dynamic features
pancreas_sub <- RunDynamicFeatures(srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"), n_candidates = 200)
ht <- DynamicHeatmap(
srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"),
use_fitted = TRUE, n_split = 6, reverse_ht = "Lineage1",
species = "Mus_musculus", db = "GO_BP", anno_terms = TRUE, anno_keys = TRUE, anno_features = TRUE,
heatmap_palette = "viridis", cell_annotation = "SubCellType",
separate_annotation = list("SubCellType", c("Nnat", "Irx1")), separate_annotation_palette = c("Paired", "Set1"),
feature_annotation = c("TF", "CSPA"), feature_annotation_palcolor = list(c("gold", "steelblue"), c("forestgreen")),
pseudotime_label = 25, pseudotime_label_color = "red",
height = 5, width = 2
)
print(ht$plot)
DynamicPlot(
srt = pancreas_sub, lineages = c("Lineage1", "Lineage2"), group.by = "SubCellType",
features = c("Plk1", "Hes1", "Neurod2", "Ghrl", "Gcg", "Ins2"),
compare_lineages = TRUE, compare_features = FALSE
)