pseudotime
比较不同pseudotime分析工具
在许多情况下,我们研究的是细胞不断变化的过程。例如,发育过程中发生的许多分化过程:在受到刺激后,细胞会从一种细胞类型转变为另一种细胞类型。由于一些细胞在分化过程中比其他细胞进行得更快,每个snapshot(某一时间点的取样)都可能包含处于发育过程不同阶段的细胞。我们使用统计方法,将细胞沿着一条或多条代表潜在发育过程的轨迹进行排序,这种排序被称为伪时间(pseudotime)。
library(SingleCellExperiment)
library(TSCAN)
library(M3Drop)
library(monocle)
library(destiny)
library(SLICER)
library(scater)
## Warning: package 'scater' was built under R version 3.5.2
library(ggplot2)
library(ggthemes)
library(ggbeeswarm)
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.5.2
# Seed the random number generator before any analysis step runs.
set.seed(1234)

# Read the serialized object from disk and print its summary; the captured
# output below shows it is a SingleCellExperiment with 22431 rows and
# 268 columns.
deng_SCE <- readRDS("D:/paopaoR/deng/deng-reads.rds")
deng_SCE
## class: SingleCellExperiment
## dim: 22431 268
## metadata(0):
## assays(2): counts logcounts
## rownames(22431): Hvcn1 Gbp7 ... Sox5 Alg11
## rowData names(10): feature_symbol is_feature_control ...
## total_counts log10_total_counts
## colnames(268): 16cell 16cell.1 ... zy.2 zy.3
## colData names(30): cell_type2 cell_type1 ... pct_counts_ERCC
## is_cell_control
## reducedDimNames(0):
## spikeNames(1): ERCC

# Turn the stage annotation into a factor whose level order is given
# explicitly, from "zy" through "lateblast". Plots that put cell_type2 on an
# axis follow this order. Any value not listed here would become NA.
deng_SCE$cell_type2 <- factor(
  deng_SCE$cell_type2,
  levels = c(
    "zy", "early2cell", "mid2cell", "late2cell",
    "4cell", "8cell", "16cell", "earlyblast",
    "midblast", "lateblast"
  )
)

# Stand-alone copy of the ordered stage labels, one entry per cell.
cellLabels <- deng_SCE$cell_type2
数据集由来自小鼠早期发育的10个不同时间点的268个细胞组成。细胞label可以作为真实时间标准来评价伪时间的准确性。
# Count matrix pulled out of the object, with every column renamed to the
# stage label of its cell (so column names are no longer unique).
deng <- counts(deng_SCE)
colnames(deng) <- cellLabels

# Run PCA, store the result in the object, and draw the PCA scatter plot
# coloured by stage.
deng_SCE <- runPCA(deng_SCE)
plotPCA(deng_SCE, colour_by = "cell_type2")
## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

# Keep the first column of the default reduced-dimension matrix as a per-cell
# variable. The object was loaded with no reduced dimensions, so the default
# here is the PCA computed just above.
deng_SCE$PC1 <- reducedDim(deng_SCE)[, 1]

# One horizontal band per stage, cells positioned along PC1. groupOnX = FALSE
# makes the jitter act on the categorical y axis.
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = PC1, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "First principal component",
    y = "Timepoint",
    title = "Cells ordered by first principal component"
  )
TSCAN
# TSCAN pseudotime: preprocess the expression matrix, cluster the cells, then
# order them.
procdeng <- TSCAN::preprocess(deng)

# Rename cells to their column position in `deng` so that the sample names
# TSCAN returns can be used directly as indices into deng_SCE. Sizing the
# names from `deng` also makes this assignment fail loudly if preprocessing
# ever returned a different number of columns.
colnames(procdeng) <- seq_len(ncol(deng))

# Cluster into 10 groups and plot the clustering.
dengclust <- TSCAN::exprmclust(procdeng, clusternum = 10)
TSCAN::plotmclust(dengclust)

# orderonly = FALSE returns a table (sample_name, Pseudotime, ...) rather than
# just the ordered names.
dengorderTSCAN <- TSCAN::TSCANorder(dengclust, orderonly = FALSE)
pseudotime_order_tscan <- as.character(dengorderTSCAN$sample_name)

# Start with a numeric NA for every cell, then fill in the cells TSCAN
# ordered; cells absent from the table stay NA. The index is derived from the
# character vector above, so it holds the real cell positions even if
# sample_name were stored as a factor (as.numeric() on a factor would return
# level codes instead).
deng_SCE$pseudotime_order_tscan <- NA_real_
deng_SCE$pseudotime_order_tscan[as.numeric(pseudotime_order_tscan)] <-
  dengorderTSCAN$Pseudotime

# Show the stage labels of the cells assigned to cluster 10.
cellLabels[dengclust$clusterid == 10]
## [1] late2cell late2cell late2cell late2cell late2cell late2cell late2cell
## [8] late2cell late2cell late2cell
## 10 Levels: zy early2cell mid2cell late2cell 4cell 8cell ... lateblast
# Stage (y) against TSCAN pseudotime (x), one band per stage. Cells whose
# pseudotime_order_tscan is NA have no x position to plot.
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = pseudotime_order_tscan, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "TSCAN pseudotime",
    y = "Timepoint",
    title = "Cells ordered by TSCAN pseudotime"
  )
## Warning: Removed 47 rows containing missing values (position_quasirandom).
MONOCLE
# Monocle pseudotime on the genes selected by M3Drop.

# Names of the genes M3Drop selects from the current `deng` matrix.
m3dGenes <- as.character(
  M3DropFeatureSelection(deng)$Gene
)
## Warning in bg__calc_variables(expr_mat): Warning: Removing 1134 undetected
## genes.

# Keep only the selected genes, then the first row for each repeated gene
# name. drop = FALSE keeps `d` a matrix even if a single row survives.
d <- deng[rownames(deng) %in% m3dGenes, , drop = FALSE]
d <- d[!duplicated(rownames(d)), , drop = FALSE]

# Replace cell and gene names with running indices, remembering the gene
# names. The annotation data frames built below get default row names
# (1, 2, ...), so these indices line the matrix up with them.
colnames(d) <- seq_len(ncol(d))
geneNames <- rownames(d)
rownames(d) <- seq_len(nrow(d))

# Per-cell annotation (stage label) and per-gene annotation (gene name).
pd <- data.frame(timepoint = cellLabels)
pd <- new("AnnotatedDataFrame", data = pd)
fd <- data.frame(gene_short_name = geneNames)
fd <- new("AnnotatedDataFrame", data = fd)

dCellData <- newCellDataSet(
  d,
  phenoData = pd,
  featureData = fd,
  expressionFamily = tobit()
)

# Mark the ordering genes by row position. `d` was already restricted to
# m3dGenes above, so this selects every remaining row.
# NOTE(review): assumes setOrderingFilter matches these positions against the
# numeric row names assigned above - confirm against the Monocle docs.
dCellData <- setOrderingFilter(dCellData, which(geneNames %in% m3dGenes))
dCellData <- estimateSizeFactors(dCellData)
dCellDataSet <- reduceDimension(dCellData, pseudo_expr = 1)
## Warning in if (cds@expressionFamily@vfamily %in% c("negbinomial",
## "negbinomial.size")) {: the condition has length > 1 and only the first
## element will be used
## Warning in if (cds@expressionFamily@vfamily == "binomialff") {: the
## condition has length > 1 and only the first element will be used
## Warning in if (cds@expressionFamily@vfamily == "Tobit") {: the condition
## has length > 1 and only the first element will be used
## Warning in if (cds@expressionFamily@vfamily == "uninormal") {: the
## condition has length > 1 and only the first element will be used

# Order the cells and plot the resulting trajectory.
dCellDataSet <- orderCells(dCellDataSet, reverse = FALSE)
plot_cell_trajectory(dCellDataSet)

# Collect stage, pseudotime and state per cell, with rows named by cell index.
pseudotime_monocle <-
  data.frame(
    Timepoint = phenoData(dCellDataSet)$timepoint,
    pseudotime = phenoData(dCellDataSet)$Pseudotime,
    State = phenoData(dCellDataSet)$State
  )
rownames(pseudotime_monocle) <- seq_len(ncol(d))

# Cell indices (as character row names) sorted by increasing pseudotime.
pseudotime_order_monocle <-
  rownames(pseudotime_monocle[order(pseudotime_monocle$pseudotime), ])

# Store the per-cell pseudotime on the SingleCellExperiment for plotting.
deng_SCE$pseudotime_monocle <- pseudotime_monocle$pseudotime
# Stage (y) against Monocle pseudotime (x), one band per stage.
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = pseudotime_monocle, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "monocle pseudotime",
    y = "Timepoint",
    title = "Cells ordered by monocle pseudotime"
  )
Diffusion maps
# From here on `deng` holds the logcounts assay instead of raw counts, again
# with columns named by stage label.
deng <- logcounts(deng_SCE)
colnames(deng) <- cellLabels

# Diffusion map on the transposed matrix (cells as rows).
dm <- DiffusionMap(t(deng))

# First two eigenvector columns of the map alongside each cell's stage.
tmp <- data.frame(
  DC1 = eigenvectors(dm)[, 1],
  DC2 = eigenvectors(dm)[, 2],
  Timepoint = deng_SCE$cell_type2
)
ggplot(tmp, aes(x = DC1, y = DC2, colour = Timepoint)) +
  geom_point() +
  scale_color_tableau() +
  labs(x = "Diffusion component 1", y = "Diffusion component 2") +
  theme_classic()

# Pseudotime is taken as the rank of each cell along the first eigenvector
# column.
deng_SCE$pseudotime_diffusionmap <- rank(eigenvectors(dm)[, 1])

# Stage (y) against diffusion-map pseudotime (x), one band per stage.
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = pseudotime_diffusionmap, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "Diffusion map pseudotime (first diffusion map component)",
    y = "Timepoint",
    title = "Cells ordered by diffusion map pseudotime"
  )
SLICER
这个包并不常用。找到可能的start细胞后,branch分析有时候会报错,GitHub上也有人遇到同样的问题;换一个细胞可能会运行成功,但是结果可能就不对了。
library("lle")
## Warning: package 'lle' was built under R version 3.5.2
## Warning: package 'snowfall' was built under R version 3.5.2
# SLICER pseudotime: pick genes, embed the cells with LLE, build a kNN graph
# and order the cells from an extreme cell.

# Gene selection and choice of the neighbourhood size k (searched between 30
# and 60) on the cells-by-genes matrix.
genes <- select_genes(t(deng))
k <- select_k(t(deng[genes, ]), kmin = 30, kmax = 60)
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates

# Two-dimensional locally linear embedding with the selected k; k is passed by
# name so it cannot be matched to a different parameter.
traj_lle <- lle(t(deng[genes, ]), m = 2, k = k)$Y
## finding neighbours
## calculating weights
## computing coordinates

# Store the embedding on the object and plot it coloured by stage.
reducedDim(deng_SCE, "LLE") <- traj_lle
plotReducedDim(deng_SCE, use_dimred = "LLE", colour_by = "cell_type2") +
  xlab("LLE component 1") + ylab("LLE component 2") +
  ggtitle("Locally linear embedding of cells from SLICER")
## Warning: 'add_ticks' is deprecated.
## Use '+ geom_rug(...)' instead.

# kNN graph over the embedding (second argument 10), then candidate extreme
# cells; the first one is used as the starting cell.
traj_graph <- conn_knn_graph(traj_lle, 10)
plot(traj_graph, main = "Fully connected kNN graph from SLICER")
ends <- find_extreme_cells(traj_graph, traj_lle)
start <- ends[1]

# Cell ordering and branch assignment, both relative to the starting cell.
pseudotime_order_slicer <- cell_order(traj_graph, start)
branches <- assign_branches(traj_graph, start)

# Per-cell table; pseudotime is each cell's position in the SLICER ordering.
pseudotime_slicer <-
  data.frame(
    Timepoint = cellLabels,
    pseudotime = NA,
    State = branches
  )
pseudotime_slicer$pseudotime[pseudotime_order_slicer] <-
  seq_along(pseudotime_order_slicer)

# Store the per-cell pseudotime on the SingleCellExperiment for plotting.
deng_SCE$pseudotime_slicer <- pseudotime_slicer$pseudotime
# Stage (y) against SLICER pseudotime (x), one band per stage. The theme is
# applied once and the plot carries a title, matching the other pseudotime
# plots in this analysis.
ggplot(
  as.data.frame(colData(deng_SCE)),
  aes(x = pseudotime_slicer, y = cell_type2, colour = cell_type2)
) +
  geom_quasirandom(groupOnX = FALSE) +
  scale_color_tableau() +
  theme_classic() +
  labs(
    x = "SLICER pseudotime (cell ordering)",
    y = "Timepoint",
    title = "Cells ordered by SLICER pseudotime"
  )