轨迹推断,顾名思义是推断细胞分化状态的一种方法,然而现行的轨迹推断的方法都不能将周期纳入其中,但是总会有人这样想,然后实现了它。
library(Revelio)
library(Seurat)
library(SeuratData)
revelioTestData_rawDataMatrix[1:4,1:4]
WT_CTTGTAGCGTCT WT_AATTTAAACTTG WT_GACAACCTCATC WT_ACCATACACACG
A1BG 0 0 0 0
A2M 0 0 0 0
A2M-AS1 0 0 0 0
A2MP1 0 0 0 0
myData <- createRevelioObject(rawData = revelioTestData_rawDataMatrix, # revelioTestData_rawDataMatrix
cyclicGenes = revelioTestData_cyclicGenes)
myData <- getCellCyclePhaseAssignInformation(dataList = myData)
head(myData@cellInfo)
datasetID batchID cellID nUMI nGene ccPhase meanOfPhaseScore sdOfPhaseScore highestPhaseScore
WT_CTTGTAGCGTCT data1 WT WT_CTTGTAGCGTCT 69992 9258 G2 0.8500887 0.1748365 1.5292065
WT_AATTTAAACTTG data1 WT WT_AATTTAAACTTG 61152 8480 G2 0.8297719 0.3069856 0.8591345
WT_GACAACCTCATC data1 WT WT_GACAACCTCATC 64340 9067 S 0.8096808 0.1185934 1.1576782
WT_ACCATACACACG data1 WT WT_ACCATACACACG 64509 8794 S 0.8207118 0.1813450 1.4787240
WT_TTTCAGGCAGAC data1 WT WT_TTTCAGGCAGAC 60750 8468 G1.S 0.7576278 0.1224292 1.1022062
WT_TGTATCTTATAT data1 WT WT_TGTATCTTATAT 65781 8529 M.G1 0.6280796 0.2822224 1.5622928
secondHighestPhaseScore G1.S S G2 G2.M M.G1 G1.S_zScore S_zScore G2_zScore
WT_CTTGTAGCGTCT 0.1644585 0.6529540 0.6998633 0.8727002 0.9508217 1.0741043 -0.3174031 -0.1465188 1.5292065
WT_AATTTAAACTTG 0.5741297 0.4137236 0.6147151 0.9367331 1.0254709 1.1582169 -1.5590611 -0.4339311 0.8591345
WT_GACAACCTCATC 1.0045966 0.7710146 0.8326568 0.7080084 0.7323286 1.0043958 1.0045966 1.1576782 -0.5149779
WT_ACCATACACACG 0.2387414 0.6034522 0.7511739 0.7997519 0.8486733 1.1005076 -0.8947453 1.4787240 0.1442364
WT_TTTCAGGCAGAC 1.0265113 0.7172310 0.7510659 0.6798175 0.6709441 0.9690804 1.1022062 1.0265113 -0.3701836
WT_TGTATCTTATAT 0.3051694 0.5168821 0.4061878 0.4772610 0.6279929 1.1120743 0.3051694 -0.9780575 -0.6834449
G2.M_zScore M.G1_zScore isOutlierSuspectedDoublets isOutlierNoConfidenceInPhaseScore ccAngle ccRadius
WT_CTTGTAGCGTCT 0.1644585 -1.2297432 FALSE FALSE 1.445197 7.395479
WT_AATTTAAACTTG 0.5741297 0.5597280 FALSE FALSE 1.088388 9.584398
WT_GACAACCTCATC -0.9431705 -0.7041264 FALSE FALSE 2.447252 8.011687
WT_ACCATACACACG -0.9669564 0.2387414 FALSE FALSE 1.931565 7.179339
WT_TTTCAGGCAGAC -1.0344432 -0.7240907 FALSE FALSE 2.515097 6.507658
WT_TGTATCTTATAT -0.2059597 1.5622928 FALSE FALSE 5.169606 4.757254
ccTime ccPercentage ccPercentageUniformlySpaced ccPositionIndex
WT_CTTGTAGCGTCT 14.883901 0.7699897 0.7550035 344
WT_AATTTAAACTTG 15.981614 0.8267777 0.8115942 1375
WT_GACAACCTCATC 11.801113 0.6105077 0.6259489 1206
WT_ACCATACACACG 13.387608 0.6925819 0.7039337 1251
WT_TTTCAGGCAGAC 11.592393 0.5997099 0.6197378 1329
WT_TGTATCTTATAT 3.425888 0.1772317 0.1145618 201
debug(getOptimalRotation)
myData <- getOptimalRotation(dataList = myData, boolPlotResults = TRUE)
真的还是有几分的周期的意思,那么它是如何做的呢?
先看每个细胞属于哪个周期的打分方法:
首先有一个revelioTestData_cyclicGenes
每个阶段特征表达的基因集,根据他们在每个细胞的表达情况来推断细胞的状态。
dim(revelioTestData_cyclicGenes)
[1] 216 5
revelioTestData_cyclicGenes[1:4,1:4]
G1.S S G2 G2.M
1 ABCA7 ABCC2 ALKBH1 ADH4
2 ACD ABCC5 ANLN AHI1
3 ACYP1 ABHD10 AP3D1 AKIRIN2
4 ADAMTS1 ACPP ARHGAP11B ANKRD40
有了细胞的周期归属之后,我们需要在低维空间中考虑如何用黄环形结构来可视化它。在三维空间中,数据形成一个倾斜的圆柱体。如果我们旋转圆柱体并从顶部或底部查看它,一个清晰的细胞周期结构将变得可见。旋转的pc是原始pc的简单线性组合,是动态组件(DCs)。DC1和DC2跨越细胞周期。
那么具体的实现手法是怎样的呢?其实就是对低维空间的坐标做了一个环形转化,具体可以看源代码。有了环形数据结构之后,如果想在数据中连同细胞周期一起绘制,则可以用ggplot绘制相关的属性。
function (dataList, boolPlotResults = FALSE)
{
startTime <- Sys.time()
cat(paste(Sys.time(), ": calculating optimal rotation: ",
sep = ""))
dataList@transformedData$dc <- calculateOptimalRotation(pcaData = dataList@transformedData$pca$data,
pcaWeights = dataList@transformedData$pca$weights, pcaIsPCAssociatedWithCC = dataList@transformedData$pca$pcProperties$isComponentAssociatedWithCC,
ccPhaseInformation = dataList@cellInfo$ccPhase)
cellCycleScore <- getCellCycleScoreForPCA(dataList@transformedData$dc$data,
dataList@cellInfo[, "ccPhase"])
boolOutliers <- rep(FALSE, length(cellCycleScore))
boolOutliers[1:(min(length(cellCycleScore), 100))] <- calculateOutliersInVector(data = as.vector(cellCycleScore[1:(min(length(cellCycleScore),
100))]), outlierThreshold = 2)
dataList@transformedData$dc$dcProperties <- cbind(dataList@transformedData$dc$dcProperties,
ccScore = cellCycleScore, isComponentAssociatedWithCC = boolOutliers)
thresholdForPCWeightToBeSignificant <- sqrt(1/dim(dataList@transformedData$pca$data)[1])
ccGenesHelp <- rep(FALSE, dim(dataList@geneInfo)[1])
names(ccGenesHelp) <- dataList@geneInfo[, "geneID"]
ccGenesHelp[dataList@geneInfo$geneID[dataList@geneInfo$pcaGenes][which((abs(dataList@transformedData$dc$weights[1,
]) > thresholdForPCWeightToBeSignificant) | (abs(dataList@transformedData$dc$weights[2,
]) > thresholdForPCWeightToBeSignificant))]] <- TRUE
dataList@geneInfo <- cbind(dataList@geneInfo, ccGenes = ccGenesHelp)
dataList <- getRotation2D(dataList = dataList)
dataList <- getCCSorting(dataList = dataList)
cat(paste(round(Sys.time() - startTime, 2), attr(Sys.time() -
startTime, "units"), "\n", sep = ""))
if (boolPlotResults) {
plotParameters <- list()
plotParameters$colorPaletteCellCyclePhasesGeneral <- c("#ac4343",
"#466caf", "#df8b3f", "#63b558", "#e8d760", "#61c5c7",
"#f04ddf", "#a555d4")
plotParameters$plotLabelTextSize <- 8
plotParameters$plotDotSize <- 0.1
plotParameters$fontFamily <- "Helvetica"
plotParameters$fontSize <- 8
plotParameters$colorPaletteCellCyclePhasesGeneral <- plotParameters$colorPaletteCellCyclePhasesGeneral[1:length(levels(dataList@cellInfo$ccPhase))]
names(plotParameters$colorPaletteCellCyclePhasesGeneral) <- levels(dataList@cellInfo$ccPhase)
plotBoundary <- max(dataList@transformedData$dc$data[c("DC1",
"DC2"), ]) * 0.78
ccPhaseBorderPathPolar <- data.frame(angle = rep(0,
7), radius = rep(0, 7))
ccPhaseBorderPathPolar[c(1, 3, 5, 7), 1] <- 2 * pi *
(1 - cumsum(c(dataList@datasetInfo$ccDurationG1,
dataList@datasetInfo$ccDurationS, dataList@datasetInfo$ccDurationG2,
dataList@datasetInfo$ccDurationM))/dataList@datasetInfo$ccDurationTotal)
ccPhaseBorderPathPolar[c(1, 3, 5, 7), 2] <- 50
ccPhaseBorderPathCartesian <- ccPhaseBorderPathPolar
ccPhaseBorderPathCartesian[, 1] <- ccPhaseBorderPathPolar[,
2] * cos(ccPhaseBorderPathPolar[, 1])
ccPhaseBorderPathCartesian[, 2] <- ccPhaseBorderPathPolar[,
2] * sin(ccPhaseBorderPathPolar[, 1])
colnames(ccPhaseBorderPathCartesian) <- c("xValue",
"yValue")
labelPositionHelp <- append(2 * pi, ccPhaseBorderPathPolar[c(1,
3, 5, 7), 1])
labelPositionHelp <- (labelPositionHelp[1:(length(labelPositionHelp) -
1)] - labelPositionHelp[2:length(labelPositionHelp)])/2 +
labelPositionHelp[2:length(labelPositionHelp)]
labelRadiusHelp <- labelPositionHelp
labelRadiusHelp[(labelRadiusHelp > pi/4) & (labelRadiusHelp <
3 * pi/4)] <- labelRadiusHelp[(labelRadiusHelp >
pi/4) & (labelRadiusHelp < 3 * pi/4)] - pi/2
labelRadiusHelp[(labelRadiusHelp > 5 * pi/4) & (labelRadiusHelp <
7 * pi/4)] <- labelRadiusHelp[(labelRadiusHelp >
5 * pi/4) & (labelRadiusHelp < 7 * pi/4)] - pi/2
labelPositionPolar <- data.frame(angle = labelPositionHelp,
radius = sqrt((plotBoundary * tan(labelRadiusHelp))^2 +
plotBoundary^2), label = c("G1", "S", "G2",
"M"))
labelPositionCartesian <- labelPositionPolar
labelPositionCartesian[, 1] <- labelPositionPolar[,
2] * cos(labelPositionPolar[, 1])
labelPositionCartesian[, 2] <- labelPositionPolar[,
2] * sin(labelPositionPolar[, 1])
colnames(labelPositionCartesian) <- c("xValue", "yValue",
"label")
plotDC1DC2 <- ggplot(data = cbind(as.data.frame(t(dataList@transformedData$dc$data)),
ccPhase = dataList@cellInfo$ccPhase)) + theme_gray(base_size = plotParameters$plotLabelTextSize) +
theme(text = element_text(family = plotParameters$fontFamily,
size = plotParameters$fontSize), axis.text = element_text(family = plotParameters$fontFamily,
size = plotParameters$fontSize - 1), axis.title = element_text(family = plotParameters$fontFamily,
size = plotParameters$fontSize), legend.text = element_text(family = plotParameters$fontFamily,
size = plotParameters$fontSize - 1), legend.title = element_text(family = plotParameters$fontFamily,
size = plotParameters$fontSize)) + geom_point(aes(x = DC1,
y = DC2, color = ccPhase), size = plotParameters$plotDotSize *
5) + coord_cartesian(xlim = c(-plotBoundary, plotBoundary),
ylim = c(-plotBoundary, plotBoundary)) + scale_color_manual(values = plotParameters$colorPaletteCellCyclePhasesGeneral,
labels = levels(dataList@cellInfo$ccPhase)) + theme(legend.position = "right",
axis.text.y = element_text(angle = 90, hjust = 0.5),
legend.title = element_blank(), aspect.ratio = 1) +
guides(color = guide_legend(override.aes = list(size = 1)))
grid.arrange(plotDC1DC2)
}
return(dataList)
}
cell_cycle = data.frame(phase = factor(c("G1", "S", "G2", "M"), levels = c("G1", "S", "G2", "M")),
hour = c(11, 8, 4, 1))
color = c("#66C2A5", "#FC8D62", "#8DA0CB", "#E78AC3")
circos.par(start.degree = 90)
circos.initialize(cell_cycle$phase, xlim = cbind(rep(0, 4), cell_cycle$hour))
circos.track(ylim = c(0, 1), panel.fun = function(x, y) {
circos.arrow(CELL_META$xlim[1], CELL_META$xlim[2],
arrow.head.width = CELL_META$yrange*0.8, arrow.head.length = cm_x(0.5),
col = color[CELL_META$sector.numeric.index])
circos.text(CELL_META$xcenter, CELL_META$ycenter, CELL_META$sector.index,
facing = "downward")
circos.axis(h = 1, major.at = seq(0, round(CELL_META$xlim[2])), minor.ticks = 1,
labels.cex = 0.6)
}, bg.border = NA, track.height = 0.3)
https://github.com/tinglab/reCAT
https://github.com/danielschw188/Revelio
http://bioconductor.org/books/release/OSCA/trajectory-analysis.html
https://jokergoo.github.io/circlize_book/book/graphics.html
Circular Trajectory Reconstruction Uncovers Cell‐Cycle Progression and Regulatory Dynamics from Single‐Cell Hi‐C Maps
Single-Cell RNA Sequencing Maps Endothelial Metabolic Plasticity in Pathological Angiogenesis
Latent periodic process inference from single-cell RNA-seq data
The transcriptome dynamics of single cells during the cell cycle