AverageExpression
gzh:BBio
Seurat中用于计算cluster基因平均表达值的函数,为啥这个结果和FindMarkers中差异倍数avg_logFC有出入呢?
#计算每个cluster中基因的平均表达
df <- AverageExpression(pbmc, verbose=F)$RNA
head(df)
#0 1 2
# MS4A1 0.000000 2.083443 171.6152
# CD79B 10.814657 17.548842 152.1344
# CD79A 0.000000 11.618333 215.0869
# HLA-DRA 37.105857 405.850522 1158.0852
# TCL1A 0.000000 3.463203 142.0748
# HLA-DQB1 3.968254 45.353183 169.2762
AverageExpression源码
getAnywhere('AverageExpression')
# fxn.average <- switch(EXPR = slot, data = function(x) {
# rowMeans(x = expm1(x = x))
# }, rowMeans)
# for (j in levels(x = Idents(object))) {
# temp.cells <- WhichCells(object = object, idents = j)
# features.assay <- unique(x = intersect(x = features.assay,
# y = rownames(x = data.use)))
# if (length(x = temp.cells) == 1) {
# data.temp <- (data.use[features.assay, temp.cells])
# if (slot == "data") {
# data.temp <- expm1(x = data.temp)
# }
# }
# if (length(x = temp.cells) > 1) {
# data.temp <- fxn.average(data.use[features.assay,
# temp.cells, drop = FALSE])
# }
# data.all[[j]] <- data.temp
# if (verbose) {
# message(paste("Finished averaging", assays[i],
# "for cluster", j))
# }
# if (i == 1) {
# ident.new <- c(ident.new, as.character(x = ident.orig[temp.cells[1]]))
# }
# }
从源码可以看出,函数对数据中的每个cluster依次计算基因的平均表达值。rowMeans(x = expm1(x = x))表明:平均表达值是先对data中的数据做expm1(即exp(x) - 1)还原,再取平均值,并不是简单地对data数据直接取平均值;这实际上就是NormalizeData中log1p的逆步骤。
FindMarkers源码
getAnywhere('FindMarkers.default')
# mean.fxn <- if (is.null(x = reduction) && slot != "scale.data") {
# switch(EXPR = slot, data = function(x) {
# return(log(x = rowMeans(x = expm1(x = x)) + pseudocount.use))
# }, function(x) {
# return(log(x = rowMeans(x = x) + pseudocount.use))
# })
# }
# else {
# rowMeans
# }
# data.1 <- mean.fxn(data[features, cells.1, drop = FALSE])
# data.2 <- mean.fxn(data[features, cells.2, drop = FALSE])
# total.diff <- (data.1 - data.2)
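从源码可以看出,avg_logFC的计算过程是:先分别计算两组细胞的平均表达值(data经expm1还原后取平均),各自加上pseudocount.use(默认值为1)后取自然对数,再将两组结果相减,即 avg_logFC = log(mean1 + 1) - log(mean2 + 1)。因此它与AverageExpression的结果有出入:后者给出的是未取对数的平均表达值。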
LYZ基因示例
AverageExpression(pbmc_small, features = 'LYZ')
#0 1 2
#LYZ 44.31667 987.141 262.0951
FindMarkers(pbmc_small, features = 'LYZ',ident.1 = 0, ident.2 = 1)
#p_val avg_logFC pct.1 pct.2 p_val_adj
#LYZ 6.997602e-11 -3.08215 0.417 1 1.609449e-08
log((44.31667+1)/(987.141+1))
#-3.08215
马克marker
#T细胞
FeaturePlot(object = pbmc_small, features = c('CD3D', 'CD8A', 'IL7R'),ncol=3)