同名公主号:BBio
rstatix包支持管道符,与tidyverse包设计理念相一致,用于执行基本的统计检验,包括t检验、Wilcoxon检验、ANOVA、Kruskal-Wallis和相关性分析。
ggpubr包提供了一些易于使用的函数,基于ggplot2,可绘制用于文章发表的各种图形。当然,ggpubr也提供了一些统计函数,但是rstatix在统计方面更加丰富。
如何使用两者绘制出下面好看的图形呢?以单细胞数据为例。
//细胞比例组间差异
-
生成测试数据
library(Seurat)
library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggplot2)
#生成测试数据
pbmc <- pbmc_small
func <- function(x)str_c('S', x%%12+1)
sample <- pbmc[['groups']] %>% add_column(order=1:dim(pbmc)[2]) %>% mutate(remainder=func(order)) %>% pull(remainder)
df <- table(Idents(pbmc), sample) %>%
prop.table(2) %>%
as.data.frame.array() %>%
rownames_to_column("cluster") %>%
pivot_longer(unique(sample), names_to="sample", values_to="percent") %>%
left_join(data.frame(sample=str_c("S", 1:12), group=rep(c("G1", "G2"), each = 6)))
df
# A tibble: 18 × 4
# cluster sample percent group
# <chr> <chr> <dbl> <chr>
# 1 0 S2 0.571 G1
# 2 0 S3 0.429 G1
# 3 0 S4 0.385 G2
# 4 0 S5 0.385 G2
# 5 0 S6 0.462 G2
# 6 0 S1 0.462 G1
# 7 1 S2 0.214 G1
# 8 1 S3 0.357 G1
# 9 1 S4 0.385 G2
#10 1 S5 0.385 G2
#11 1 S6 0.231 G2
#12 1 S1 0.308 G1
#13 2 S2 0.214 G1
#14 2 S3 0.214 G1
#15 2 S4 0.231 G2
#16 2 S5 0.231 G2
#17 2 S6 0.308 G2
#18 2 S1 0.231 G1
-
细胞比例组间差异柱状图
#只画cluster 2
data <- df[df$cluster==2, ]
comparisons <- list(c("G1", "G2"))
ggbarplot(data, x = "group", y = "percent",
color = "group",
palette = c("#00AFBB", "#E7B800"),
width = 0.2,
x.text.angle = 90,
add=c('mean_se')) +
labs(x=NULL) + theme(legend.position="none", panel.border=element_rect(color='black', fill=NA, size=0.7, linetype="solid")) +
stat_compare_means(comparisons = comparisons, method = 't.test', label = 'p.signif')
-
细胞比例组间差异箱线图
#只画cluster2
data <- df[df$cluster==2, ]
comparisons <- list(c("G1", "G2"))
p1 <- ggboxplot(data, x = "group", y = "percent",
color = "group",
palette =c("#00AFBB", "#E7B800"),
add = "jitter", shape = "group") +
labs(x=NULL) + theme(legend.position="none", panel.border=element_rect(color='black', fill=NA, size=0.7, linetype="solid")) +
stat_compare_means(comparisons = comparisons, method = 't.test', label = 'p.signif', size=4)
#画所有cluster
p2 <- ggboxplot(df, x = "group", y = "percent",
color = "group",
facet.by = "cluster",
palette =c("#00AFBB", "#E7B800"),
add = "jitter", shape = "group") +
labs(x=NULL) + theme(legend.position="none", panel.border=element_rect(color='black', fill=NA, size=0.7, linetype="solid")) +
stat_compare_means(comparisons = comparisons, method = 't.test', label = 'p.signif', size=4)
p1 + p2
#以上图形,很坐标都是组别,ggpubr中的stat_compare_means可以直接画图。但是如果横坐标是cluster呢?
stat.test <- df %>%
group_by(cluster) %>%
t_test(percent ~ group) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance("p")
stat.test <- stat.test %>%
add_xy_position(x = "cluster", dodge = 0.8)
# A tibble: 3 × 16
# cluster .y. group1 group2 n1 n2 statistic df p p.adj
# <chr> <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl>
#1 0 percent G1 G2 6 6 2.03 9.72 0.0706 0.212
#2 1 percent G1 G2 6 6 -2.52 9.02 0.0326 0.0978
#3 2 percent G1 G2 6 6 0.210 9.04 0.838 1
# p.signif y.position groups x xmin xmax
# <chr> <dbl> <named list> <dbl> <dbl> <dbl>
#1 ns 0.707 <chr [2]> 1 0.8 1.2
#2 ns 0.540 <chr [2]> 2 1.8 2.2
#3 ns 0.469 <chr [2]> 3 2.8 3.2
p3 <- ggboxplot(df, x = "cluster", y = "percent",
color = "group",
palette =c("#00AFBB", "#E7B800", "#FC4E07"),
add = "jitter", shape = "group") +
labs(x=NULL) + theme(legend.position="none", panel.border=element_rect(color='black', fill=NA, size=0.7, linetype="solid"))
p4 <- p1 + stat_pvalue_manual(stat.test, label = "p", tip.length = 0)
p5 <- p1 + stat_pvalue_manual(stat.test, label = "p.signif", tip.length = 0)
p4 + p5
//组间基因表达差异
-
生成测试数据
library(Seurat)
library(tidyverse)
library(rstatix)
library(ggpubr)
library(ggplot2)
#生成测试数据,添加一个分组
pbmc <- pbmc_small
g3_cells <- sample(colnames(pbmc), 27)
pbmc[['groups']][g3_cells, ] <- "g3"
unique(pbmc$groups)
-
组间基因差异表达小提琴图
#绘制组间所有细胞中基因表达差异
data <- pbmc
Idents(data) <- data$groups
levels(data) <- c('g1', 'g2', 'g3')
comparisons <- list(c("g2", "g3"))
p <- VlnPlot(data, features='LYZ', cols=c("#00AFBB", "#E7B800", "#FC4E07")) +
theme(legend.position="none", panel.border=element_rect(color='black', fill=NA, size=0.7, linetype="solid"), axis.text.x=element_text(angle=0, hjust=0.5)) +
labs(x=NULL)
p6 <- p + stat_compare_means(comparisons = comparisons, method = 't.test', label = 'p.signif', size=8) + lims(y=c(0, 9))
comparisons <- list(c("g2", "g3"), c('g1', 'g2'), c('g1', 'g3'))
p7 <- p + stat_compare_means(comparisons = comparisons, method = 't.test', label = 'p.signif', size=8) + lims(y=c(0, 11))
p6 + p7
png('test.png', height=200, width=360)
print(p)
dev.off()
#如果横坐标是细胞类型呢?
data <- pbmc
data$groups <- factor(data$groups, levels=c('g1', 'g2', 'g3'))
comparisons <- list(c("g2", "g3"), c('g1', 'g3'))
p <- VlnPlot(data, features='LYZ', cols=c("#00AFBB", "#E7B800", "#FC4E07"), split.by='groups') +
theme(legend.position="none", panel.border=element_rect(color='black', fill=NA, size=0.7, linetype="solid"), axis.text.x=element_text(angle=0, hjust=0.5)) +
labs(x=NULL)
#执行统计检验
df <- p$data
head(df)
# LYZ ident split
#ATGCCAGAACGACT 4.968834e+00 0 g3
#CATGGCCTGTGCAT 4.776148e+00 0 g3
#GAACCTGATGAACC 4.753098e+00 0 g2
#TGACTGGATTCTCA 6.328626e-06 0 g2
#AGTCAGACTGCACA 4.042683e-06 0 g2
#TCTGATACACGTGT 4.968820e+00 0 g3
stat.test <- df %>%
group_by(ident) %>%
t_test(LYZ ~ split, comparisons = comparisons) %>%
adjust_pvalue(method = "bonferroni") %>%
add_significance("p")
stat.test <- stat.test %>%
add_xy_position(x = "ident", dodge = 0.8)
print(stat.test, width=Inf)
p8 <- p + stat_pvalue_manual(stat.test, label = "p.signif", tip.length = 0, step.increase = 0)
p8
#Error in `check_aesthetics()`:
#! Aesthetics must be either length 1 or the same as the data (6): fill
#无解报错,不知所以,试着用ggplot2重画,还是同样报错,ggpubr中的ggviolin就不会报错
p <- ggviolin(df, x = "ident", y = "LYZ", fill = "split",
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
trim = TRUE) +
labs(x=NULL, fill=NULL)
p8 <- p + stat_pvalue_manual(stat.test, label = "{p} {p.signif}", tip.length = 0, step.increase = 0)
p8