最近,众多学术期刊上开始出现一些让人眼前一亮的高颜值小提琴图。在网上搜罗一番,发现,规范的称呼应该是split violin。直译有点太抽象了,暂且译为对半小提琴吧。
该功能来自网址https://stackoverflow.com/questions/35717353/split-violin-plot-with-ggplot2。其中列举了两种方法,一种是建立了一个功能geo_split_violin直接嵌入ggplot中,无需过多设置,无脑操作即可;另一种是手动打造,先分组求密度分布曲线,然后通过调整分组轴上的坐标位置来实现错位,但是需要对数据进行转换,而且面对更多分组的时候,就需要特别的设置,门槛略高。
以下示例基于自己的数据
加载工具包
library(reshape2)
library(dplyr)
library(rfPermute)
library(phyloseq)
library(lmerTest)
library(lme4)
library(car)
library(piecewiseSEM)
加载整理数据
load('../01_filter_and_rarefy_Bacteria/.RData')
remove(physeq)
sample_variables(rarefy)
rarefy = subset_samples(rarefy, NP<100&EC<2000&CN<150& (!Sequencing_ID_16s_18s%in% c('M24','M12','M30')))
map = data.frame(sample_data(rarefy))
map = mutate(map,Latitude = abs(Latitude),
group = case_when(Vegetation == 'Forest'~ 'Forest', TRUE ~ 'NonForest'))
map$group = as.factor(map$group)
新建向量存储分组变量,以便后文直接引用
envs = c('Latitude', "Elevation", "Slope", # geologic
"MAT", "AI", # climate
"Plant_cover", # plant
"pH", "EC", "Clay_silt", 'WHC','TP') # soil
funs = c("BG","PHOS","NAG","Rb","PO4","NO3", "NH4", 'NPP', 'ORC')
计算物种多样性
richness = estimate_richness(rarefy, measures="Observed")
rownames(richness) == map$Global_Atlas_Order
map$Bacteria_richness = richness$Observed
map$Global_Atlas_Order = NULL
混合效应模型
forest_scale = mutate_if(forest, is.numeric, scale)
nonforest_scale = mutate_if(nonforest, is.numeric, scale)
# additive formular
formular00 =as.formula(
paste("Bacteria_richness~",
paste(c(envs, 'Forest','(1|CLIMA2)','(1|LandUse)'), collapse="+")))
formular01 =as.formula(
paste("Plant_richness~",
paste(c(envs, 'Forest','(1|CLIMA2)','(1|LandUse)'), collapse="+")))
lmer01 = lmer(formular00, data = mutate(rbind(forest_scale, nonforest_scale),
Forest = case_when(group == 'Forest' ~1, TRUE~0)))
lmer02 = lmer(formular01, data = mutate(rbind(forest_scale, nonforest_scale),
Forest = case_when(group == 'Forest' ~1, TRUE~0)))
混合效应模型的结果评估
anova(lmer01,type="I")
summary(lmer01)
vif(lmer01)
rsquared(lmer01)
混合效应模型的bootstrap 检验
lmer_boot01 = bootMer(lmer01,fixef,nsim = 1000,parallel = "snow")
lmer_boot02 = bootMer(lmer02,fixef,nsim = 1000,parallel = "snow")
lmer_boot0 = rbind(data.frame(melt(lmer_boot01$t), group = 'Bacteria'),
data.frame(melt(lmer_boot02$t), group = 'Plant'))
结果整理,因子排序,并设置出图顺序
lmer_boot0 = subset(lmer_boot0, Var2 != '(Intercept)', select = c(Var2, value, group))
lmer_boot0$Var2 = gsub('_',' ',lmer_boot0$Var2)
test0 = aggregate(value~group*Var2, lmer_boot0, median) %>% arrange(group, desc(value))
lmer_boot0$Var2 = factor(lmer_boot0$Var2, levels = test0$Var2[1:12])
两个小花样
#方便后文绘图时,设置间隔性阴影背景来增加多个分组的区分度
odds0 <- seq(1, 12, 2)
rect0 = lmer_boot0[odds0,]
#文章开头提到的那个***split_violin***函数内容有点多,直接放在本空间里有点臃肿,选择新建一个外挂直接调用。
source('utiles.R')
绘图
lmer_boot0_plt = ggplot(lmer_boot0, aes(Var2, value, fill = group)) +
geom_rect(data = rect0,
xmin = odds0 - 0.5,
xmax = odds0 + 0.5,
ymin = -Inf, ymax = +Inf,
fill = 'grey', alpha = 0.5, inherit.aes = F) +
geom_hline(yintercept = 0, lty = 2, color = 'black') +
geom_split_violin() +
theme_classic()+
labs(x = NULL, y = 'standardized effect on diversity',fill = NULL) +
theme(legend.position = c(0.8, 0.9),
legend.text = element_text(color = 'black'),
legend.background = element_blank(),
legend.key.size = unit(0.3,'cm'),
axis.text = element_text(color = 'black'),
axis.title = element_text(color = 'black'),
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
下面分页箱线图插入间隔行的阴影背景请查考这里