R语言~非线性拟合及寻找数据拐点

加载R包

packages <- c("ggplot2", "reshape2", 'gam', 'chngpt', 'plyr', 'ggpmisc', "scales", "ade4", "ggpubr", "Rmisc", "ggsci")

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages])
}

invisible(lapply(packages, library, character.only = TRUE))
library(dplyr)

dplyr与plyr有重叠函数,相冲突,加载顺序很重要

加载并整理数据

dat = read.table('../Data/GlobalAtlasv2_scores_v5.txt', header = T, sep = '\t',check.names = FALSE)
dat$AridityIndex_wc2 = NULL
names(dat) = gsub('_18s_richness','', names(dat))
names(dat) = gsub('Richness_bacteria','Bacteria', names(dat))
names(dat) = gsub('_wc2','',names(dat))
names(dat) = gsub('WaterHoldingCapacity','WHC',names(dat))
names(dat) = gsub('Proportion_decomposers','Decomposers',names(dat))
names(dat) = gsub('Proportion_symbiotic_fungi','Sym_fungi', names(dat))
names(dat) = gsub('Pathogen_control','Pathogen_ctrl',names(dat))
names(dat) = gsub('Soil_respiration','Soil_resp',names(dat))
names(dat) = gsub('Soil_salinity','Salinity',names(dat))
names(dat) = gsub('Soil_pH','pH', names(dat))
names(dat) = gsub('Clay_silt_c','Clay_silt',names(dat))
names(dat) = gsub('Soil_ORC_g_kg','SOC', names(dat))
names(dat) = gsub('Aridity_Index_v3','AridityIndex', names(dat))
dat$Salinity = log(dat$Salinity)

预设一些向量,以便后文直接引用

div_var = c("Oomycota","Bacteria", "Fungi", "Nematoda","Ciliophora", 
            "Amoebozoa","Excavata", "Apicomplexa","Platyhelminthes", "Annelida", 
            "Rotifera","Cercozoa", "Ochrophyta", "Dinoflagellata", "Arachnida", 
            "Collembola",  "Neoptera","Myriapoda", "Tardigrada",   "Chlorophyta")

protist = c("Cercozoa", "Ciliophora", "Chlorophyta", "Amoebozoa", "Excavata", 
            "Ochrophyta", "Apicomplexa", "Oomycota", "Dinoflagellata")

invertebrate = c("Nematoda", "Rotifera", "Arachnida", "Tardigrada", 
                 "Neoptera", "Collembola", "Annelida", "Myriapoda","Platyhelminthes")

func_var = c("Porosity","WHC","NH4","NO3","PO4","NAG","Soil_resp",
              "NPP","Pathogen_ctrl" ,"Sym_fungi","Decomposers")

env_var = c("pH", 'Clay_silt' ,"Salinity", "SOC", "Plant_cover", "MAT", "PSEA", "MDR", "TSEA",'AridityIndex')

数据概览

diversity = melt(subset(dat, select = c(div_var,'Eco_corrected')), id.vars = 'Eco_corrected', variable.name = 'Diversity')
mycol = pal_lancet()(9)
div_eco_p = ggplot(diversity, aes(Eco_corrected, value, fill = Eco_corrected)) +
  facet_wrap(.~Diversity, scales = 'free') +
  geom_boxplot()+
  scale_fill_manual(values = mycol)+
  stat_compare_means(label = 'p.signif',color = 'black',label.x.npc = 0.5, label.y.npc = 0.8)+
  theme_classic()+
  labs(x = NULL, y = NULL, fill = NULL) +
  theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),
        strip.placement = 'outside',
        legend.position = 'top', legend.direction = 'horizontal')
ggsave(div_eco_p, filename = 'diversity_forest.jpg', width = 8, height = 8, dpi = 300)

比较线性和非线性拟合的效果,非线性拟合效果好,则意味着存在有价值的拐点

通过置换检验,进行一千次比较

source('utils.R')
threshold_check = compute_pairs(env_var, div_var, dat, log.y = F)
##Check results

gam_test = threshold_check$gam_test
gam_test$Var1 = factor(gam_test$Var1, levels = c('gam','Quad', 'lineal'))
gam_test$Xvar = gsub('AridityIndex','Wat bal', gam_test$Xvar)
gam_test$Xvar = gsub('Plant_cover', 'Plt cov', gam_test$Xvar)
head(gam_test)
gam_test_plot = ggplot(subset(gam_test,Xvar != 'Clay_silt'), aes(Xvar, Freq/1000, fill = Var1)) +
  geom_bar(stat = 'identity') +
  geom_hline(yintercept = 0.5, lwd = 1, color = 'black') +
  facet_wrap(.~Yvar)+
  labs(x = NULL, y = 'Frequency', fill = 'fitting model') +
  theme_classic()+
  scale_y_continuous(labels = label_percent()) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
        legend.direction = 'horizontal',
        legend.position = 'top')
ggsave(gam_test_plot, filename = 'gam_test_new.jpg', width = 8, height = 8, dpi = 300)

gam_test_new.jpg

线性拟合占比超过一半,说明线性拟合效果更好,反之,非线性拟合效果好,存在拐点

获取潜在的数据拐点

threshold = subset(threshold_check$thresholds, method == 'all')

dataLine <- threshold %>%
  group_by(Xvar,Yvar) %>%
  summarize(mean_threshold = mean(thresholds),median_threshold = median(thresholds))

fin_db = threshold_check$fin_db

获取线性拟合占有的XY组合,以便在后续绘图添加拐点标识时加以区分

No_threshold_XY = mutate(subset(gam_test, Freq > 500 & Var1 == 'lineal'), pair = paste0(Xvar,Yvar))$pair

评价哪些变量的拐点更具保守性,可能意味着其更重要。

avd_Xvar = avd(subset(dataLine, !paste0(Xvar,Yvar) %in% No_threshold_XY & Xvar != 'Clay_silt'), "Xvar") %>% arrange(desc(AVD))
avd_Xvar$Xvar = gsub('Plant_cover','Plt cov', avd_Xvar$Xvar)
avd_Xvar$Xvar = gsub('AridityIndex', 'Wat avbl', avd_Xvar$Xvar)
avd_Xvar$Xvar = factor(avd_Xvar$Xvar, levels = avd_Xvar$Xvar)


avd_Xvar_plot = ggplot(avd_Xvar, aes(Xvar, AVD)) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0))+
  geom_bar(stat = 'identity', show.legend = F, fill = 'grey', color = 'black', width = 0.75)+
  theme_classic() +
  labs(x = NULL, y = 'Average variation degree', fill = NULL) +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))
# write.csv(data.frame(avd_Xvar, group = 'Total'), file = 'avd.env.csv')

以干旱度指数为例,绘制拐点分布图及拐点前后的相关关系的变化

1000次非线性拟合确认的拐点分布图

dataLine_aridity = data.frame(subset(dataLine, Xvar == 'AridityIndex') %>% arrange(mean_threshold))
threshold_aridity = subset(threshold, Xvar == "AridityIndex")
threshold_aridity$Yvar = factor(threshold_aridity$Yvar, levels = dataLine_aridity$Yvar)
threshold_aridity$Group = ifelse(threshold_aridity$Yvar %in% invertebrate, 'Invertebrate',
                                 ifelse(threshold_aridity$Yvar %in% protist, 'Protist', 
                                        as.character(threshold_aridity$Yvar)))

library(ggsci)
mycol = pal_lancet()(9)
aridity_threshods = ggplot(threshold_aridity, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(color = 'black',show.legend = T) +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'Water availability',fill = NULL) +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'),
        legend.text = element_text(size = 16, color = 'black'),
        legend.background = element_blank(),
        legend.position = c(0.8,0.55),
        legend.key.height = unit(0.6,'cm'))

取1000次拐点的均值为最终的拐点,并比较拐点前后的相关性变化

No_threshold_aridity_Y = subset(gam_test, Xvar == 'Wat bal' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_aridity = subset(fin_db, Xvar == 'AridityIndex')
fin_db_aridity$Xvar = gsub('AridityIndex','Wat avbl', fin_db_aridity$Xvar)
fin_db_aridity$Yvar = factor(fin_db_aridity$Yvar, levels = rev(dataLine_aridity$Yvar))
dataLine_aridity$Yvar = factor(dataLine_aridity$Yvar, levels = dataLine_aridity$Yvar)
aridity_break_plot <- ggplot(fin_db_aridity, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  # geom_point(size = 3)+
  geom_smooth(data = subset(fin_db_aridity, !Yvar %in% No_threshold_aridity_Y), 
              aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_aridity, Yvar %in% No_threshold_aridity_Y), 
              aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_aridity, !Yvar %in% No_threshold_aridity_Y),  
             aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'Wat avbl', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(0.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())

同理针对其他环境变量

dataLine_psea = data.frame(subset(dataLine, Xvar == 'PSEA') %>% arrange(mean_threshold))
threshold_psea = subset(threshold, Xvar == "PSEA")
threshold_psea$Yvar = factor(threshold_psea$Yvar, levels = dataLine_psea$Yvar)
threshold_psea$Group = ifelse(threshold_psea$Yvar %in% invertebrate, 'Invertebrate',
                              ifelse(threshold_psea$Yvar %in% protist, 'Protist', 
                                     as.character(threshold_psea$Yvar)))
psea_threshods = ggplot(threshold_psea, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'PSEA',fill = NULL) +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_psea_Y = subset(gam_test, Xvar == 'PSEA' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_psea = subset(fin_db, Xvar == 'PSEA')
fin_db_psea$Yvar = factor(fin_db_psea$Yvar, levels = rev(dataLine_psea$Yvar))
dataLine_psea$Yvar = factor(dataLine_psea$Yvar, levels = dataLine_psea$Yvar)
psea_break_plot <- ggplot(fin_db_psea, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_psea, !Yvar %in% No_threshold_psea_Y), 
              aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_psea, Yvar %in% No_threshold_psea_Y), 
              aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_psea, !Yvar %in% No_threshold_psea_Y),  
             aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'PSEA', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())

dataLine_Plant_cover = data.frame(subset(dataLine, Xvar == 'Plant_cover') %>% arrange(mean_threshold))
threshold_Plant_cover = subset(threshold, Xvar == "Plant_cover")
threshold_Plant_cover$Yvar = factor(threshold_Plant_cover$Yvar, levels = dataLine_Plant_cover$Yvar)
threshold_Plant_cover$Group = ifelse(threshold_Plant_cover$Yvar %in% invertebrate, 'Invertebrate',
                              ifelse(threshold_Plant_cover$Yvar %in% protist, 'Protist', 
                                     as.character(threshold_Plant_cover$Yvar)))
Plant_cover_threshods = ggplot(threshold_Plant_cover, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'Plant cover', fill = NULL) +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_Plant_cover_Y = subset(gam_test, Xvar == 'Plt cov' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_Plant_cover = subset(fin_db, Xvar == 'Plant_cover')
fin_db_Plant_cover$Yvar = factor(fin_db_Plant_cover$Yvar, levels = rev(dataLine_Plant_cover$Yvar))
dataLine_Plant_cover$Yvar = factor(dataLine_Plant_cover$Yvar, levels = dataLine_Plant_cover$Yvar)
Plant_cover_break_plot <- ggplot(fin_db_Plant_cover, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_Plant_cover, !Yvar %in% No_threshold_Plant_cover_Y), 
              aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_Plant_cover, Yvar %in% No_threshold_Plant_cover_Y), 
              aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_Plant_cover, !Yvar %in% No_threshold_Plant_cover_Y),  
             aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'Plt cov', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())



dataLine_Salinity = data.frame(subset(dataLine, Xvar == 'Salinity') %>% arrange(mean_threshold))
threshold_Salinity = subset(threshold, Xvar == "Salinity")
threshold_Salinity$Yvar = factor(threshold_Salinity$Yvar, levels = dataLine_Salinity$Yvar)
threshold_Salinity$Group = ifelse(threshold_Salinity$Yvar %in% invertebrate, 'Invertebrate',
                                  ifelse(threshold_Salinity$Yvar %in% protist, 'Protist', 
                                         as.character(threshold_Salinity$Yvar)))
Salinity_threshods = ggplot(threshold_Salinity, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'log(Salinity)') +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_Salinity_Y = subset(gam_test, Xvar == 'Salinity' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_Salinity = subset(fin_db, Xvar == 'Salinity')
fin_db_Salinity$Yvar = factor(fin_db_Salinity$Yvar, levels = rev(dataLine_Salinity$Yvar))
dataLine_Salinity$Yvar = factor(dataLine_Salinity$Yvar, levels = dataLine_Salinity$Yvar)
Salinity_break_plot <- ggplot(fin_db_Salinity, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_Salinity, !Yvar %in% No_threshold_Salinity_Y), aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_Salinity, Yvar %in% No_threshold_Salinity_Y), aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_Salinity, !Yvar %in% No_threshold_Salinity_Y),  aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'log(Salinity)', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())


dataLine_TSEA = data.frame(subset(dataLine, Xvar == 'TSEA') %>% arrange(mean_threshold))
threshold_TSEA = subset(threshold, Xvar == "TSEA")
threshold_TSEA$Yvar = factor(threshold_TSEA$Yvar, levels = dataLine_TSEA$Yvar)
threshold_TSEA$Group = ifelse(threshold_TSEA$Yvar %in% invertebrate, 'Invertebrate',
                              ifelse(threshold_TSEA$Yvar %in% protist, 'Protist', 
                                     as.character(threshold_TSEA$Yvar)))
TSEA_threshods = ggplot(threshold_TSEA, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'TSEA') +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_TSEA_Y = subset(gam_test, Xvar == 'TSEA' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_TSEA = subset(fin_db, Xvar == 'TSEA')
fin_db_TSEA$Yvar = factor(fin_db_TSEA$Yvar, levels = rev(dataLine_TSEA$Yvar))
dataLine_TSEA$Yvar = factor(dataLine_TSEA$Yvar, levels = dataLine_TSEA$Yvar)
TSEA_break_plot <- ggplot(fin_db_TSEA, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_TSEA, !Yvar %in% No_threshold_TSEA_Y), aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_TSEA, Yvar %in% No_threshold_TSEA_Y), aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_TSEA, !Yvar %in% No_threshold_TSEA_Y),  aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'TSEA', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())



dataLine_pH = data.frame(subset(dataLine, Xvar == 'pH') %>% arrange(mean_threshold))
threshold_pH = subset(threshold, Xvar == "pH")
threshold_pH$Yvar = factor(threshold_pH$Yvar, levels = dataLine_pH$Yvar)
threshold_pH$Group = ifelse(threshold_pH$Yvar %in% invertebrate, 'Invertebrate',
                            ifelse(threshold_pH$Yvar %in% protist, 'Protist',
                                   as.character(threshold_pH$Yvar)))
pH_threshods = ggplot(threshold_pH, aes(Yvar, thresholds,fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'pH') +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_pH_Y = subset(gam_test, Xvar == 'pH' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_pH = subset(fin_db, Xvar == 'pH')
fin_db_pH$Yvar = factor(fin_db_pH$Yvar, levels = rev(dataLine_pH$Yvar))
dataLine_pH$Yvar = factor(dataLine_pH$Yvar, levels = dataLine_pH$Yvar)
pH_break_plot <- ggplot(fin_db_pH, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_pH, !Yvar %in% No_threshold_pH_Y), aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_pH, Yvar %in% No_threshold_pH_Y), aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_pH, !Yvar %in% No_threshold_pH_Y),  aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'pH', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())



dataLine_MAT = data.frame(subset(dataLine, Xvar == 'MAT') %>% arrange(mean_threshold))
threshold_MAT = subset(threshold, Xvar == "MAT")
threshold_MAT$Yvar = factor(threshold_MAT$Yvar, levels = dataLine_MAT$Yvar)
threshold_MAT$Group = ifelse(threshold_MAT$Yvar %in% invertebrate, 'Invertebrate',
                             ifelse(threshold_MAT$Yvar %in% protist, 'Protist', 
                                    as.character(threshold_MAT$Yvar)))
MAT_threshods = ggplot(threshold_MAT, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'MAT') +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_MAT_Y = subset(gam_test, Xvar == 'MAT' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_MAT = subset(fin_db, Xvar == 'MAT')
fin_db_MAT$Yvar = factor(fin_db_MAT$Yvar, levels = rev(dataLine_MAT$Yvar))
dataLine_MAT$Yvar = factor(dataLine_MAT$Yvar, levels = dataLine_MAT$Yvar)
MAT_break_plot <- ggplot(fin_db_MAT, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_MAT, !Yvar %in% No_threshold_MAT_Y), aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_MAT, Yvar %in% No_threshold_MAT_Y), aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_MAT, !Yvar %in% No_threshold_MAT_Y),  aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'MAT', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())

dataLine_SOC = data.frame(subset(dataLine, Xvar == 'SOC') %>% arrange(mean_threshold))
threshold_SOC = subset(threshold, Xvar == "SOC")
threshold_SOC$Yvar = factor(threshold_SOC$Yvar, levels = dataLine_SOC$Yvar)
threshold_SOC$Group = ifelse(threshold_SOC$Yvar %in% invertebrate, 'Invertebrate',
                             ifelse(threshold_SOC$Yvar %in% protist, 'Protist', 
                                    as.character(threshold_SOC$Yvar)))
SOC_threshods = ggplot(threshold_SOC, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'SOC') +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_SOC_Y = subset(gam_test, Xvar == 'SOC' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_SOC = subset(fin_db, Xvar == 'SOC')
fin_db_SOC$Yvar = factor(fin_db_SOC$Yvar, levels = rev(dataLine_SOC$Yvar))
dataLine_SOC$Yvar = factor(dataLine_SOC$Yvar, levels = dataLine_SOC$Yvar)
SOC_break_plot <- ggplot(fin_db_SOC, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_SOC, !Yvar %in% No_threshold_SOC_Y), aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_SOC, Yvar %in% No_threshold_SOC_Y), aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_SOC, !Yvar %in% No_threshold_SOC_Y),  aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'SOC', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())



dataLine_MDR = data.frame(subset(dataLine, Xvar == 'MDR') %>% arrange(mean_threshold))
threshold_MDR = subset(threshold, Xvar == "MDR")
threshold_MDR$Yvar = factor(threshold_MDR$Yvar, levels = dataLine_MDR$Yvar)
threshold_MDR$Group = ifelse(threshold_MDR$Yvar %in% invertebrate, 'Invertebrate',
                             ifelse(threshold_MDR$Yvar %in% protist, 'Protist', 
                                    as.character(threshold_MDR$Yvar)))
MDR_threshods = ggplot(threshold_MDR, aes(Yvar, thresholds, fill = Group)) +
  geom_boxplot(show.legend = F,color = 'black') +
  scale_fill_jco()+
  coord_flip() +
  labs(x = 'Biodiversity', y = 'MDR') +
  theme_classic() +
  theme(axis.title = element_text(size = 12, color = 'black'),
        axis.text = element_text(size = 12, color = 'black'))

No_threshold_MDR_Y = subset(gam_test, Xvar == 'MDR' & Freq > 500 & Var1 == 'lineal')$Yvar
fin_db_MDR = subset(fin_db, Xvar == 'MDR')
fin_db_MDR$Yvar = factor(fin_db_MDR$Yvar, levels = rev(dataLine_MDR$Yvar))
dataLine_MDR$Yvar = factor(dataLine_MDR$Yvar, levels = dataLine_MDR$Yvar)
MDR_break_plot <- ggplot(fin_db_MDR, aes(x = x, y = y))+
  theme_classic()+
  facet_grid(Yvar~., scales = 'free',switch = 'both') +
  geom_smooth(data = subset(fin_db_MDR, !Yvar %in% No_threshold_MDR_Y), aes(group = group),method = "lm",formula = y~x, size = 0.5,color = 'black')+
  geom_smooth(data = subset(fin_db_MDR, Yvar %in% No_threshold_MDR_Y), aes(group = 1),method = "lm",formula = y~x, size = 0.5,color = 'grey')+
  geom_vline(data = subset(dataLine_MDR, !Yvar %in% No_threshold_MDR_Y),  aes(xintercept = mean_threshold), lty = 1, color = 'blue', lwd = 0.5)+  
  guides(color = guide_legend(override.aes = list(size = 8))) +
  labs(x = 'MDR', y = NULL, color = NULL) +
  theme(axis.title.y = element_blank(),
        # axis.title.x = element_text(size = 12, color = 'black'),
        axis.text.y = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(color = 'black', size = 0.5),
        strip.placement = 'outside',
        strip.text = element_blank(),
        panel.spacing.y = unit(.1, 'cm'),
        plot.margin = margin(b = 25, t = 10),
        strip.background = element_blank(),
        panel.grid = element_blank())

多图合并

主图

avd_plot = ggarrange(aridity_threshods, aridity_break_plot,NA,
                     psea_threshods, psea_break_plot, NA, 
                     pH_threshods, pH_break_plot, 
                     ncol = 8, widths = c(1,.2,.1,1,.2,.1,1,.2),
                     labels = c('a','','','b','','','c',''),
                     font.label = list(size = 20, color = 'black'),
                     common.legend = T, legend = 'bottom')
ggsave(avd_plot, filename = 'Figure1_new.jpg', width = 12, height = 5, dpi = 600)
# ggsave(avd_plot, filename = 'Figure1.pdf', width = 12, height = 5)
Figure1_new.jpg

作为附图

figure.S2 = ggarrange(ggarrange(avd_Xvar_plot, NA,
                                Salinity_threshods, Salinity_break_plot,NA,
                                Plant_cover_threshods, Plant_cover_break_plot,NA,
                                TSEA_threshods, TSEA_break_plot,
                                ncol = 10, 
                                widths = c(1.2,.1,1,.2,.1,1,.2,.1,1,.2,.1,1,.2),
                                labels = c('a','','b','','','c','','','d',''),
                                font.label = list(size = 20, color = 'black')),
                      ggarrange(MAT_threshods, MAT_break_plot,NA,
                                SOC_threshods, SOC_break_plot,NA,
                                MDR_threshods, MDR_break_plot,
                                ncol = 10, 
                                widths = c(1,.2,.1,1,.2,.1,1,.2,.1,1,.2),
                                labels = c('e','','','f','','','g'),
                                font.label = list(size = 20, color = 'black')),
                      nrow = 2, heights = c(1,1))
ggsave(figure.S2, filename = 'Figure.S3_new.jpg', width = 20, height = 12, dpi = 600)
Figure.S3_new.jpg
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。
禁止转载,如需转载请通过简信或评论联系作者。

相关阅读更多精彩内容

友情链接更多精彩内容