效果图
rm(list=ls())
setwd("Z:\\Alcanivorax\\MT_analysis\\COGID_base_stat")
library(readxl)
library(tidyverse)
library(fastGetColors)
library(ggpubr)
library(patchwork)
library(ggplot2)
library(ggsignif)
samplegroup<- read_xlsx("Z:\\Alcanivorax\\MT_analysis\\samplegroup.xlsx")
file_name <- "Pseudoalteromonas_FPKM_COG_base.count.table"
data <- read.table(file = file_name,header = T)
resortByID<- function(input_data,name_seq){
data_tmp <- input_data
for(i in c(1:length(name_seq))){
index <- which(colnames(input_data)==name_seq[i])
# cat(index)
data_tmp[,i] <- input_data[,index]
}
colnames(data_tmp) <- name_seq
return(data_tmp)
}
order <- samplegroup$SampleID
data2<- resortByID(data,order)
data3 <- data2
colsum <- colSums(data2)
n <- length(colsum)
for (i in 1:n) {
for (j in 1:length(data2[,1])) {
data3[j,i] <- data3[j,i]/colsum[i]
}
}
name <- "AllCOG.txt"
COG_list_names <- name
COG_list_A <- read.delim(paste0("Z:\\Alcanivorax\\MT_analysis\\COGID_base_stat\\keypathway\\",COG_list_names))
rownames(COG_list_A) <- COG_list_A$COG_Number
COG_list <- COG_list_A$COG_Number
index <- which(COG_list %in% rownames(data3) ==T )
keypathway <- as.data.frame(matrix(0,length(COG_list),length(data3)))
rownames(keypathway) <- c(COG_list)
colnames(keypathway) <- colnames(data3)
keypathway[index,] <- data3[COG_list[index],]
varechem <- read_excel("Z:\\Alcanivorax\\MT_analysis\\info_flitered.xlsx",col_names = T)
rownames(varechem) <- varechem$SampleID
varechem2 <- varechem[,-1]
rownames(varechem2) <- varechem$SampleID
varechem<- varechem2[colnames(data3),]
rownames(varechem) <- colnames(data3)
test<- arrange(varechem,Carbon.total)
order2 <- rownames(test)
keypathway<- resortByID(keypathway,order2)
carbon_total <- data.frame(row.names = rownames(varechem),carbon_total = varechem$Carbon.total)
carbon_total<- t(resortByID(t(carbon_total),order2))
######################################绘图#############################
library(tidyr)
library(ggplot2)
library(wesanderson)
for (cog in rownames(keypathway)) {
COG_i_TPM <- keypathway[cog,]
COG_i_TPM_long <- pivot_longer(COG_i_TPM,cols = starts_with("ERR"),names_to = "sample",values_to = "Value")
COG_i_TPM_long$sample <- factor(COG_i_TPM_long$sample,levels = order2)
Kn <- 1
if(max(COG_i_TPM_long$Value)>0){
Kn <- max(carbon_total)/max(COG_i_TPM_long$Value)
}
carbon_total <- carbon_total/Kn
color <- c(rep("#386cb0",10),rep("#f02a7f",37),rep("#bf5b16",11))
COG_i_TPM_long <- cbind(COG_i_TPM_long,carbon_total,color)
# COG_i_TPM_long_s <- COG_i_TPM_long
# #去除carbontotal=0的站位
# COG_i_TPM_long_0 <- COG_i_TPM_long
# COG_i_TPM_long_0 <- COG_i_TPM_long[which(COG_i_TPM_long$carbon_total!=0),]
# COG_i_TPM_long_0 <- COG_i_TPM_long_0[which(Kn*COG_i_TPM_long_0$carbon_total<=0.002),]
# COG_i_TPM_long <- COG_i_TPM_long_0
#[11:47] #>0 && <0.02 total carbon分布均匀
#48:58 #高浓度
P_barplot<- ggplot(COG_i_TPM_long, aes(x = sample, y = Value)) +
geom_bar(stat = "identity",color= color,alpha =0.8,fill= color) +
geom_line(aes(y= carbon_total,group=1),color = "red",size=0.5)+
geom_point(aes(y= carbon_total),color = "blue",size=.8)+
scale_y_continuous(
name = "presence",
sec.axis = sec_axis(~ . * Kn, name = "Carbon total") # 第二坐标轴
)+
labs(title = paste(cog,COG_list_A[cog,1]), x = "Sample", y = "Presence",) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),legend.position = "none")
P_reline_plot<- ggplot(COG_i_TPM_long, aes(x = carbon_total, y = Value)) +
geom_point(color = "blue", size = 2) + # 绘制散点图
geom_smooth(method = "lm", color = "red", se = TRUE) + # 添加回归曲线,se = TRUE 显示置信区间
stat_cor(method = "pearson", aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
label.x.npc = "left", label.y.npc = "top", size = 5) + # 显示相关系数和 P 值
labs(title = cog,
x = "Carbon total",
y = "Gene Expression(TPM)") +
theme_minimal()
COG_i_TPM_long_part1 <- COG_i_TPM_long[11:47,] #均匀浓度
COG_i_TPM_long_part2 <- COG_i_TPM_long[48:58,] #高有机物浓度
P_reline_plot_part1<- ggplot(COG_i_TPM_long_part1, aes(x = carbon_total, y = Value)) +
geom_point(color = "#f02a7f", size = 2) + # 绘制散点图
geom_smooth(method = "lm", color = "red", se = TRUE) + # 添加回归曲线,se = TRUE 显示置信区间
stat_cor(method = "pearson", aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
label.x.npc = "left", label.y.npc = "top", size = 5) + # 显示相关系数和 P 值
labs(title = paste(cog,"Carbon total (正常浓度范围)"),
x = "Carbon total (正常浓度范围)",
y = "Gene Expression(TPM)") +
theme_minimal()
P_reline_plot_part2<- ggplot(COG_i_TPM_long_part2, aes(x = carbon_total, y = Value)) +
geom_point(color = "#bf5b16", size = 2) + # 绘制散点图
geom_smooth(method = "lm", color = "red", se = TRUE) + # 添加回归曲线,se = TRUE 显示置信区间
stat_cor(method = "pearson", aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
label.x.npc = "left", label.y.npc = "top", size = 5) + # 显示相关系数和 P 值
labs(title = paste(cog,"Carbon total(高浓度)"),
x = "Carbon total(高浓度)",
y = "Gene Expression(TPM)") +
theme_minimal()
#绘制检验图
part1 <- COG_i_TPM_long_part1$Value
part2 <- COG_i_TPM_long_part2$Value
data <- data.frame(
value = c(part1, part2),
group = factor(c(rep("carbon total(normal)", length(part1)), rep("carbon total(hight)", length(part2))))
)
# 进行 t 检验
t_test_result <- t.test(part1, part2)
p_value <- t_test_result$p.value
# 绘制箱线图和点图,并标记显著性
P_t_test<- ggplot(data, aes(x = group, y = value, fill = group)) +
geom_boxplot(outlier.shape = NA) + # 绘制箱线图
geom_jitter(width = 0.2, size = 2) + # 绘制点图
scale_fill_manual(values = c("#bf5b16","#f02a7f")) + # 自定义颜色
geom_signif(comparisons = list(c("carbon total(normal)", "carbon total(hight)")), # 标记显著性
map_signif_level = TRUE) +
labs(title = "Boxplot with Significance",
x = "Group",
y = "Value") +
theme_minimal()
combined_plot <- P_barplot + (P_reline_plot+P_reline_plot_part1)/(P_reline_plot_part2 + P_t_test)
print(combined_plot)
# 保存合并后的图像
ggsave(filename = paste0("Z:\\Alcanivorax\\MT_analysis\\COGID_base_stat\\keypathway\\Pseudoalteromonas\\",paste0(cog),"_bar_plot.png"),combined_plot,
width = 15, height = 8)
}