组学关联分析之——ggplot绘制九象限图

Part1. 数据及包的载入

rm(list = ls())


#载入相关的R包;
library(dplyr)
library(ggplot2)
library(ggrepel)


#读入两个差异分析表(分别是转录组和翻译组)并合并;
#设置工作目录;
setwd("E:/R/demo/九象限图")
#查看工作目录中的文件;
dir()
#[1] "protein_exp.csv" "RNA_exp.csv"
#读入数据;
RNA =read.csv("RNA_exp.csv",header=T)
protein=read.csv("protein_exp.csv",header=T)

#查看数据的维度;
dim(RNA)
head(RNA)

dim(protein)
head(protein)

Part2. 两类组学数据的整合

#通常组学的整合取交集
#合并两个表格。选择表头为"id"的列进行合并(取交集);
#suffixes:如果两个表列名相同,则会在列名后加入suffixes(后缀)参数中对应的后缀;
#all.x=FALSE,all.y=FALSE,表示输出的是x,y表格的交集;
combine= merge(RNA,protein,
               by.x="id",
               by.y="id",
               suffixes = c("_RNA","_Protein") ,
               all.x=FALSE,
               all.y=FALSE)
#查看合并后表格的维度;
dim(combine)
#[1] 3619 7
#保存合并后的数据;

Part3. 数据分组及相关性分析

#提取用于作图的列组成新数据框;
data <- data.frame(combine[c(1,3,4,5,6,7)])
#查看前6行;
head(data)


#对数据进行分组;
#生成显著上下调数据标签;

data$part <- case_when(
  # 条件1: DEPs和DEGs都上调
  (data$log2FC_RNA >= 1 & data$log2FC_Protein >= log2(1.5) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_DEGs_SameTrend_Up",
  
  # 条件2: DEPs和DEGs都下调
  (data$log2FC_RNA <= -1 & data$log2FC_Protein <= log2(2/3) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_DEGs_SameTrend_Down",
  
  # 条件3: DEPs上调,DEGs下调
  (data$log2FC_RNA <= -1 & data$log2FC_Protein >= log2(1.5) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_Up_DEGs_Down",
  
  # 条件4: DEPs下调,DEGs上调
  (data$log2FC_RNA >= 1 & data$log2FC_Protein <= log2(2/3) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_Down_DEGs_Up",
  
  # 条件5: only_DEG_Up
  (data$Qvalue_RNA <= 0.05 & data$log2FC_RNA >= 1 & 
     (log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) | 
        data$Qvalue_Protein > 0.05)) ~
    "only_DEG_Up",
  
  # 条件6: only_DEG_Down
  (data$Qvalue_RNA <= 0.05 & data$log2FC_RNA <= -1 & 
     (log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) | 
        data$Qvalue_Protein > 0.05)) ~
    "only_DEG_Down",
  
  # 条件7: only_DEP_Up
  (data$Qvalue_Protein <= 0.05 & data$log2FC_Protein >= log2(1.5) & 
     (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
    "only_DEP_Up",
  
  # 条件8: only_DEP_Down
  (data$Qvalue_Protein <= 0.05 & data$log2FC_Protein <= log2(2/3) & 
     (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
    "only_DEP_Down",
  
  # 条件9: BothNone
  ((log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5)) | 
     data$Qvalue_Protein >= 0.05) &
    (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA >= 0.05) ~
    "BothNone",
   
  #TRUE ~ "Other" # 默认情况
)

head(data)
data %>% count(part)  #统计分组情况

Part 4. 提取满足条件的数据并进行相关性分析


# 提取满足条件的数据
filtered_data_same_trend <- data[data$part == "DEPs_DEGs_SameTrend_Up" |
                                   data$part == "DEPs_DEGs_SameTrend_Down", ]

filtered_data_cor_opposite<- data[data$part == " DEPs_Down_DEGs_Up" | 
                                    data$part == "DEPs_Up_DEGs_Down", ]
# 查看提取的数据
head(filtered_data_same_trend)
head(filtered_data_cor_opposite)


#### 计算数据相关性  ####
cor_same_trend = round(cor(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein),2)
cor_opposite = round(cor(filtered_data_cor_opposite$log2FC_RNA,filtered_data_cor_opposite$log2FC_Protein),2)


correlation_test <- cor.test(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein, method = "pearson")

cor_same_trend
cor_opposite

Part 5.1 绘图—— 直角坐标系

#### 3.1 绘制直角坐标系绘图 ####
{
# 定义绘制坐标轴函数:
draw_axis_line <- function(length_x, length_y, 
                           tick_step = NULL, lab_step = NULL){
  axis_x_begin <- -1*length_x
  axis_x_end <- length_x
  
  axis_y_begin  <- -1*length_y
  axis_y_end    <- length_y
  
  if (missing(tick_step))
    tick_step <- 1
  
  if (is.null(lab_step))
    lab_step <- 2
  
  # axis ticks data
  tick_x_frame <- data.frame(ticks = seq(axis_x_begin, axis_x_end, 
                                         by = tick_step))
  
  tick_y_frame <-  data.frame(ticks = seq(axis_y_begin, axis_y_end, 
                                          by = tick_step))
  
  # axis labels data
  lab_x_frame <- subset(data.frame(lab = seq(axis_x_begin, axis_x_end, 
                                             by = lab_step), zero = 0), 
                        lab != 0)
  
  lab_y_frame <- subset(data.frame(lab = seq(axis_y_begin, axis_y_end,
                                             by = lab_step),zero = 0), 
                        lab != 0)
  
  # set tick length
  tick_x_length = 0.05
  tick_y_length = 0.05
  
  # set zero point
  
  data <- data.frame(x = 0, y = 0)
  p <- ggplot(data = data) +
    
    # draw axis line
    geom_segment(y = 0, yend = 0, 
                 x = axis_x_begin, 
                 xend = axis_x_end,
                 size = 0.5) + 
    geom_segment(x = 0, xend = 0, 
                 y = axis_y_begin, 
                 yend = axis_y_end,
                 size = 0.5) +
    # x ticks
    geom_segment(data = tick_x_frame, 
                 aes(x = ticks, xend = ticks, 
                     y = 0, yend = 0 - tick_x_length)) +
    # y ticks
    geom_segment(data = tick_y_frame, 
                 aes(x = 0, xend = 0 - tick_y_length, 
                     y = ticks, yend = ticks)) + 
    
    # labels
    geom_text(data=lab_x_frame, aes(x=lab, y=zero, label=lab), vjust = 1.5) +
    geom_text(data=lab_y_frame, aes(x=zero, y=lab, label=lab), hjust= 1.5) +
    theme_minimal()+
    theme(panel.grid = element_blank(),axis.text = element_blank())
  return(p)
}

p <- draw_axis_line(8, 4)
p1 <- p + geom_point(data=data, aes(data$log2FC_RNA, data$log2FC_Protein, color = part))+
  scale_color_manual(values = c("DEPs_DEGs_SameTrend_Up" = "#dd8653",
                                "DEPs_DEGs_SameTrend_Down" = "#dd8653",
                                "DEPs_Up_DEGs_Down" = "#59a5d7",
                                "DEPs_Up_DEGs_Up" = "#59a5d7", 
                                "only_DEG_Up" = "#aa65a4", 
                                "only_DEG_Down" = "#aa65a4",
                                "only_DEP_Up" = "#99CC00",
                                "only_DEP_Down" = "#99CC00",
                                "BothNone"="#878787"),
                     values=alpha(0.7),
                     breaks = c("DEPs_DEGs_SameTrend_Up",
                                "DEPs_DEGs_SameTrend_Down",
                                "DEPs_Up_DEGs_Down",
                                "DEPs_Up_DEGs_Up", 
                                "only_DEG_Up" , 
                                "only_DEG_Down",
                                "only_DEP_Up",
                                "only_DEP_Down",
                                "BothNone"))+
  xlab("mRNA:FC(5mM Selenite/CK)")+
  ylab("Protein:FC(5mM Selenite/CK)")+
  theme(legend.position = "bottom")+
  annotate("text", label = "bolditalic(Brain)", parse = TRUE, 
           x = -2, y = 2, size = 4, colour = "black")+
  guides(color = guide_legend(title = "", ncol = 1, byrow = TRUE))
}

Part 5.2 绘图—— ggplot绘图


#开始尝试绘图;
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))
#添加散点;
p1 <- p0+geom_point(size=1.2)+guides(color="none")
p1


#自定义半透明颜色;
mycolor <- c("gray80","#FF0000","#FF0000",
             "#FFA500", "#FFA500","#99CC00",
             "#99CC00","#0020AF","#0020AF")
#颜色对应数据分组,可自行修改

p2 <- p1 + scale_colour_manual(name="",values=alpha(mycolor,0.7))
p2


#添加辅助线;
p3 <- p2+geom_hline(yintercept = c(log2(2/3),log2(1.5)),
                    size = 0.5,
                    color = "grey40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "grey40",
             lty = "dashed")
p3


#调整横轴和纵轴绘图区域的范围;
#设置y轴范围(上下两端的空白区域设为1),修改刻度标签;
#expansion函数设置坐标轴范围两端空白区域的大小;mult为“倍数”模式,add为“加性”模式;
p4<-p3+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))
p4


#自定义图表主题,对图表做精细调整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
#隐藏纵轴,并对字体样式、坐标轴的粗细、颜色、刻度长度进行限定;
mytheme<-theme_bw()+
  theme(text=element_text(family = "sans",colour ="gray30",size = 12),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"))
#应用自定义主题;
p5 <- p4+mytheme
p5


#如果考虑差异的显著性,则需要进一步分组;
#生成至少在一个组学显著上下调的数据标签;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
                      data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)

#将作图数据表格拆分成显著和不显著两部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")

#重新进行绘图;
p6 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
  geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
  geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
  guides(color="none")
p6


#自定义半透明颜色;
mycolor <- c("#99CC00","#FF9999","#c77cff","gray90")
p7 <- p6 + scale_colour_manual(name="",values=alpha(mycolor,0.7))+mytheme
p7

#添加辅助线并修改坐标轴范围;
p8 <- p7+geom_hline(yintercept = c(log(2/3),log2(1.5)),
                    size = 0.5,
                    color = "gray40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "gray40",
             lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                   limits = c(-3, 3),
                   breaks = c(-3,-2,-1,0,1,2,3),
                   label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  annotate("text", x = 3, y = 2,
           label =  c('DEPs_DEGs_SameTrend 89', 'DEPs_DEGs_Opposite 72' ), 
           parse = TRUE)
p8


#计算两个组学差异倍数的相关性,并取2位小数;
cor = round(cor(data$log2FC_RNA,data$log2FC_Protein),2)
#准备作为图形的标题;
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")

lab
#[1] "correlation=0.35"
#在图上添加文字标签;
p8+
  geom_text(x = -5., y = 2.5, label = lab, color="black",size = 2)+
  geom_text(x = 4., y = -2.2, label = lab1, color="black",size=2)+
  geom_text(x = 4., y = -2.5, label = lab2, color="black",size=2)

Part 6.1 绘图简易代码——不区分significant和no_significant

mycolor <- c("gray80","#FF0000","#FF0000",
             "#FFA500", "#FFA500","#99CC00",
             "#99CC00","#0020AF","#0020AF")



#自定义图表主题,对图表做精细调整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2




p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))+
  geom_point(size=1.2)+guides(color="none")+
  scale_colour_manual(name="",values=alpha(mycolor,0.7))+
  geom_hline(yintercept = c(log2(2/3),log2(1.5)),
                    size = 0.5,
                    color = "grey40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "grey40",
             lty = "dashed")+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  theme_bw()+
  theme(text=element_text(family = "sans",colour ="gray30",size = 12),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"))

Part 6 .2 绘图简易代码——区分significant和no_significant

#如果考虑差异的显著性,则需要进一步分组;
#生成至少在一个组学显著上下调的数据标签;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
                      data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)

#将作图数据表格拆分成显著和不显著两部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")



lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")


#重新进行绘图;
p1 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
  geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
  geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
  guides(color="none")+
  scale_colour_manual(name="",values=alpha(mycolor,0.7))+
  theme_bw()+
  theme(text=element_text(family = "serif",colour ="black",size = 12,face="bold"),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"),
        plot.title = element_text(hjust = 0.5,vjust = 0,size=11))+
  geom_hline(yintercept = c(log(2/3),log2(1.5)),
                    size = 0.5,
                    color = "gray40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "gray40",
             lty = "dashed")+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  geom_text(x = -4., y = 2.5, label = lab,  color="#000000",size =3.5,fontface="plain")+
  geom_text(x = 4., y = -2.2, label = lab1, color="#000000",size=3.5,fontface="plain")+
  geom_text(x = 4., y = -2.4, label = lab2, color="#000000",size=3.5,fontface="plain")+
  labs(title = "Correlation of Proteome and Transcriptome")

p1  





©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容