组学关联分析之——ggplot绘制九象限图

Part1. 数据及包的载入

rm(list = ls())


#载入相关的R包;
library(dplyr)
library(ggplot2)
library(ggrepel)


#读入两个差异分析表(分别是转录组和翻译组)并合并;
#设置工作目录;
setwd("E:/R/demo/九象限图")
#查看工作目录中的文件;
dir()
#[1] "protein_exp.csv" "RNA_exp.csv"
#读入数据;
RNA =read.csv("RNA_exp.csv",header=T)
protein=read.csv("protein_exp.csv",header=T)

#查看数据的维度;
dim(RNA)
head(RNA)

dim(protein)
head(protein)

Part2. 两类组学数据的整合

#通常组学的整合取交集
#合并两个表格。选择表头为"id"的列进行合并(取交集);
#suffixes:如果两个表列名相同,则会在列名后加入suffixes(后缀)参数中对应的后缀;
#all.x=FALSE,all.y=FALSE,表示输出的是x,y表格的交集;
combine= merge(RNA,protein,
               by.x="id",
               by.y="id",
               suffixes = c("_RNA","_Protein") ,
               all.x=FALSE,
               all.y=FALSE)
#查看合并后表格的维度;
dim(combine)
#[1] 3619 7
#保存合并后的数据;

Part3. 数据分组及相关性分析

#提取用于作图的列组成新数据框;
data <- data.frame(combine[c(1,3,4,5,6,7)])
#查看前6行;
head(data)


#对数据进行分组;
#生成显著上下调数据标签;

data$part <- case_when(
  # 条件1: DEPs和DEGs都上调
  (data$log2FC_RNA >= 1 & data$log2FC_Protein >= log2(1.5) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_DEGs_SameTrend_Up",
  
  # 条件2: DEPs和DEGs都下调
  (data$log2FC_RNA <= -1 & data$log2FC_Protein <= log2(2/3) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_DEGs_SameTrend_Down",
  
  # 条件3: DEPs上调,DEGs下调
  (data$log2FC_RNA <= -1 & data$log2FC_Protein >= log2(1.5) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_Up_DEGs_Down",
  
  # 条件4: DEPs下调,DEGs上调
  (data$log2FC_RNA >= 1 & data$log2FC_Protein <= log2(2/3) & 
     data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
    "DEPs_Down_DEGs_Up",
  
  # 条件5: only_DEG_Up
  (data$Qvalue_RNA <= 0.05 & data$log2FC_RNA >= 1 & 
     (log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) | 
        data$Qvalue_Protein > 0.05)) ~
    "only_DEG_Up",
  
  # 条件6: only_DEG_Down
  (data$Qvalue_RNA <= 0.05 & data$log2FC_RNA <= -1 & 
     (log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) | 
        data$Qvalue_Protein > 0.05)) ~
    "only_DEG_Down",
  
  # 条件7: only_DEP_Up
  (data$Qvalue_Protein <= 0.05 & data$log2FC_Protein >= log2(1.5) & 
     (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
    "only_DEP_Up",
  
  # 条件8: only_DEP_Down
  (data$Qvalue_Protein <= 0.05 & data$log2FC_Protein <= log2(2/3) & 
     (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
    "only_DEP_Down",
  
  # 条件9: BothNone
  ((log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5)) | 
     data$Qvalue_Protein >= 0.05) &
    (-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA >= 0.05) ~
    "BothNone",
   
  #TRUE ~ "Other" # 默认情况
)

head(data)
data %>% count(part)  #统计分组情况

Part 4. 提取满足条件的数据并进行相关性分析


# 提取满足条件的数据
filtered_data_same_trend <- data[data$part == "DEPs_DEGs_SameTrend_Up" |
                                   data$part == "DEPs_DEGs_SameTrend_Down", ]

filtered_data_cor_opposite<- data[data$part == " DEPs_Down_DEGs_Up" | 
                                    data$part == "DEPs_Up_DEGs_Down", ]
# 查看提取的数据
head(filtered_data_same_trend)
head(filtered_data_cor_opposite)


#### 计算数据相关性  ####
cor_same_trend = round(cor(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein),2)
cor_opposite = round(cor(filtered_data_cor_opposite$log2FC_RNA,filtered_data_cor_opposite$log2FC_Protein),2)


correlation_test <- cor.test(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein, method = "pearson")

cor_same_trend
cor_opposite

Part 5.1 绘图—— 直角坐标系

#### 3.1 绘制直角坐标系绘图 ####
{
# 定义绘制坐标轴函数:
draw_axis_line <- function(length_x, length_y, 
                           tick_step = NULL, lab_step = NULL){
  axis_x_begin <- -1*length_x
  axis_x_end <- length_x
  
  axis_y_begin  <- -1*length_y
  axis_y_end    <- length_y
  
  if (missing(tick_step))
    tick_step <- 1
  
  if (is.null(lab_step))
    lab_step <- 2
  
  # axis ticks data
  tick_x_frame <- data.frame(ticks = seq(axis_x_begin, axis_x_end, 
                                         by = tick_step))
  
  tick_y_frame <-  data.frame(ticks = seq(axis_y_begin, axis_y_end, 
                                          by = tick_step))
  
  # axis labels data
  lab_x_frame <- subset(data.frame(lab = seq(axis_x_begin, axis_x_end, 
                                             by = lab_step), zero = 0), 
                        lab != 0)
  
  lab_y_frame <- subset(data.frame(lab = seq(axis_y_begin, axis_y_end,
                                             by = lab_step),zero = 0), 
                        lab != 0)
  
  # set tick length
  tick_x_length = 0.05
  tick_y_length = 0.05
  
  # set zero point
  
  data <- data.frame(x = 0, y = 0)
  p <- ggplot(data = data) +
    
    # draw axis line
    geom_segment(y = 0, yend = 0, 
                 x = axis_x_begin, 
                 xend = axis_x_end,
                 size = 0.5) + 
    geom_segment(x = 0, xend = 0, 
                 y = axis_y_begin, 
                 yend = axis_y_end,
                 size = 0.5) +
    # x ticks
    geom_segment(data = tick_x_frame, 
                 aes(x = ticks, xend = ticks, 
                     y = 0, yend = 0 - tick_x_length)) +
    # y ticks
    geom_segment(data = tick_y_frame, 
                 aes(x = 0, xend = 0 - tick_y_length, 
                     y = ticks, yend = ticks)) + 
    
    # labels
    geom_text(data=lab_x_frame, aes(x=lab, y=zero, label=lab), vjust = 1.5) +
    geom_text(data=lab_y_frame, aes(x=zero, y=lab, label=lab), hjust= 1.5) +
    theme_minimal()+
    theme(panel.grid = element_blank(),axis.text = element_blank())
  return(p)
}

p <- draw_axis_line(8, 4)
p1 <- p + geom_point(data=data, aes(data$log2FC_RNA, data$log2FC_Protein, color = part))+
  scale_color_manual(values = c("DEPs_DEGs_SameTrend_Up" = "#dd8653",
                                "DEPs_DEGs_SameTrend_Down" = "#dd8653",
                                "DEPs_Up_DEGs_Down" = "#59a5d7",
                                "DEPs_Up_DEGs_Up" = "#59a5d7", 
                                "only_DEG_Up" = "#aa65a4", 
                                "only_DEG_Down" = "#aa65a4",
                                "only_DEP_Up" = "#99CC00",
                                "only_DEP_Down" = "#99CC00",
                                "BothNone"="#878787"),
                     values=alpha(0.7),
                     breaks = c("DEPs_DEGs_SameTrend_Up",
                                "DEPs_DEGs_SameTrend_Down",
                                "DEPs_Up_DEGs_Down",
                                "DEPs_Up_DEGs_Up", 
                                "only_DEG_Up" , 
                                "only_DEG_Down",
                                "only_DEP_Up",
                                "only_DEP_Down",
                                "BothNone"))+
  xlab("mRNA:FC(5mM Selenite/CK)")+
  ylab("Protein:FC(5mM Selenite/CK)")+
  theme(legend.position = "bottom")+
  annotate("text", label = "bolditalic(Brain)", parse = TRUE, 
           x = -2, y = 2, size = 4, colour = "black")+
  guides(color = guide_legend(title = "", ncol = 1, byrow = TRUE))
}

Part 5.2 绘图—— ggplot绘图


#开始尝试绘图;
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))
#添加散点;
p1 <- p0+geom_point(size=1.2)+guides(color="none")
p1


#自定义半透明颜色;
mycolor <- c("gray80","#FF0000","#FF0000",
             "#FFA500", "#FFA500","#99CC00",
             "#99CC00","#0020AF","#0020AF")
#颜色对应数据分组,可自行修改

p2 <- p1 + scale_colour_manual(name="",values=alpha(mycolor,0.7))
p2


#添加辅助线;
p3 <- p2+geom_hline(yintercept = c(log2(2/3),log2(1.5)),
                    size = 0.5,
                    color = "grey40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "grey40",
             lty = "dashed")
p3


#调整横轴和纵轴绘图区域的范围;
#设置y轴范围(上下两端的空白区域设为1),修改刻度标签;
#expansion函数设置坐标轴范围两端空白区域的大小;mult为“倍数”模式,add为“加性”模式;
p4<-p3+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))
p4


#自定义图表主题,对图表做精细调整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
#隐藏纵轴,并对字体样式、坐标轴的粗细、颜色、刻度长度进行限定;
mytheme<-theme_bw()+
  theme(text=element_text(family = "sans",colour ="gray30",size = 12),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"))
#应用自定义主题;
p5 <- p4+mytheme
p5


#如果考虑差异的显著性,则需要进一步分组;
#生成至少在一个组学显著上下调的数据标签;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
                      data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)

#将作图数据表格拆分成显著和不显著两部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")

#重新进行绘图;
p6 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
  geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
  geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
  guides(color="none")
p6


#自定义半透明颜色;
mycolor <- c("#99CC00","#FF9999","#c77cff","gray90")
p7 <- p6 + scale_colour_manual(name="",values=alpha(mycolor,0.7))+mytheme
p7

#添加辅助线并修改坐标轴范围;
p8 <- p7+geom_hline(yintercept = c(log(2/3),log2(1.5)),
                    size = 0.5,
                    color = "gray40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "gray40",
             lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                   limits = c(-3, 3),
                   breaks = c(-3,-2,-1,0,1,2,3),
                   label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  annotate("text", x = 3, y = 2,
           label =  c('DEPs_DEGs_SameTrend 89', 'DEPs_DEGs_Opposite 72' ), 
           parse = TRUE)
p8


#计算两个组学差异倍数的相关性,并取2位小数;
cor = round(cor(data$log2FC_RNA,data$log2FC_Protein),2)
#准备作为图形的标题;
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")

lab
#[1] "correlation=0.35"
#在图上添加文字标签;
p8+
  geom_text(x = -5., y = 2.5, label = lab, color="black",size = 2)+
  geom_text(x = 4., y = -2.2, label = lab1, color="black",size=2)+
  geom_text(x = 4., y = -2.5, label = lab2, color="black",size=2)

Part 6.1 绘图简易代码——不区分significant和no_significant

mycolor <- c("gray80","#FF0000","#FF0000",
             "#FFA500", "#FFA500","#99CC00",
             "#99CC00","#0020AF","#0020AF")



#自定义图表主题,对图表做精细调整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2




p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))+
  geom_point(size=1.2)+guides(color="none")+
  scale_colour_manual(name="",values=alpha(mycolor,0.7))+
  geom_hline(yintercept = c(log2(2/3),log2(1.5)),
                    size = 0.5,
                    color = "grey40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "grey40",
             lty = "dashed")+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  theme_bw()+
  theme(text=element_text(family = "sans",colour ="gray30",size = 12),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"))

Part 6 .2 绘图简易代码——区分significant和no_significant

#如果考虑差异的显著性,则需要进一步分组;
#生成至少在一个组学显著上下调的数据标签;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
                      data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)

#将作图数据表格拆分成显著和不显著两部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")



lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")


#重新进行绘图;
p1 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
  geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
  geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
  guides(color="none")+
  scale_colour_manual(name="",values=alpha(mycolor,0.7))+
  theme_bw()+
  theme(text=element_text(family = "serif",colour ="black",size = 12,face="bold"),
        panel.grid = element_blank(),
        panel.border = element_rect(size = 0.8,colour = "gray30"),
        axis.line = element_blank(),
        axis.ticks = element_line(size = 0.6,colour = "gray30"),
        axis.ticks.length = unit(1.5,units = "mm"),
        plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
                         units="inches"),
        plot.title = element_text(hjust = 0.5,vjust = 0,size=11))+
  geom_hline(yintercept = c(log(2/3),log2(1.5)),
                    size = 0.5,
                    color = "gray40",
                    lty = "dashed")+
  geom_vline(xintercept = c(-1,1),
             size = 0.5,
             color = "gray40",
             lty = "dashed")+
  scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-3, 3),
                     breaks = c(-3,-2,-1,0,1,2,3),
                     label = c("-3","-2","-1","0","1","2","3"))+
  scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
                     limits = c(-7, 7),
                     breaks = c(-5,-2.5,0,2.5,5),
                     label = c("-5","-2.5","0","2.5","5"))+
  geom_text(x = -4., y = 2.5, label = lab,  color="#000000",size =3.5,fontface="plain")+
  geom_text(x = 4., y = -2.2, label = lab1, color="#000000",size=3.5,fontface="plain")+
  geom_text(x = 4., y = -2.4, label = lab2, color="#000000",size=3.5,fontface="plain")+
  labs(title = "Correlation of Proteome and Transcriptome")

p1  





©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 214,444评论 6 496
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 91,421评论 3 389
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 160,036评论 0 349
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 57,363评论 1 288
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 66,460评论 6 386
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,502评论 1 292
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,511评论 3 412
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 38,280评论 0 270
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,736评论 1 307
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 37,014评论 2 328
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 39,190评论 1 342
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,848评论 5 338
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,531评论 3 322
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 31,159评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,411评论 1 268
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 47,067评论 2 365
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 44,078评论 2 352

推荐阅读更多精彩内容