Part1. 数据及包的载入
rm(list = ls())
#载入相关的R包;
library(dplyr)
library(ggplot2)
library(ggrepel)
#读入两个差异分析表(分别是转录组和翻译组)并合并;
#设置工作目录;
setwd("E:/R/demo/九象限图")
#查看工作目录中的文件;
dir()
#[1] "protein_exp.csv" "RNA_exp.csv"
#读入数据;
RNA =read.csv("RNA_exp.csv",header=T)
protein=read.csv("protein_exp.csv",header=T)
#查看数据的维度;
dim(RNA)
head(RNA)
dim(protein)
head(protein)
Part2. 两类组学数据的整合
#通常组学的整合取交集
#合并两个表格。选择表头为"id"的列进行合并(取交集);
#suffixes:如果两个表列名相同,则会在列名后加入suffixes(后缀)参数中对应的后缀;
#all.x=FALSE,all.y=FALSE,表示输出的是x,y表格的交集;
combine= merge(RNA,protein,
by.x="id",
by.y="id",
suffixes = c("_RNA","_Protein") ,
all.x=FALSE,
all.y=FALSE)
#查看合并后表格的维度;
dim(combine)
#[1] 3619 7
#保存合并后的数据;
Part3. 数据分组及相关性分析
#提取用于作图的列组成新数据框;
data <- data.frame(combine[c(1,3,4,5,6,7)])
#查看前6行;
head(data)
#对数据进行分组;
#生成显著上下调数据标签;
data$part <- case_when(
# 条件1: DEPs和DEGs都上调
(data$log2FC_RNA >= 1 & data$log2FC_Protein >= log2(1.5) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_DEGs_SameTrend_Up",
# 条件2: DEPs和DEGs都下调
(data$log2FC_RNA <= -1 & data$log2FC_Protein <= log2(2/3) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_DEGs_SameTrend_Down",
# 条件3: DEPs上调,DEGs下调
(data$log2FC_RNA <= -1 & data$log2FC_Protein >= log2(1.5) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_Up_DEGs_Down",
# 条件4: DEPs下调,DEGs上调
(data$log2FC_RNA >= 1 & data$log2FC_Protein <= log2(2/3) &
data$Qvalue_RNA <= 0.05 & data$Qvalue_Protein <= 0.05) ~
"DEPs_Down_DEGs_Up",
# 条件5: only_DEG_Up
(data$Qvalue_RNA <= 0.05 & data$log2FC_RNA >= 1 &
(log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) |
data$Qvalue_Protein > 0.05)) ~
"only_DEG_Up",
# 条件6: only_DEG_Down
(data$Qvalue_RNA <= 0.05 & data$log2FC_RNA <= -1 &
(log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5) |
data$Qvalue_Protein > 0.05)) ~
"only_DEG_Down",
# 条件7: only_DEP_Up
(data$Qvalue_Protein <= 0.05 & data$log2FC_Protein >= log2(1.5) &
(-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
"only_DEP_Up",
# 条件8: only_DEP_Down
(data$Qvalue_Protein <= 0.05 & data$log2FC_Protein <= log2(2/3) &
(-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA > 0.05)) ~
"only_DEP_Down",
# 条件9: BothNone
((log2(2/3) < data$log2FC_Protein & data$log2FC_Protein < log2(1.5)) |
data$Qvalue_Protein >= 0.05) &
(-1 < data$log2FC_RNA & data$log2FC_RNA < 1 | data$Qvalue_RNA >= 0.05) ~
"BothNone",
#TRUE ~ "Other" # 默认情况
)
head(data)
data %>% count(part) #统计分组情况
Part 4. 提取满足条件的数据并进行相关性分析
# 提取满足条件的数据
filtered_data_same_trend <- data[data$part == "DEPs_DEGs_SameTrend_Up" |
data$part == "DEPs_DEGs_SameTrend_Down", ]
filtered_data_cor_opposite<- data[data$part == " DEPs_Down_DEGs_Up" |
data$part == "DEPs_Up_DEGs_Down", ]
# 查看提取的数据
head(filtered_data_same_trend)
head(filtered_data_cor_opposite)
#### 计算数据相关性 ####
cor_same_trend = round(cor(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein),2)
cor_opposite = round(cor(filtered_data_cor_opposite$log2FC_RNA,filtered_data_cor_opposite$log2FC_Protein),2)
correlation_test <- cor.test(filtered_data_same_trend$log2FC_RNA,filtered_data_same_trend$log2FC_Protein, method = "pearson")
cor_same_trend
cor_opposite
Part 5.1 绘图—— 直角坐标系
#### 3.1 绘制直角坐标系绘图 ####
{
# 定义绘制坐标轴函数:
draw_axis_line <- function(length_x, length_y,
tick_step = NULL, lab_step = NULL){
axis_x_begin <- -1*length_x
axis_x_end <- length_x
axis_y_begin <- -1*length_y
axis_y_end <- length_y
if (missing(tick_step))
tick_step <- 1
if (is.null(lab_step))
lab_step <- 2
# axis ticks data
tick_x_frame <- data.frame(ticks = seq(axis_x_begin, axis_x_end,
by = tick_step))
tick_y_frame <- data.frame(ticks = seq(axis_y_begin, axis_y_end,
by = tick_step))
# axis labels data
lab_x_frame <- subset(data.frame(lab = seq(axis_x_begin, axis_x_end,
by = lab_step), zero = 0),
lab != 0)
lab_y_frame <- subset(data.frame(lab = seq(axis_y_begin, axis_y_end,
by = lab_step),zero = 0),
lab != 0)
# set tick length
tick_x_length = 0.05
tick_y_length = 0.05
# set zero point
data <- data.frame(x = 0, y = 0)
p <- ggplot(data = data) +
# draw axis line
geom_segment(y = 0, yend = 0,
x = axis_x_begin,
xend = axis_x_end,
size = 0.5) +
geom_segment(x = 0, xend = 0,
y = axis_y_begin,
yend = axis_y_end,
size = 0.5) +
# x ticks
geom_segment(data = tick_x_frame,
aes(x = ticks, xend = ticks,
y = 0, yend = 0 - tick_x_length)) +
# y ticks
geom_segment(data = tick_y_frame,
aes(x = 0, xend = 0 - tick_y_length,
y = ticks, yend = ticks)) +
# labels
geom_text(data=lab_x_frame, aes(x=lab, y=zero, label=lab), vjust = 1.5) +
geom_text(data=lab_y_frame, aes(x=zero, y=lab, label=lab), hjust= 1.5) +
theme_minimal()+
theme(panel.grid = element_blank(),axis.text = element_blank())
return(p)
}
p <- draw_axis_line(8, 4)
p1 <- p + geom_point(data=data, aes(data$log2FC_RNA, data$log2FC_Protein, color = part))+
scale_color_manual(values = c("DEPs_DEGs_SameTrend_Up" = "#dd8653",
"DEPs_DEGs_SameTrend_Down" = "#dd8653",
"DEPs_Up_DEGs_Down" = "#59a5d7",
"DEPs_Up_DEGs_Up" = "#59a5d7",
"only_DEG_Up" = "#aa65a4",
"only_DEG_Down" = "#aa65a4",
"only_DEP_Up" = "#99CC00",
"only_DEP_Down" = "#99CC00",
"BothNone"="#878787"),
values=alpha(0.7),
breaks = c("DEPs_DEGs_SameTrend_Up",
"DEPs_DEGs_SameTrend_Down",
"DEPs_Up_DEGs_Down",
"DEPs_Up_DEGs_Up",
"only_DEG_Up" ,
"only_DEG_Down",
"only_DEP_Up",
"only_DEP_Down",
"BothNone"))+
xlab("mRNA:FC(5mM Selenite/CK)")+
ylab("Protein:FC(5mM Selenite/CK)")+
theme(legend.position = "bottom")+
annotate("text", label = "bolditalic(Brain)", parse = TRUE,
x = -2, y = 2, size = 4, colour = "black")+
guides(color = guide_legend(title = "", ncol = 1, byrow = TRUE))
}
Part 5.2 绘图—— ggplot绘图
#开始尝试绘图;
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))
#添加散点;
p1 <- p0+geom_point(size=1.2)+guides(color="none")
p1
#自定义半透明颜色;
mycolor <- c("gray80","#FF0000","#FF0000",
"#FFA500", "#FFA500","#99CC00",
"#99CC00","#0020AF","#0020AF")
#颜色对应数据分组,可自行修改
p2 <- p1 + scale_colour_manual(name="",values=alpha(mycolor,0.7))
p2
#添加辅助线;
p3 <- p2+geom_hline(yintercept = c(log2(2/3),log2(1.5)),
size = 0.5,
color = "grey40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "grey40",
lty = "dashed")
p3
#调整横轴和纵轴绘图区域的范围;
#设置y轴范围(上下两端的空白区域设为1),修改刻度标签;
#expansion函数设置坐标轴范围两端空白区域的大小;mult为“倍数”模式,add为“加性”模式;
p4<-p3+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))
p4
#自定义图表主题,对图表做精细调整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
#隐藏纵轴,并对字体样式、坐标轴的粗细、颜色、刻度长度进行限定;
mytheme<-theme_bw()+
theme(text=element_text(family = "sans",colour ="gray30",size = 12),
panel.grid = element_blank(),
panel.border = element_rect(size = 0.8,colour = "gray30"),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.6,colour = "gray30"),
axis.ticks.length = unit(1.5,units = "mm"),
plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
units="inches"))
#应用自定义主题;
p5 <- p4+mytheme
p5
#如果考虑差异的显著性,则需要进一步分组;
#生成至少在一个组学显著上下调的数据标签;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)
#将作图数据表格拆分成显著和不显著两部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")
#重新进行绘图;
p6 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
guides(color="none")
p6
#自定义半透明颜色;
mycolor <- c("#99CC00","#FF9999","#c77cff","gray90")
p7 <- p6 + scale_colour_manual(name="",values=alpha(mycolor,0.7))+mytheme
p7
#添加辅助线并修改坐标轴范围;
p8 <- p7+geom_hline(yintercept = c(log(2/3),log2(1.5)),
size = 0.5,
color = "gray40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "gray40",
lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))+
annotate("text", x = 3, y = 2,
label = c('DEPs_DEGs_SameTrend 89', 'DEPs_DEGs_Opposite 72' ),
parse = TRUE)
p8
#计算两个组学差异倍数的相关性,并取2位小数;
cor = round(cor(data$log2FC_RNA,data$log2FC_Protein),2)
#准备作为图形的标题;
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")
lab
#[1] "correlation=0.35"
#在图上添加文字标签;
p8+
geom_text(x = -5., y = 2.5, label = lab, color="black",size = 2)+
geom_text(x = 4., y = -2.2, label = lab1, color="black",size=2)+
geom_text(x = 4., y = -2.5, label = lab2, color="black",size=2)
Part 6.1 绘图简易代码——不区分significant和no_significant
mycolor <- c("gray80","#FF0000","#FF0000",
"#FFA500", "#FFA500","#99CC00",
"#99CC00","#0020AF","#0020AF")
#自定义图表主题,对图表做精细调整;
top.mar=0.2
right.mar=0.2
bottom.mar=0.2
left.mar=0.2
p0 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein,color=part))+
geom_point(size=1.2)+guides(color="none")+
scale_colour_manual(name="",values=alpha(mycolor,0.7))+
geom_hline(yintercept = c(log2(2/3),log2(1.5)),
size = 0.5,
color = "grey40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "grey40",
lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))+
theme_bw()+
theme(text=element_text(family = "sans",colour ="gray30",size = 12),
panel.grid = element_blank(),
panel.border = element_rect(size = 0.8,colour = "gray30"),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.6,colour = "gray30"),
axis.ticks.length = unit(1.5,units = "mm"),
plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
units="inches"))
Part 6 .2 绘图简易代码——区分significant和no_significant
#如果考虑差异的显著性,则需要进一步分组;
#生成至少在一个组学显著上下调的数据标签;
data$sig <- case_when(data$Qvalue_RNA < 0.05 & data$Qvalue_Protein <0.05 ~ "sig",
data$Qvalue_RNA >= 0.05 | data$Qvalue_Protein >=0.05 ~ "no")
head(data)
#将作图数据表格拆分成显著和不显著两部分;
sig <- filter(data,sig == "sig")
non <- filter(data,sig == "no")
lab = paste("Correlation of Same Trend:",cor_same_trend,sep="","***")
lab1= paste("SameTrend: 89")
lab2= paste("Opposite: 72")
#重新进行绘图;
p1 <-ggplot(data,aes(log2FC_RNA,log2FC_Protein))+
geom_point(data=non,aes(log2FC_RNA,log2FC_Protein),size=1.2,color="gray90")+
geom_point(data=sig,aes(log2FC_RNA,log2FC_Protein,color=part),size=1.5)+
guides(color="none")+
scale_colour_manual(name="",values=alpha(mycolor,0.7))+
theme_bw()+
theme(text=element_text(family = "serif",colour ="black",size = 12,face="bold"),
panel.grid = element_blank(),
panel.border = element_rect(size = 0.8,colour = "gray30"),
axis.line = element_blank(),
axis.ticks = element_line(size = 0.6,colour = "gray30"),
axis.ticks.length = unit(1.5,units = "mm"),
plot.margin=unit(x=c(top.mar,right.mar,bottom.mar,left.mar),
units="inches"),
plot.title = element_text(hjust = 0.5,vjust = 0,size=11))+
geom_hline(yintercept = c(log(2/3),log2(1.5)),
size = 0.5,
color = "gray40",
lty = "dashed")+
geom_vline(xintercept = c(-1,1),
size = 0.5,
color = "gray40",
lty = "dashed")+
scale_y_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-3, 3),
breaks = c(-3,-2,-1,0,1,2,3),
label = c("-3","-2","-1","0","1","2","3"))+
scale_x_continuous(expand=expansion(add = c(0.5, 0.5)),
limits = c(-7, 7),
breaks = c(-5,-2.5,0,2.5,5),
label = c("-5","-2.5","0","2.5","5"))+
geom_text(x = -4., y = 2.5, label = lab, color="#000000",size =3.5,fontface="plain")+
geom_text(x = 4., y = -2.2, label = lab1, color="#000000",size=3.5,fontface="plain")+
geom_text(x = 4., y = -2.4, label = lab2, color="#000000",size=3.5,fontface="plain")+
labs(title = "Correlation of Proteome and Transcriptome")
p1