一. 示例数据准备
下载:链接:https://pan.baidu.com/s/1tKR943efKOn7-TW_892KLg 提取码:wbk6 (下载后保存到本地,例如存放在D盘)
文件说明
示例数据,其中数据均为虚拟数据,与实际生物学过程无关
文件名:dataset_volcano.txt
各列依次为:基因名(gene)、差异倍数的log2值(logFC)、t检验的P值(P.Value)
二. 环境需求
RStudio:如果系统中没有 RStudio,请先下载安装:https://www.rstudio.com/products/rstudio/download/#download
ggplot2包(绘制火山图)、ggrepel包(标记基因时使用,提供geom_text_repel函数):
如果没有安装所需的R包,用install.packages()安装,例如:
# ggrepel provides geom_text_repel(), which the gene-label layer relies on
install.packages(c('ggplot2', 'ggrepel'))
三. 绘制火山图
- 火山图
# Pre-run setup ====================================
# Clear objects left over from earlier work.
# NOTE: this wipes the whole global environment; drop this line when the
# code is pasted into a larger script.
rm(list = ls())
# Load the plotting package
library(ggplot2)
# Set the working directory to the folder that holds the example data
setwd("D:/")
# Prepare the data set ====================================
# Relative to the working directory set above, "./dataset_volcano.txt"
# resolves to D:/dataset_volcano.txt.
# Expected columns: gene, logFC, P.Value (first row is the header).
dataset <- read.table("./dataset_volcano.txt", header = TRUE)
# Significance thresholds for the p-value and for the absolute logFC
cut_off_pvalue <- 0.0000001
cut_off_logFC <- 1
# Classify every gene and store the result in the `change` column, which
# drives the point colour in the volcano plot:
#   "Up"     - significant and logFC positive
#   "Down"   - significant and logFC negative
#   "Stable" - everything else
# Once |logFC| >= cut_off_logFC holds, the direction is decided by the sign
# of logFC, so a gene sitting exactly on the cut-off
# (logFC == cut_off_logFC) is labelled "Up" instead of falling through to
# "Down".
is_significant <- dataset$P.Value < cut_off_pvalue &
  abs(dataset$logFC) >= cut_off_logFC
dataset$change <- ifelse(
  is_significant,
  ifelse(dataset$logFC > 0, "Up", "Down"),
  "Stable"
)
# Draw the volcano plot ====================================
ggplot(
  # Data set carrying the logFC, P.Value and change columns
  dataset,
  aes(
    x = logFC,
    y = -log10(P.Value),
    colour = change
  )
) +
  geom_point(alpha = 0.4, size = 3.5) +
  # Colours are matched to the categories by name, so each category keeps
  # its colour even when one of them is absent from the data.
  scale_color_manual(
    values = c(Down = "#546de5", Stable = "#d2dae2", Up = "#ff4757")
  ) +
  # Guide lines drawn at the thresholds, so they follow any change to the
  # cut-off values
  geom_vline(
    xintercept = c(-cut_off_logFC, cut_off_logFC),
    lty = 4, col = "black", lwd = 0.8
  ) +
  geom_hline(
    yintercept = -log10(cut_off_pvalue),
    lty = 4, col = "black", lwd = 0.8
  ) +
  # Axis titles
  labs(
    x = "log2(fold change)",
    y = "-log10 (p-value)"
  ) +
  theme_bw() +
  # Legend on the right without a title; plot title (if any) centred
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right",
    legend.title = element_blank()
  )
- 标记基因的火山图
# Build the base volcano plot first and keep it in `p` so that further
# layers can be added to it (same layers as the plain volcano plot).
p <- ggplot(
  dataset,
  aes(
    x = logFC,
    y = -log10(P.Value),
    colour = change
  )
) +
  geom_point(alpha = 0.4, size = 3.5) +
  # Colours are matched to the categories by name, so each category keeps
  # its colour even when one of them is absent from the data.
  scale_color_manual(
    values = c(Down = "#546de5", Stable = "#d2dae2", Up = "#ff4757")
  ) +
  # Guide lines drawn at the thresholds, so they follow any change to the
  # cut-off values
  geom_vline(
    xintercept = c(-cut_off_logFC, cut_off_logFC),
    lty = 4, col = "black", lwd = 0.8
  ) +
  geom_hline(
    yintercept = -log10(cut_off_pvalue),
    lty = 4, col = "black", lwd = 0.8
  ) +
  labs(
    x = "log2(fold change)",
    y = "-log10 (p-value)"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "right",
    legend.title = element_blank()
  )
# Choose which genes get a text label.
# The `label` column holds the gene name for genes to annotate and ""
# for all others. Here: significant genes whose |logFC| is at least
# cut_off_label_logFC.
# NOTE: keep the number of labelled genes small; with too many labels
# RStudio tends to freeze.
cut_off_label_logFC <- 5
dataset$label <- ifelse(
  dataset$P.Value < cut_off_pvalue &
    abs(dataset$logFC) >= cut_off_label_logFC,
  as.character(dataset$gene),
  ""
)
# geom_text_repel() lives in the ggrepel package, not in ggplot2, so it is
# called through its namespace; run install.packages("ggrepel") first if
# the package is missing.
p + ggrepel::geom_text_repel(
  # `p` was built before the label column existed, so pass the updated data
  data = dataset,
  # Bare column names: aes() looks them up inside `data`
  aes(
    x = logFC,
    y = -log10(P.Value),
    label = label
  ),
  size = 3,
  box.padding = unit(0.5, "lines"),
  point.padding = unit(0.8, "lines"),
  segment.color = "black",
  show.legend = FALSE
)