在这个系列中,我们会陆续学习一些散点图的绘制技巧。其实前面介绍过的火山图、PCA图等都属于散点图,只不过展示的对象和数据类型不太一样而已。
今天我们学习如何给散点图添加拟合曲线以及置信区间。下面这个图是paper中的一个散点图。
我们还是先随机生成一组测试数据:
library(Hmisc)
library(ggplot2)
# Simulate 50 samples for two genes drawn from uniform distributions.
# gene1 is sorted ascending and gene2 descending, so the two columns are
# negatively correlated. Positions 11-41 are shuffled (independently for each
# gene) to add scatter, while the first 10 and last 9 values keep their order.
# Call set.seed() beforehand if reproducible data are needed.
data <- data.frame(
  gene1 = sort(runif(50, min = 3, max = 7))[c(1:10, sample(11:41), 42:50)],
  gene2 = sort(runif(50, min = 21, max = 26), decreasing = TRUE)[
    c(1:10, sample(11:41), 42:50)
  ]
)
runif 函数用于生成 min 到 max 之间服从均匀分布的随机数。
# Compute the correlation coefficient and its p-value for the two genes
# with Hmisc::rcorr().
res <- rcorr(x = data[["gene1"]], y = data[["gene2"]])
其实能计算相关系数的包很多,大家任选一个即可。接下来,我们从 rcorr 的返回结果中提取 p 值和相关系数 R。
# Pull the p-value and the correlation coefficient for the gene1/gene2 pair
# out of the off-diagonal cell [1, 2] of the matrices returned by rcorr().
# The p-value is reduced to 2 significant digits so the plot title stays
# readable (the raw value would print with many digits); this is the same
# rounding the multi-gene loop applies.
p_value <- signif(res$P[1, 2], 2)
cor_value <- round(res$r[1, 2], 2)
# Scatter plot of gene1 vs. gene2 with a linear fit and its confidence band;
# the title reports the correlation coefficient and p-value.
ggplot(data = data, mapping = aes(x = gene1, y = gene2)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "black",
    fill = "#b2e7fa",
    alpha = 0.8
  ) +
  # theme_classic() draws no top/right border; theme_bw() would draw all four.
  theme_classic() +
  theme(
    panel.grid = element_blank(),
    axis.title = element_text(face = "bold.italic"),
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = paste0("R = ", cor_value, ", p = ", p_value))
注:geom_smooth 函数用来向散点图中添加拟合曲线及其置信区间。这里只是用了 lm 线性(直线)拟合方法,同样也可以选择广义线性模型 glm、广义加性模型 gam,或局部加权回归 loess 等曲线拟合方法。
这个是改成loess拟合的结果。
# Same scatter plot as the linear version, but the fitted curve comes from
# loess; switch method to "lm" to get the straight-line fit back.
ggplot(data = data, mapping = aes(x = gene1, y = gene2)) +
  geom_point() +
  geom_smooth(
    method = "loess",
    formula = y ~ x,
    color = "black",
    fill = "#b2e7fa",
    alpha = 0.8
  ) +
  # theme_bw() is an alternative that draws all four panel borders.
  theme_classic() +
  theme(
    # Remove the grid lines.
    panel.grid = element_blank(),
    # Axis titles in bold italic.
    axis.title = element_text(face = "bold.italic"),
    # Centre the plot title.
    plot.title = element_text(hjust = 0.5)
  ) +
  labs(title = paste0("R = ", cor_value, ", p = ", p_value))
如果想做成别人论文(paper)中那样的多面板图,无非就是多几组数据,写个 for 循环批量绘图就行。
# Add five more simulated genes (gene3 .. gene7) to the data frame.
# Each column is a descending-sorted uniform sample whose middle positions
# (11-41) are shuffled, so every new gene is negatively correlated with gene1.
data$gene3 <- sort(runif(50, min = 13, max = 26), decreasing = TRUE)[
  c(1:10, sample(11:41), 42:50)
]
data$gene4 <- sort(runif(50, min = 12, max = 21), decreasing = TRUE)[
  c(1:10, sample(11:41), 42:50)
]
data$gene5 <- sort(runif(50, min = 10, max = 23), decreasing = TRUE)[
  c(1:10, sample(11:41), 42:50)
]
data$gene6 <- sort(runif(50, min = 9, max = 19), decreasing = TRUE)[
  c(1:10, sample(11:41), 42:50)
]
data$gene7 <- sort(runif(50, min = 11, max = 22), decreasing = TRUE)[
  c(1:10, sample(11:41), 42:50)
]
# Build one scatter plot per gene: gene1 on the x axis against every other
# column of `data` on the y axis, each with a linear fit, confidence band and
# a title showing the correlation coefficient and p-value.
# The result list is preallocated (one slot per non-gene1 column) and the
# column index sequence is built with seq_len() so that a data frame with a
# single column yields an empty loop instead of iterating over c(2, 1).
p_list <- vector("list", max(ncol(data) - 1L, 0L))
for (i in seq_len(ncol(data))[-1]) {
  # Correlation between gene1 and the current gene.
  res <- rcorr(data$gene1, data[[i]])
  p_value <- signif(res$P[1, 2], 2)
  cor_value <- round(res$r[1, 2], 2)

  # Fresh two-column plotting frame; the y column gets a fixed name so the
  # same aes() mapping works for every gene.
  data_new <- data[, c(1, i)]
  colnames(data_new) <- c("gene1", "gene")

  p <- ggplot(data_new, aes(gene1, gene)) +
    geom_point() +
    theme_classic() +
    # Label the y axis with the real gene name rather than the generic "gene".
    ylab(colnames(data)[i]) +
    geom_smooth(
      method = "lm", formula = y ~ x,
      color = "black", fill = "#b2e7fa", alpha = 0.8
    ) +
    theme(
      # Remove the grid lines.
      panel.grid = element_blank(),
      # Axis titles in bold italic.
      axis.title = element_text(face = "bold.italic"),
      # Centre the plot title.
      plot.title = element_text(hjust = 0.5)
    ) +
    labs(title = paste0("R = ", cor_value, ", p = ", p_value))

  # Column i maps to slot i - 1 because column 1 (gene1) is skipped.
  p_list[[i - 1]] <- p
}
library(cowplot)
library(patchwork)
# Arrange every plot collected in p_list on a 3-column grid. Passing the list
# through `plotlist` avoids hard-coding the number of panels, so the call
# keeps working when genes are added or removed.
p <- plot_grid(plotlist = p_list, ncol = 3)
# Write the combined figure to plot.pdf (width 9, height 6).
ggsave("plot.pdf", plot = p, height = 6, width = 9)