今天推文重复的图来自于 论文
Whole-genome resequencing of 445 Lactuca accessions reveals the domestication history of cultivated lettuce
今天试着重复的图片对应的是论文附件中的 Figure 8c(基因结构图),论文中对这幅图的文字描述是:Gene structure of Lsat_6X11620. Closed bars represent exons, and open bars represent untranslated regions and introns. The positions of the SNPs in the promoter region are indicated by black triangles. An highly associated SNP, A-to-G transition at Chr. 6:15,542,968 is represented by a red triangle.
首先是准备数据
表示整个基因的矩形数据
# Bounding rectangle for the whole gene (a single row: x extent and y extent).
gene1 <- data.frame(xmin = 15000, xmax = 16000, ymin = 1, ymax = 2)
外显子的数据
# One rectangle per exon. All exons share the same vertical extent (1 to 2);
# `label` holds the text drawn inside each rectangle.
exon <- data.frame(
  xmin = c(15100, 15300, 15700),
  xmax = c(15200, 15600, 15900),
  ymin = rep(1, 3),
  ymax = rep(2, 3),
  label = sprintf("exon_%d", 1:3)
)
基因上下游的线段的数据
# Horizontal line segment at y = 1.5 covering the gene plus its upstream and
# downstream flanks.
df <- data.frame(x = 14500, xend = 16500, y = 1.5, yend = 1.5)
snp的位置数据
# SNP positions on the baseline (y = 1.5).
# df1: the four SNPs that are drawn as black triangles.
df1 <- data.frame(x = c(14510, 14530, 14560, 14590), y = 1.5)

# df2: the single SNP that is drawn as a red triangle.
df2 <- data.frame(x = 14520, y = 1.5)
画图代码
library(ggplot2)
library(ggfittext)
# Gene-structure diagram built from the data frames defined above:
# baseline segment, gene outline, exon boxes with labels, and SNP markers.
ggplot() +
  # Baseline through the gene and its flanking regions.
  geom_segment(
    data = df,
    aes(x = x, xend = xend, y = y, yend = yend)
  ) +
  # Whole gene as an open (white-filled) box.
  geom_rect(
    data = gene1,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "white", color = "black"
  ) +
  # Exons as boxes with the default geom_rect fill, drawn on top of the gene.
  geom_rect(
    data = exon,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    color = "black"
  ) +
  # Exon labels fitted inside each exon box; contrast = TRUE lets ggfittext
  # pick a text colour that stands out against the box fill.
  geom_fit_text(
    data = exon,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, label = label),
    contrast = TRUE
  ) +
  # Black upward triangles (shape 17) placed just below the baseline.
  geom_point(
    data = df1,
    aes(x = x, y = y - 0.05),
    shape = 17
  ) +
  # Red downward triangle (shape 25) placed just above the baseline.
  geom_point(
    data = df2,
    aes(x = x, y = y + 0.05),
    shape = 25, fill = "red", color = "red"
  ) +
  theme_minimal() +
  theme(
    aspect.ratio = 0.2,
    panel.grid = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.line.x = element_line(),
    axis.ticks.x = element_line()
  ) +
  # Pin the breaks explicitly: `labels` must have exactly one entry per break,
  # and relying on the automatically computed breaks fails (or mislabels the
  # ticks) as soon as the plotted x range changes.
  scale_x_continuous(
    breaks = seq(14500, 16500, by = 500),
    labels = c("1.45", "1.50", "1.55", "1.60", "1.65")
  ) +
  labs(x = "Chromosome 6 (MB)")
最终结果如下
这个地方新遇到了一个R包 ggfittext,GitHub 对应的链接是 https://github.com/wilkox/ggfittext
这个包的主要作用是让文字自动缩放以适应指定矩形区域的大小,还可以根据背景填充色自动调整文字颜色(contrast 参数)、在区域内自动换行(reflow 参数)等等,后面争取出一期推文专门介绍这个包
另外,之前的推文遇到了一个问题:ggplot2 添加文本标签的时候如何让文字居左或者居右显示。有人留言说用 hjust 参数。我原来一直以为这个参数是让文本左右移动,其实它控制的是文本相对于锚点的水平对齐方式:hjust 设置为 0 就是居左,设置为 0.5 就是居中,设置为 1 就是居右
本篇推文的完整代码
library(ggplot2)
library(ggfittext)
# ---- Input data for the gene-structure diagram ----

# Bounding rectangle for the whole gene.
gene1 <- data.frame(xmin = 15000, xmax = 16000, ymin = 1, ymax = 2)

# One rectangle per exon, with the label drawn inside each one.
exon <- data.frame(
  xmin = c(15100, 15300, 15700),
  xmax = c(15200, 15600, 15900),
  ymin = rep(1, 3),
  ymax = rep(2, 3),
  label = sprintf("exon_%d", 1:3)
)

# Horizontal baseline segment covering the gene and its flanking regions.
df <- data.frame(x = 14500, xend = 16500, y = 1.5, yend = 1.5)

# SNP positions drawn as black triangles.
df1 <- data.frame(x = c(14510, 14530, 14560, 14590), y = 1.5)

# SNP position drawn as a red triangle.
df2 <- data.frame(x = 14520, y = 1.5)
# Gene-structure diagram: baseline segment, gene outline, exon boxes with
# labels, and SNP markers.
ggplot() +
  # Baseline through the gene and its flanking regions.
  geom_segment(
    data = df,
    aes(x = x, xend = xend, y = y, yend = yend)
  ) +
  # Whole gene as an open (white-filled) box.
  geom_rect(
    data = gene1,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "white", color = "black"
  ) +
  # Exons as boxes with the default geom_rect fill, drawn on top of the gene.
  geom_rect(
    data = exon,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    color = "black"
  ) +
  # Exon labels fitted inside each exon box; contrast = TRUE lets ggfittext
  # pick a text colour that stands out against the box fill.
  geom_fit_text(
    data = exon,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, label = label),
    contrast = TRUE
  ) +
  # Black upward triangles (shape 17) placed just below the baseline.
  geom_point(
    data = df1,
    aes(x = x, y = y - 0.05),
    shape = 17
  ) +
  # Red downward triangle (shape 25) placed just above the baseline.
  geom_point(
    data = df2,
    aes(x = x, y = y + 0.05),
    shape = 25, fill = "red", color = "red"
  ) +
  theme_minimal() +
  theme(
    aspect.ratio = 0.2,
    panel.grid = element_blank(),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.line.x = element_line(),
    axis.ticks.x = element_line()
  ) +
  # Pin the breaks explicitly: `labels` must have exactly one entry per break,
  # and relying on the automatically computed breaks fails (or mislabels the
  # ticks) as soon as the plotted x range changes.
  scale_x_continuous(
    breaks = seq(14500, 16500, by = 500),
    labels = c("1.45", "1.50", "1.55", "1.60", "1.65")
  ) +
  labs(x = "Chromosome 6 (MB)")
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!