在找资料的时候偶然发现了这个图,它来源于论文《Russian periphery is dying in movement: a cohort assessment of internal youth migration in Central Russia》。
论文链接是 https://link.springer.com/article/10.1007%2Fs10708-018-9953-5 ,数据和代码存储的链接是 https://gist.github.com/ikashnitsky/2f3e2b2af6f50911bb775bbce6eb0fb8
https://ikashnitsky.github.io/2019/dotplot/
感觉这个图很漂亮,数据代码还是公开的,所以我们来重复一下
这个图的横坐标是变化率,纵坐标是地区;每个地区对应 1980-84 和 1988-92 两个出生队列(用两种颜色区分),每个队列各有一个实心点和一个空心点,分别表示 census 和 stat record 两种数据来源的变化率
这个图目前还想不到如何应用于我自己的数据,不过可以用来比较某一个数值在两组之间的差异,比如处理和对照
前面整理数据的代码这里就不介绍了,大家感兴趣可以自己运行试试,研究一下每行代码的作用
library(tidyverse)
# Load the raw table and preview its first rows.
df <- read.csv(file = "20210822_raw.csv")
head(df)
# Relevel regions in ascending order of the 1988-1992 cohort's census change,
# with the "CFD TOTAL" row pinned to the first factor level.
df_plot <- df %>%
  select(cohort, region, change_cens) %>%
  # Wide format: one row per region, one column per cohort.
  spread(key = cohort, value = change_cens) %>%
  arrange(`Cohort 1988-1992`) %>%
  mutate(region = fct_relevel(as_factor(region), "CFD TOTAL", after = 0)) %>%
  arrange(region) %>%
  # Back to long format (columns 2 and 3 are the two cohort columns), then
  # re-attach the remaining columns of the raw data.
  gather(key = "cohort", value = "value", 2:3) %>%
  left_join(df, by = c("region", "cohort"))
# Compute the vertical position of every point and save the plotting table.
# Each region gets a numeric y (the index of its factor level); the two
# cohorts are shifted by 0.15 to either side of it so that their segments
# do not overlap.
df_plot %>%
  mutate(
    region = as_factor(region),
    y = as.numeric(region),
    adjust = ifelse(cohort == "Cohort 1988-1992", .15, -.15),
    ypos = y - adjust
  ) %>%
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  # NOTE(review): quote = FALSE assumes no field contains a comma - confirm.
  write.csv(file = "20210822.csv", quote = FALSE, row.names = FALSE)
最终用到的画图数据
# Read back the prepared plotting table and preview its first rows.
df_plot_1 <- read.csv(file = "20210822.csv")
head(df_plot_1)
画图代码
library(ggplot2)
library(tidyverse)
library(extrafont)
# Axis labels: the distinct region names, in order of first appearance.
labels <- unique(df_plot_1[["region"]])
# One break per label; seq_along() stays empty when there are no labels,
# whereas 1:length(labels) would yield c(1, 0).
breaks <- seq_along(labels)
breaks
# Two colours, one per cohort.
pal <- c("#8C510A", "#003C30")

# Base dot plot. It is stored in `p1` because the legend annotations are
# added to `p1` afterwards; without the assignment that step fails with
# "object 'p1' not found".
# NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line widths;
# `size` is kept here so the code also runs on older versions.
p1 <- df_plot_1 %>%
  ggplot(aes(color = cohort, y = ypos)) +
  # Reference line at zero change.
  geom_vline(
    xintercept = 0, size = 2,
    alpha = .5, color = "grey50"
  ) +
  # Segment joining the two estimates of the same region and cohort.
  geom_segment(aes(
    x = change_cens,
    xend = change_rolling,
    yend = ypos
  )) +
  # Solid point: change_cens.
  geom_point(aes(x = change_cens),
    shape = 16, size = 2
  ) +
  # Hollow point (white fill): change_rolling.
  geom_point(aes(x = change_rolling),
    shape = 21, size = 2,
    fill = "white"
  ) +
  scale_color_manual(values = pal) +
  # Put the region names on the numeric y axis.
  scale_y_continuous(
    breaks = breaks,
    labels = labels,
    expand = c(.01, .01)
  ) +
  theme_minimal(
    base_family = "Times New Roman",
    base_size = 12
  ) +
  theme(
    legend.position = "none",
    panel.grid.minor.y = element_blank(),
    # Thick, light horizontal grid lines act as a band behind each region.
    panel.grid.major.y = element_line(size = 4, color = "grey95"),
    axis.text.y = element_text(vjust = .3, size = 12)
  ) +
  labs(x = "Change in cohort size, 2003-2010, %", y = NULL)

# Print explicitly, since an assignment does not display the plot.
p1
相比于原始代码,这里我对字体进行了修改,因为原始代码中用到了 hrbrthemes 这个主题包,涉及到字体的地方我一直没有搞明白
接下来是图例
他这里采用的办法是使用 annotate() 函数手动添加
# Hand-made legend drawn with annotate() on top of the base plot.
# `p1` must already hold the base ggplot object and `pal` the two cohort
# colours.
font_rc <- "Times New Roman"

p1 +
  # Legend frame.
  annotate("rect",
    xmin = 29, xmax = 63,
    ymin = 2.5, ymax = 9.5,
    color = "grey50", fill = "white"
  ) +
  annotate("text",
    x = 45, y = 8.5,
    label = "LEGEND",
    size = 5, hjust = .5,
    family = font_rc, color = "grey20"
  ) +
  # First key: point shape (solid vs. hollow).
  annotate("text",
    x = 45, y = 7,
    label = "Change in cohort size by",
    size = 4.5, hjust = .5,
    family = font_rc, color = "grey20"
  ) +
  annotate("point",
    x = c(32.5, 47.5), y = 6,
    shape = c(16, 21), size = 2, color = 1
  ) +
  annotate("text",
    x = c(35, 50), y = 6,
    label = c("census", "stat record"),
    size = 4.5, hjust = 0,
    family = font_rc, color = "grey20"
  ) +
  # Second key: cohort colour.
  annotate("text",
    x = 45, y = 4.5,
    label = "Cohorts born in",
    size = 4.5, hjust = .5,
    family = font_rc, color = "grey20"
  ) +
  # Segments have no shape aesthetic, so the stray `pch` argument (which
  # only triggered an "unknown parameters" warning) is dropped.
  annotate("segment",
    x = c(32, 47), xend = c(34, 49),
    y = 3.5, yend = 3.5,
    size = 2, color = pal
  ) +
  annotate("text",
    x = c(35, 50), y = 3.5,
    label = c("1980-84", "1988-92"),
    size = 4.5, hjust = 0,
    family = font_rc, color = "grey20"
  )
最终结果
示例数据和代码可以在今天次条推文的留言区获取,次条推文是广告
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!