了解如何重现著名的gapminder散点图
在本教程中,您将学习如何再现由Hans Rosling开发的、世界上最著名的数据可视化之一。喜欢的小伙伴可以关注个人公众号"R语言数据分析指南",在此先行拜谢了!
汉斯·罗斯林(Hans Rosling)是国际卫生学教授,也是人类进步的发言人。在过去的几十年中,全球贫困已大幅减少,地球上的生活也得到了改善。本教程要重现的散点图描绘了2007年各国人均收入与预期寿命之间的关系。
我们将通过ggplot2对数据进行可视化
获取数据
通过gapminder软件包获取数据
# NOTE: the original `rm(list = ls())` call was removed. It only deletes
# objects from the global environment (attached packages and options are left
# untouched), so it does not provide a clean session while silently wiping the
# reader's workspace. Restart the R session if a clean state is needed.
library(tidyverse)   # dplyr, tidyr, forcats, ggplot2 and the %>% pipe
library(gapminder)   # provides the `gapminder` data set
library(ggthemes)    # theme_map()
library(countrycode) # countrycode(): country name -> continent
library(mapproj)     # projection backend required by coord_map()

# Quick look at the columns of the raw data and their types.
glimpse(gapminder, width = 50)
首先,使用dplyr的filter函数筛选出2007年的数据。由于在该可视化中大洋洲和亚洲并不加以区分,因此将大洋洲归入亚洲。这里使用case_when函数,它可以根据给定的条件为变量赋予新的取值。
# Keep the 2007 snapshot and prepare the columns used by the scatter plot.
gapminder_cleaned <- gapminder %>%
  # Compare `year` with a number; the original string "2007" only matched
  # through implicit type coercion.
  filter(year == 2007) %>%
  mutate(
    # Slightly larger copy of `pop`, used as the size of the black outline
    # ring that is drawn on top of each coloured point.
    pop2 = pop + 1,
    # Fold Oceania into Asia, then fix the level order so that the manual
    # colour palette of the plots lines up with the continents.
    continent = case_when(
      continent == "Oceania" ~ "Asia",
      TRUE ~ as.character(continent)
    ) %>%
      as.factor() %>%
      fct_relevel("Asia", "Americas", "Europe", "Africa")
  )
创建第一个散点图
# First draft of the bubble chart: income on a log10 x axis against life
# expectancy, one bubble per country.
ggplot(gapminder_cleaned, aes(gdpPercap, lifeExp)) +
  # Filled bubbles: area driven by population, colour by continent.
  geom_point(aes(colour = continent, size = pop)) +
  # Hollow circles (shape 21, no fill) sized by `pop2` supply the black
  # outline around every bubble.
  geom_point(aes(size = pop2), shape = 21, colour = "black") +
  # Palette order follows the factor levels of `continent`.
  scale_colour_manual(
    values = c("#F15772", "#7EEB03", "#FBE700", "#54D5E9")
  ) +
  # Ticks every 10 years from 0 to 90.
  scale_y_continuous(breaks = 10 * 0:9) +
  # Income ticks double at every step: 500, 1000, ..., 64000.
  scale_x_log10(breaks = 500 * 2^(0:7))
为什么我们向可视化添加了两个geom_point?gapminder可视化中的点带有黑色边框,而要在同一个geom_point中直接添加这些边框并不容易。因此我们创建第二个geom_point,并通过shape = 21将其设置为空心圆。变量pop2比变量pop稍大一点,因此把空心圆叠加在原来的点上,就为每个点添加了黑色边框。
调整散点图
散点图还远远不够完美:点的大小不够大,背景是灰色而不是白色,并且还需要去掉图例:
# Polished base plot: larger bubbles, no legends, axis titles and a white
# (minimal) theme. Stored in `pp` so later steps can build on it.
pp <- ggplot(data = gapminder_cleaned, aes(x = gdpPercap, y = lifeExp)) +
  # Filled bubbles: size by population, colour by continent.
  geom_point(aes(size = pop, color = continent)) +
  # Hollow circles sized by `pop2` draw the black outline of each bubble.
  geom_point(aes(size = pop2), color = "black", shape = 21) +
  scale_x_log10(
    breaks = c(500, 1000, 2000, 4000, 8000, 16000, 32000, 64000)
  ) +
  scale_y_continuous(breaks = seq(0, 90, by = 10)) +
  scale_color_manual(
    values = c("#F15772", "#7EEB03", "#FBE700", "#54D5E9")
  ) +
  # Widen the range of point sizes so large countries clearly stand out.
  scale_size_continuous(range = c(1, 30)) +
  # Hide both legends. "none" replaces the deprecated `FALSE` value.
  guides(size = "none", color = "none") +
  labs(x = "Income", y = "Life expectancy") +
  theme_minimal()

# Display the plot.
pp
调整轴样式和可视化网格
# Add the annotations (year watermark, Nigeria call-out, axis sub-label) and
# the final axis/grid styling. The outer parentheses print the plot while
# assigning it to `gapminder_plot`.
# Line widths use `linewidth`, which replaces the deprecated `size` argument
# for lines (requires ggplot2 >= 3.4.0); text sizes still use `size`.
(gapminder_plot <- pp +
  # Large, translucent year label in the background.
  annotate("text", x = 4000, y = 45, hjust = 0.5,
           size = 85, color = "#999999",
           label = "2007", alpha = .3,
           family = "Helvetica Neue") +
  # Dashed guide lines from both axes to the point (2014, 46.9).
  annotate("segment", x = 0, xend = 2014, y = 46.9, yend = 46.9,
           color = "#606F7B", linetype = 2, linewidth = .2) +
  annotate("segment", x = 2014, xend = 2014, y = 0, yend = 46.9,
           color = "#606F7B", linetype = 2, linewidth = .2) +
  # Small sub-label placed inside the panel near the x axis.
  annotate("text", x = 28200, y = 2,
           label = "per person (GDP/capita, PPP$ inflation-adjusted)",
           size = 2.8, color = "#999999") +
  # Explanatory call-out next to the guide lines.
  annotate("text", x = 2304, y = 42, hjust = 0,
           size = 3.5,
           label = paste0("Nigeria had a life expectancy of\n",
                          "46.9 years and an annual income of",
                          "\n$2014 per year per person in 2007")) +
  theme(
    plot.margin = unit(rep(1, 4), "cm"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(linewidth = 0.2,
                                    color = "#e5e5e5"),
    axis.title.y = element_text(margin = margin(r = 15),
                                size = 11,
                                family = "Helvetica Neue Light"),
    axis.title.x = element_text(margin = margin(t = 15),
                                size = 11,
                                family = "Helvetica Neue Light"),
    axis.text = element_text(family = "Helvetica Neue Light"),
    axis.line = element_line(color = "#999999",
                             linewidth = 0.2)
  ) +
  # Restrict the visible y range without dropping any data.
  coord_cartesian(ylim = c(4.1, 86)))
coord_cartesian函数可以明确指定y轴或x轴的显示范围(它只缩放视图,不会丢弃范围之外的数据)
获取世界地图的数据
# Polygon data for the world map, tagged with the same four continent groups
# that the scatter plot uses.
world <- map_data("world") %>%
  filter(region != "Antarctica") %>%
  # Look up the continent of every region from its country name.
  mutate(
    continent = countrycode(
      sourcevar = region,
      origin = "country.name",
      destination = "continent"
    )
  ) %>%
  # Merge Oceania into Asia and fix the level order of the factor.
  mutate(
    continent = if_else(continent == "Oceania", "Asia", continent),
    continent = fct_relevel(
      as.factor(continent),
      "Asia", "Americas", "Europe", "Africa"
    )
  ) %>%
  # Drop regions for which no continent could be determined.
  drop_na(continent)

# Inspect the resulting columns.
glimpse(world, width = 50)
用世界各大洲创建第一张世界地图
# First draft of the continent map: every region filled by its continent.
ggplot(world) +
  geom_map(
    aes(x = long, y = lat, map_id = region, group = group,
        fill = continent),
    map = world
  ) +
  # NOTE(review): the latitude limits of +/-200 lie outside the usual +/-90
  # range; presumably chosen for padding -- confirm before changing.
  coord_map(ylim = c(-200, 200), xlim = c(-180, 180)) +
  theme_map()
改善地图
# Final continent map used as an inset legend: same palette as the scatter
# plot, no fill legend, thin grey frame. The outer parentheses print the map
# while assigning it to `continent_map`.
(continent_map <- ggplot(data = world) +
  geom_map(
    map = world,
    aes(long, lat, group = group, map_id = region, fill = continent)
  ) +
  theme_map() +
  coord_map(xlim = c(-180, 180), ylim = c(-200, 200)) +
  # Palette order follows the factor levels of `continent`.
  scale_fill_manual(
    values = c("#F15772", "#7EEB03", "#FBE700", "#54D5E9")
  ) +
  # Hide the legend. "none" replaces the deprecated `FALSE` value.
  guides(fill = "none") +
  theme(plot.background = element_rect(color = "#B8C2CC", fill = NA)))
将地图添加到散点图
# Embed the continent map into the scatter plot as an inset.
# The x limits are passed through log10() to match the log10 x scale of the
# scatter plot; the y limits are plain life-expectancy values.
gapminder_plot +
  annotation_custom(
    ggplotGrob(continent_map),
    ymin = 5,
    ymax = 42,
    xmin = log10(800),
    xmax = log10(650000)
  )