数据分析中的图形可视化是了解数据分布、波动和相关性等属性必不可少的手段。用于展示数据相关性的可视化图形主要有：散点图、热图、相关图、气泡图、连线散点图和二维密度图等。
散点图 Scatterplot
library(ggplot2)
library(dplyr)
# Scatterplot of engine displacement (disp) against fuel economy (mpg) for the
# mtcars data set. Points are coloured by cylinder count, rugs along the axes
# show the marginal distributions, a linear fit with its confidence band is
# overlaid, and every car is labelled with its row name.
ggplot(
  data = mtcars %>% mutate(cyl = factor(cyl)),
  mapping = aes(x = mpg, y = disp)
) +
  geom_point(aes(color = cyl), size = 3) +
  geom_rug(col = "black", alpha = 0.5, size = 1) +
  # The formula is spelled out so geom_smooth() does not print its
  # "using formula" message; y ~ x is what method = "lm" uses by default.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    color = "red",
    fill = "#69b3a2",
    se = TRUE
  ) +
  # `label.size` is a geom_label() parameter; geom_text() does not know it and
  # only warns about it, so it is not passed here.
  geom_text(
    label = rownames(mtcars),
    nudge_x = 0.25,
    nudge_y = 0.25,
    check_overlap = TRUE,
    color = "black",
    family = "serif"
  ) +
  theme_classic() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
热图 heatmap
library(ComplexHeatmap)
library(circlize)
# Reproducible 10 x 10 matrix of standard-normal draws; rows are labelled
# R1..R10 and columns C1..C10.
set.seed(123)
mat <- matrix(
  rnorm(100),
  nrow = 10,
  dimnames = list(paste0("R", 1:10), paste0("C", 1:10))
)
# Annotation tracks built from fresh uniform random values: a plain numeric
# track plus a bar-plot track, once for the columns and once for the rows.
column_ha <- HeatmapAnnotation(
  foo1 = runif(10),
  bar1 = anno_barplot(runif(10))
)
row_ha <- rowAnnotation(
  foo2 = runif(10),
  bar2 = anno_barplot(runif(10))
)

# Colour mapping anchored at -2 (green), 0 (white) and 2 (red).
col_fun <- colorRamp2(
  breaks = c(-2, 0, 2),
  colors = c("green", "white", "red")
)

# Draw the heatmap of `mat` on a fixed 6 cm x 6 cm body, with the column
# annotation on top and the row annotation on the right.
Heatmap(
  mat,
  name = "mat",
  col = col_fun,
  # Title shown underneath the columns.
  column_title = "pre-defined distance method (1 - pearson)",
  column_title_side = "bottom",
  column_title_gp = gpar(fontsize = 10, fontface = "bold"),
  # Rows are clustered with the pre-defined "pearson" distance; the column
  # dendrogram is hidden.
  cluster_rows = TRUE,
  clustering_distance_rows = "pearson",
  show_column_dend = FALSE,
  # k-means splitting: 2 row groups and 3 column groups.
  row_km = 2,
  column_km = 3,
  width = unit(6, "cm"),
  height = unit(6, "cm"),
  top_annotation = column_ha,
  right_annotation = row_ha
)
相关图 correlogram
library(GGally)
library(ggplot2)
# Pairwise plot matrix for columns 2 to 4 of the flea data set, with every
# panel coloured by species.
data(flea)
ggpairs(
  data = flea,
  mapping = aes(colour = species),
  columns = 2:4
) +
  theme_bw() +
  theme(
    text = element_text(size = 8, color = "black", family = "serif"),
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10)
  )
气泡图 Bubble
library(ggplot2)
library(dplyr)
library(gapminder)
# Keep only the 2007 rows of gapminder and drop the now-constant year column.
# The year is compared with a number rather than the string "2007", so the
# match does not rely on implicit number-to-character coercion.
data <- gapminder %>%
  filter(year == 2007) %>%
  dplyr::select(-year)
# Bubble chart: GDP per capita (x) against life expectancy (y), bubble size
# proportional to population and colour by continent.
data %>%
  # Largest populations first, so big bubbles are drawn first and smaller
  # ones end up on top of them.
  arrange(desc(pop)) %>%
  mutate(
    country = factor(country, country),
    # The size legend is titled "Population (M)", so express the population
    # in millions. Bubble sizes are unaffected because the size scale maps
    # the data range linearly onto `range`; only the legend labels change.
    pop = pop / 1e6
  ) %>%
  ggplot(aes(x = gdpPercap, y = lifeExp, size = pop, color = continent)) +
  geom_point(alpha = 0.5) +
  scale_size(range = c(0.1, 24), name = "Population (M)") +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
连线散点图 Connected Scatterplot
library(ggplot2)
library(dplyr)
library(babynames)
library(ggrepel)
library(tidyr)
# Yearly counts of girls named Ashley and Amanda after 1970, reshaped to one
# row per year with one count column per name.
data <- babynames %>%
  filter(
    name %in% c("Ashley", "Amanda"),
    sex == "F",
    year > 1970
  ) %>%
  select(year, name, n) %>%
  # pivot_wider() replaces the superseded spread(). A year/name combination
  # that is absent from the data is left as NA rather than being filled with
  # a made-up count.
  pivot_wider(names_from = name, values_from = n)

# Random 30% of the years; used as the subset of points that get a label.
tmp_date <- data %>% sample_frac(0.3)
# Connected scatterplot: one point per year (Amanda count on x, Ashley count
# on y), joined in row order by arrows; only the years in `tmp_date` are
# labelled.
data %>%
  ggplot(aes(x = Amanda, y = Ashley, label = year)) +
  geom_point(color = "#69b3a2") +
  geom_text_repel(data = tmp_date) +
  geom_segment(
    # Each segment ends at the next row's point. The last row has no
    # successor, so its end point is NA and that segment is dropped.
    aes(
      xend = c(Amanda[-1], NA),
      yend = c(Ashley[-1], NA)
    ),
    color = "#69b3a2",
    arrow = arrow(length = unit(0.3, "cm")),
    # The NA end point above is intentional; drop it without a warning.
    na.rm = TRUE
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    text = element_text(size = 8, color = "black", family = "serif"),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14)
  )
二维密度图 Density 2d
library(tidyverse)
# Three bivariate normal clouds of 20000 points each, centred near (10, 10),
# (14.5, 14.5) and (9.5, 15.5), stacked into a single data frame.
a <- data.frame(
  x = rnorm(n = 20000, mean = 10, sd = 1.9),
  y = rnorm(n = 20000, mean = 10, sd = 1.2)
)
b <- data.frame(
  x = rnorm(n = 20000, mean = 14.5, sd = 1.9),
  y = rnorm(n = 20000, mean = 14.5, sd = 1.9)
)
c <- data.frame(
  x = rnorm(n = 20000, mean = 9.5, sd = 1.9),
  y = rnorm(n = 20000, mean = 15.5, sd = 1.9)
)
data <- rbind(a, b, c)
# 2D kernel density of (x, y) drawn as a raster, without contour lines and
# without a legend. The axes have no expansion, so the raster fills the panel.
pl1 <- ggplot(data, aes(x = x, y = y)) +
  stat_density_2d(
    # after_stat() is the current way to map a computed statistic; the older
    # `..density..` notation is deprecated in ggplot2.
    aes(fill = after_stat(density)),
    geom = "raster",
    contour = FALSE
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_fill_distiller(palette = 4, direction = -1) +
  theme(legend.position = "none")
# Hexagonal-bin view of the same (x, y) data: 70 bins, viridis fill scale.
pl2 <- ggplot(data, aes(x = x, y = y)) +
  geom_hex(bins = 70) +
  scale_fill_continuous(type = "viridis") +
  theme_bw() +
  theme(
    text = element_text(size = 8, color = "black", family = "serif"),
    axis.title = element_text(face = "bold", color = "black", size = 14),
    axis.text = element_text(color = "black", size = 10),
    strip.text = element_text(face = "bold", size = 14),
    legend.position = "right",
    legend.key.height = unit(0.6, "cm"),
    legend.text = element_text(face = "bold", color = "black", size = 10)
  )

# Put both density plots side by side, horizontally aligned, labelled A and B.
cowplot::plot_grid(
  pl1, pl2,
  ncol = 2,
  align = "h",
  labels = LETTERS[1:2]
)