数据分析的图形可视化是了解数据分布、波动和相关性等属性必不可少的手段。数据排列可视化图形主要有:条形图、雷达图、词云、棒棒糖图和平行坐标系统图以及循环条形图。
条形图 Barplot
library(ggplot2)
library(dplyr)
data <- iris %>% select(Species, Sepal.Length) %>%
group_by(Species) %>%
summarise(
n=n(),
mean=mean(Sepal.Length),
sd=sd(Sepal.Length)
) %>%
mutate( se=sd/sqrt(n)) %>%
mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
ggplot(data)+
geom_bar(aes(x=Species, y=mean),
stat="identity", fill="skyblue", alpha=0.7)+
geom_errorbar(aes(x=Species, ymin=mean-sd, ymax=mean+sd),
width=0.4, colour="orange", alpha=0.9, size=1.3)+
# geom_errorbar(aes(x=Species, ymin=mean-ic, ymax=mean+ic),
# width=0.4, colour="orange", alpha=0.9, size=1.5)+
# geom_crossbar(aes(x=Species, y=mean, ymin=mean-sd, ymax=mean+sd),
# width=0.4, colour="orange", alpha=0.9, size=1.3)+
geom_pointrange(aes(x=Species, y=mean, ymin=mean-sd, ymax=mean+sd),
colour="orange", alpha=0.9, size=1.3)+
scale_y_continuous(expand = c(0, 0),
limits = c(0, 8))+
labs(x="",y="")+
theme_bw()+
theme(axis.title = element_text(face = 'bold',color = 'black',size = 14),
axis.text = element_text(color = 'black',size = 10),
text = element_text(size = 8, color = "black", family="serif"),
legend.position = 'right',
legend.key.height = unit(0.6,'cm'),
legend.text = element_text(face = "bold", color = 'black',size = 10),
strip.text = element_text(face = "bold", size = 14))
- 根据大小控制条形图宽度
library(ggplot2)
data <- data.frame(
group=c("A ","B ","C ","D ") ,
value=c(33,62,56,67) ,
number_of_obs=c(100,500,459,342)
)
data$right <- cumsum(data$number_of_obs) + 30*c(0:(nrow(data)-1))
data$left <- data$right - data$number_of_obs
ggplot(data, aes(ymin = 0))+
geom_rect(aes(xmin = left,
xmax = right,
ymax = value,
color = group,
fill = group))+
xlab("number of obs")+
ylab("value")+
scale_y_continuous(expand = c(0, 0),
limits = c(0, 81))+
theme_bw()+
theme(axis.title = element_text(face = 'bold',color = 'black',size = 14),
axis.text = element_text(color = 'black',size = 10),
text = element_text(size = 8, color = "black", family="serif"),
legend.position = 'right',
legend.key.height = unit(0.6,'cm'),
legend.text = element_text(face = "bold", color = 'black',size = 10),
strip.text = element_text(face = "bold", size = 14))
雷达图 radar chart
library(fmsb)
set.seed(99)
data <- as.data.frame(matrix( sample( 0:20 , 15 , replace=F) , ncol=5))
colnames(data) <- c("math" , "english" , "biology" , "music" , "R-coding" )
rownames(data) <- paste("mister" , letters[1:3] , sep="-")
data <- rbind(rep(20,5) , rep(0,5) , data)
colors_border <- c(rgb(0.2,0.5,0.5,0.9),
rgb(0.8,0.2,0.5,0.9),
rgb(0.7,0.5,0.1,0.9))
colors_in <- c(rgb(0.2,0.5,0.5,0.4),
rgb(0.8,0.2,0.5,0.4),
rgb(0.7,0.5,0.1,0.4) )
radarchart(data, axistype=1,
pcol=colors_border, pfcol=colors_in, plwd=4, plty=1,
cglcol="grey", cglty=1, axislabcol="grey", caxislabels=seq(0,20,5), cglwd=0.8,
vlcex=0.8)
legend(x=1.2, y=1.2, legend=rownames(data[-c(1,2),]),
bty = "n", pch=20 , col=colors_in ,
text.col = "grey", cex=1.2, pt.cex=3)
词云 wordcloud
library(wordcloud2)
wordcloud2(demoFreq, size = 2.3,
minRotation = -pi/6,
maxRotation = -pi/6,
rotateRatio = 1)
平行坐标系统 Parallel Coordinates chart
library(hrbrthemes)
library(GGally)
library(viridis)
data <- iris
p1 <- ggparcoord(data,
columns = 1:4, groupColumn = 5, order = "anyClass",
scale="globalminmax",
showPoints = TRUE,
title = "No scaling",
alphaLines = 0.3)+
scale_color_viridis(discrete=TRUE)+
theme_ipsum()+
theme(legend.position="none",
plot.title = element_text(size=13))+
xlab("")
p2 <- ggparcoord(data,
columns = 1:4, groupColumn = 5, order = "anyClass",
scale="uniminmax",
showPoints = TRUE,
title = "Standardize to Min = 0 and Max = 1",
alphaLines = 0.3)+
scale_color_viridis(discrete=TRUE)+
theme_ipsum()+
theme(legend.position="none",
plot.title = element_text(size=13))+
xlab("")
p3 <- ggparcoord(data,
columns = 1:4, groupColumn = 5, order = "anyClass",
scale="std",
showPoints = TRUE,
title = "Normalize univariately (substract mean & divide by sd)",
alphaLines = 0.3)+
scale_color_viridis(discrete=TRUE)+
theme_ipsum()+
theme(legend.position="none",
plot.title = element_text(size=13))+
xlab("")
p4 <- ggparcoord(data,
columns = 1:4, groupColumn = 5, order = "anyClass",
scale="center",
showPoints = TRUE,
title = "Standardize and center variables",
alphaLines = 0.3)+
scale_color_manual(values=c( "#69b3a2", "#E8E8E8", "#E8E8E8"))+
theme_ipsum()+
theme(legend.position="none",
plot.title = element_text(size=13))+
xlab("")
cowplot::plot_grid(p1, p2, p3, p4, ncol = 2, align = "hv", labels = LETTERS[1:4])
棒棒糖图 Lollipop plot
library(ggplot2)
data <- data.frame(
x=LETTERS[1:26],
y=abs(rnorm(26))) %>%
arrange(y) %>%
mutate(x=factor(x, x))
p1 <- ggplot(data, aes(x=x, y=y))+
geom_segment(aes(x=x, xend=x, y=1, yend=y), color="grey")+
geom_point(color="orange", size=4)+
xlab("") +
ylab("Value of Y")+
theme_light()+
theme(axis.title = element_text(face = 'bold',color = 'black',size = 14),
axis.text = element_text(color = 'black',size = 10),
text = element_text(size = 8, color = "black", family="serif"),
panel.grid.major.x = element_blank(),
panel.border = element_blank(),
axis.ticks.x = element_blank(),
legend.position = 'right',
legend.key.height = unit(0.6, 'cm'),
legend.text = element_text(face = "bold", color = 'black',size = 10),
strip.text = element_text(face = "bold", size = 14))
p2 <- ggplot(data, aes(x=x, y=y))+
geom_segment(aes(x=x, xend=x, y=0, yend=y),
color=ifelse(data$x %in% c("A", "D"), "blue", "red"),
size=ifelse(data$x %in% c("A", "D"), 1.3, 0.7) ) +
geom_point(color=ifelse(data$x %in% c("A", "D"), "blue", "red"),
size=ifelse(data$x %in% c("A","D"), 5, 2))+
annotate("text", x=grep("D", data$x),
y=data$y[which(data$x=="D")]*1.2,
label="Group D is very impressive",
color="orange", size=4 , angle=0, fontface="bold", hjust=0)+
annotate("text", x = grep("A", data$x),
y = data$y[which(data$x=="A")]*1.2,
label = paste("Group A is not too bad\n (val=",
data$y[which(data$x=="A")] %>% round(2),")",sep=""),
color="orange", size=4 , angle=0, fontface="bold", hjust=0)+
theme_ipsum()+
coord_flip()+
theme(legend.position="none")+
xlab("")+
ylab("Value of Y")+
ggtitle("How did groups A and D perform?")
cowplot::plot_grid(p1, p2, ncol = 2, align = "h", labels = LETTERS[1:4])
循环条形图 circular barplot
library(tidyverse)
data <- data.frame(
individual=paste("Mister ", seq(1,60), sep=""),
group=c(rep('A', 10), rep('B', 30), rep('C', 14), rep('D', 6)) ,
value=sample( seq(10,100), 60, replace=T)) %>%
mutate(group=factor(group))
# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 3
to_add <- data.frame(matrix(NA, empty_bar*nlevels(data$group), ncol(data)))
colnames(to_add) <- colnames(data)
to_add$group <- rep(levels(data$group), each=empty_bar)
data <- rbind(data, to_add)
data <- data %>% arrange(group)
data$id <- seq(1, nrow(data))
# Get the name and the y position of each label
label_data <- data
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)
# prepare a data frame for base lines
base_data <- data %>%
group_by(group) %>%
summarize(start=min(id), end=max(id) - empty_bar) %>%
rowwise() %>%
mutate(title=mean(c(start, end)))
# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1, ]
# Make the plot
p <- ggplot(data, aes(x=as.factor(id), y=value, fill=group))+
geom_bar(aes(x=as.factor(id), y=value, fill=group), stat="identity", alpha=0.5)+
# Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
geom_segment(data=grid_data,
aes(x = end, y = 80, xend = start, yend = 80),
colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE )+
geom_segment(data=grid_data,
aes(x = end, y = 60, xend = start, yend = 60),
colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE )+
geom_segment(data=grid_data,
aes(x = end, y = 40, xend = start, yend = 40),
colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE )+
geom_segment(data=grid_data,
aes(x = end, y = 20, xend = start, yend = 20),
colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE )+
# Add text showing the value of each 100/75/50/25 lines
annotate("text",
x = rep(max(data$id),4),
y = c(20, 40, 60, 80),
label = c("20", "40", "60", "80"),
color="grey", size=3, angle=0, fontface="bold", hjust=1) +
geom_bar(aes(x=as.factor(id), y=value, fill=group), stat="identity", alpha=0.5)+
ylim(-100,120)+
theme_minimal()+
theme(legend.position = "none",
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
plot.margin = unit(rep(-1,4), "cm"))+
coord_polar()+
geom_text(data=label_data,
aes(x=id, y=value+10, label=individual, hjust=hjust),
color="black", fontface="bold",alpha=0.6, size=2.5,
angle= label_data$angle, inherit.aes = FALSE )+
# Add base line information
geom_segment(data=base_data,
aes(x = start, y = -5, xend = end, yend = -5),
colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE ) +
geom_text(data=base_data,
aes(x = title, y = -18, label=group),
hjust=c(1,1,0,0), colour = "black",
alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
p