数据分析的图形可视化是了解数据分布、波动和相关性等属性必不可少的手段。数据整体和部分组成可视化图形:堆积图、矩形树图和组成图等等。
分组堆积图 grouped stacked barplot
library(ggplot2)
library(viridis)
library(hrbrthemes)
specie <- c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition <- rep(c("normal" , "stress" , "Nitrogen") , 4)
value <- abs(rnorm(12 , 0 , 15))
data <- data.frame(specie,condition,value)
ggplot(data, aes(fill=condition, y=value, x=specie)) +
geom_bar(position="stack", stat="identity") +
scale_fill_viridis(discrete = T) +
ggtitle("Studying 4 species..") +
theme_ipsum() +
xlab("")
矩形树图 Treemap
library(treemap)
group <- c(rep("group-1",4),rep("group-2",2),rep("group-3",3))
subgroup <- paste("subgroup" , c(1,2,3,4,1,2,1,2,3), sep="-")
value <- c(13,5,22,12,11,7,3,1,23)
data <- data.frame(group,subgroup,value)
treemap(data,
index=c("group","subgroup"),
vSize="value",
type="index")
圆圈图 doughhut
library(ggplot2)
data <- data.frame(
category=c("A", "B", "C"),
count=c(10, 60, 30))
data$fraction <- data$count / sum(data$count)
data$ymax <- cumsum(data$fraction)
data$ymin <- c(0, head(data$ymax, n=-1))
data$labelPosition <- (data$ymax + data$ymin) / 2
data$label <- paste0(data$category, "\n value: ", data$count)
ggplot(data, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=category)) +
geom_rect() +
geom_label( x=3.5, aes(y=labelPosition, label=label), size=6) +
scale_fill_brewer(palette=4) +
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
theme(legend.position = "none")
饼图 pie
library(ggplot2)
library(dplyr)
data <- data.frame(
group=LETTERS[1:5],
value=c(13,7,9,21,2))
data <- data %>%
arrange(desc(group)) %>%
mutate(prop = value / sum(data$value) *100) %>%
mutate(ypos = cumsum(prop)- 0.5*prop )
ggplot(data, aes(x="", y=prop, fill=group)) +
geom_bar(stat="identity", width=1, color="white") +
coord_polar("y", start=0) +
theme_void() +
theme(legend.position="none") +
geom_text(aes(y = ypos, label = group), color = "white", size=6) +
scale_fill_brewer(palette="Set1")
系统树图 dendrogram
library(ggraph)
library(igraph)
library(tidyverse)
theme_set(theme_void())
d1 <- data.frame(from="origin", to=paste("group", seq(1,7), sep=""))
d2 <- data.frame(from=rep(d1$to, each=7), to=paste("subgroup", seq(1,49), sep="_"))
edges <- rbind(d1, d2)
name <- unique(c(as.character(edges$from), as.character(edges$to)))
vertices <- data.frame(
name=name,
group=c( rep(NA,8) , rep( paste("group", seq(1,7), sep=""), each=7)),
cluster=sample(letters[1:4], length(name), replace=T),
value=sample(seq(10,30), length(name), replace=T))
mygraph <- graph_from_data_frame( edges, vertices=vertices)
ggraph(mygraph, layout = 'dendrogram') +
geom_edge_diagonal() +
geom_node_text(aes( label=name, filter=leaf, color=group) , angle=90 , hjust=1, nudge_y=-0.1) +
geom_node_point(aes(filter=leaf, size=value, color=group) , alpha=0.6) +
ylim(-.6, NA) +
theme(legend.position="none")
sample <- paste(rep("sample_",24) , seq(1,24) , sep="")
specie <- c(rep("dicoccoides" , 8) , rep("dicoccum" , 8) , rep("durum" , 8))
treatment <- rep(c(rep("High",4 ) , rep("Low",4)),3)
data <- data.frame(sample,specie,treatment)
for (i in seq(1:5)){
gene=sample(c(1:40) , 24 )
data=cbind(data , gene)
colnames(data)[ncol(data)]=paste("gene_",i,sep="")
}
data[data$treatment=="High" , c(4:8)]=data[data$treatment=="High" , c(4:8)]+100
data[data$specie=="durum" , c(4:8)]=data[data$specie=="durum" , c(4:8)]-30
rownames(data) <- data[,1]
dist <- dist(data[ , c(4:8)] , diag=TRUE)
hc <- hclust(dist)
dhc <- as.dendrogram(hc)
specific_leaf <- dhc[[1]][[1]][[1]]
i=0
colLab<<-function(n){
if(is.leaf(n)){
a=attributes(n)
ligne=match(attributes(n)$label,data[,1])
treatment=data[ligne,3];
if(treatment=="Low"){col_treatment="blue"};if(treatment=="High"){col_treatment="red"}
specie=data[ligne,2];
if(specie=="dicoccoides"){col_specie="red"};if(specie=="dicoccum"){col_specie="Darkgreen"};if(specie=="durum"){col_specie="blue"}
attr(n,"nodePar")<-c(a$nodePar,list(cex=1.5,lab.cex=1,pch=20,col=col_treatment,lab.col=col_specie,lab.font=1,lab.cex=1))
}
return(n)
}
dL <- dendrapply(dhc, colLab)
plot(dL , main="structure of the population")
legend("topright",
legend = c("High Nitrogen" , "Low Nitrogen" , "Durum" , "Dicoccoides" , "Dicoccum"),
col = c("red", "blue" , "blue" , "red" , "Darkgreen"),
pch = c(20,20,4,4,4), bty = "n", pt.cex = 1.5, cex = 0.8 ,
text.col = "black", horiz = FALSE, inset = c(0, 0.1))
library(dendextend)
d1 <- USArrests %>% dist() %>% hclust( method="average" ) %>% as.dendrogram()
d2 <- USArrests %>% dist() %>% hclust( method="complete" ) %>% as.dendrogram()
dl <- dendlist(
d1 %>%
set("labels_col", value = c("skyblue", "orange", "grey"), k=3) %>%
set("branches_lty", 1) %>%
set("branches_k_color", value = c("skyblue", "orange", "grey"), k = 3),
d2 %>%
set("labels_col", value = c("skyblue", "orange", "grey"), k=3) %>%
set("branches_lty", 1) %>%
set("branches_k_color", value = c("skyblue", "orange", "grey"), k = 3)
)
tanglegram(dl,
common_subtrees_color_lines = FALSE, highlight_distinct_edges = TRUE, highlight_branches_lwd=FALSE,
margin_inner=7,
lwd=2)
library(dendextend)
library(tidyverse)
dend <- mtcars %>%
select(mpg, cyl, disp) %>%
dist() %>%
hclust() %>%
as.dendrogram()
my_colors <- ifelse(mtcars$am==0, "forestgreen", "green")
par(mar=c(9,1,1,1))
dend %>%
set("labels_col", value = c("skyblue", "orange", "grey"), k=3) %>%
set("branches_k_color", value = c("skyblue", "orange", "grey"), k = 3) %>%
set("leaves_pch", 19) %>%
set("nodes_cex", 0.7) %>%
plot(axes=FALSE)
rect.dendrogram( dend, k=3, lty = 5, lwd = 0, x=1, col=rgb(0.1, 0.2, 0.4, 0.1) )
colored_bars(colors = my_colors, dend = dend, rowLabels = "am")
library(ggraph)
library(igraph)
library(tidyverse)
library(RColorBrewer)
d1 <- data.frame(from="origin", to=paste("group", seq(1,10), sep=""))
d2 <- data.frame(from=rep(d1$to, each=10), to=paste("subgroup", seq(1,100), sep="_"))
edges <- rbind(d1, d2)
vertices <- data.frame(
name = unique(c(as.character(edges$from), as.character(edges$to))) ,
value = runif(111))
vertices$group <- edges$from[ match( vertices$name, edges$to ) ]
vertices$id <- NA
myleaves <- which(is.na( match(vertices$name, edges$from) ))
nleaves <- length(myleaves)
vertices$id[myleaves] <- seq(1:nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves
vertices$hjust <- ifelse( vertices$angle < -90, 1, 0)
vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
mygraph <- graph_from_data_frame( edges, vertices=vertices )
# Make the plot
ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
geom_edge_diagonal(colour="grey") +
scale_edge_colour_distiller(palette = "RdPu") +
geom_node_text(aes(x = x*1.15, y=y*1.15, filter = leaf, label=name, angle = angle, hjust=hjust, colour=group), size=2.7, alpha=1) +
geom_node_point(aes(filter = leaf, x = x*1.07, y=y*1.07, colour=group, size=value, alpha=0.2)) +
scale_colour_manual(values= rep( brewer.pal(9,"Paired") , 30)) +
scale_size_continuous( range = c(0.1,10) ) +
theme_void() +
theme(
legend.position="none",
plot.margin=unit(c(0,0,0,0),"cm"),
) +
expand_limits(x = c(-1.3, 1.3), y = c(-1.3, 1.3))
圆形图 Circular packing
library(ggraph)
library(igraph)
library(tidyverse)
library(viridis)
edges <- flare$edges %>%
filter(to %in% from) %>%
droplevels()
vertices <- flare$vertices %>%
filter(name %in% c(edges$from, edges$to)) %>%
droplevels()
vertices$size <- runif(nrow(vertices))
# Rebuild the graph object
mygraph <- graph_from_data_frame(edges, vertices=vertices)
ggraph(mygraph, layout = 'circlepack') +
geom_node_circle(aes(fill = depth)) +
geom_node_label( aes(label=shortName, filter=leaf, size=size)) +
theme_void() +
theme(legend.position="FALSE") +
scale_fill_viridis()