hello,大家好,不知道下面这张图大家还有印象么
在我之前分享的文章10X单细胞(10X空间转录组)之细胞通讯软件之间的分析比较中,大量采用这样的图进行展示,后来呢,在文章文章中的哑铃图复现(基础知识)尝试完全实现,可惜半路停止,今天就给大家展示如何实现这样的图,不带一点折扣。
其实这个图有个专业的名词,叫韦恩图,但是大家熟悉的韦恩图应该是长下面的样子:
少量的情况下非常美观,但数据一多信息展示就会混乱,于是就有了上面的展示方式,好了,借助R包UpSet,我们来实现这个图。
接下来我们直接通过实例来看下如何进行数据的可视化:
install.packages("UpSetR")
##载入包
library(UpSetR)
library(ggplot2)
library(grid)
library(plyr)
##构建数据
# example oflist input (list of named vectors)
listInput <-list(one = c(1, 2, 3, 5, 7, 8, 11, 12, 13), two = c(1, 2, 4, 5, 10), three =c(1, 5, 6, 7, 8, 9, 10, 12, 13))
# example ofexpression input
expressionInput<- c(one = 2, two = 1, three = 2, `one&two` = 1, `one&three` = 4,`two&three` = 1, `one&two&three` = 2)
##可视化结果
upset(fromList(listInput),order.by = "freq")
upset(fromExpression(expressionInput),order.by = "freq")
##载入数据并绘图
movies <-read.csv(system.file("extdata", "movies.csv", package ="UpSetR"), header = T, sep = ";")
## nsets(频数最多的前六个变量),text.scale =c(intersection size title, intersection size ticklabels, set size title, set size tick labels, set names, numbers above bars)
upset(movies,nsets = 6, number.angles = 30, point.size = 3.5, line.size = 2, mainbar.y.label = "Genre Intersections", sets.x.label = "Movies Per Genre", text.scale =c(1.3, 1.3, 1, 1, 2, 0.75))
##自定义交集的组
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq")
##基于相交的等级进行排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "degree")
##基于等级和频率共同排序,通过先后来确定排序顺序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = c("degree", "freq"))
##保留各组频数,不排序
upset(movies,sets = c("Action", "Adventure", "Comedy","Drama", "Mystery", "Thriller","Romance", "War", "Western"), mb.ratio = c(0.55,0.45), order.by = "freq", keep.order = TRUE)
##对交集结果进行分组,nintersects交叉点的数目阈值,cutoff交叉结果阈值。
upset(movies,nintersects = 70, group.by = "sets", cutoff = 7)
##空的交叉点展示
upset(movies,empty.intersections = "on", order.by = "freq")
##利用不同的颜色显示重要的信息
upset(movies,queries = list(list(query = intersects, params = list("Drama", "Comedy","Action"), color = "orange", active = T), list(query =intersects, params = list("Drama"), color = "red", active =F), list(query = intersects, params = list("Action","Drama"), active = T)))
##通过设置阈值进行标记
upset(movies,queries = list(list(query = elements, params = list("AvgRating", 3.5,4.1), color = "blue", active = T), list(query = elements, params =list("ReleaseDate", 1980, 1990, 2000), color = "red",active = F)))
##通过expression进行筛选
upset(movies,queries = list(list(query = intersects, params = list("Action", "Drama"),active = T), list(query = elements, params = list("ReleaseDate", 1980,1990, 2000), color = "red", active = F)), expression ="AvgRating > 3 & Watches > 100")
##自定义的query结构
Myfunc <-function(row, release, rating) {data <- (row["ReleaseDate"] %in%release) & (row["AvgRating"] > rating)}#row数据源,release,rating指的是parms中的第一,二个参数
upset(movies,queries = list(list(query = Myfunc, params = list(c(1970, 1980, 1990, 1999,2000), 2.5), color = "blue", active = T)))
##增加query的标签legend
upset(movies,query.legend = "top", queries = list(list(query = intersects,
params = list("Drama","Comedy", "Action"), color = "orange", active =T,
query.name = "Funny action"),list(query = intersects, params = list("Drama"),
color = "red", active = F),list(query = intersects, params = list("Action",
"Drama"), active = T, query.name = "Emotionalaction")))
##综合前面的方式的完整例子
upset(movies, query.legend = "bottom", queries =list(list(query = Myfunc, params = list(c(1970,
1980, 1990, 1999, 2000),2.5), color = "orange", active = T), list(query = intersects,
params = list("Action","Drama"), active = F), list(query = elements, params =list("ReleaseDate",
1980, 1990, 2000), color ="red", active = F, query.name = "Decades")),
expression ="AvgRating > 3 & Watches > 100")
##通过柱状图增加变量的其它数据信息其中type=bar plot("hist") or heat map ("heat"/“bool”)
sets <- names(movies[3:19])
avgRottenTomatoesScore <- round(runif(17, min = 0, max = 90))
metadata <- as.data.frame(cbind(sets, avgRottenTomatoesScore))
names(metadata) <- c("sets","avgRottenTomatoesScore")
metadata$avgRottenTomatoesScore <-as.numeric(as.character(metadata$avgRottenTomatoesScore))
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
column ="avgRottenTomatoesScore", assign = 20))))
##增加热图信息
Cities <- sample(c("Boston", "NYC","LA"), 17, replace = T)
metadata <- cbind(metadata, Cities)
metadata$Cities <- as.character(metadata$Cities)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller",
"Romance")), ]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "heat",
column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
LA ="purple")))))
##增加文字信息
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "text",
column ="Cities", assign = 10, colors = c(Boston = "green", NYC ="navy",
LA ="purple")))))
##直接设置连线区域背景
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
column ="avgRottenTomatoesScore", assign = 20), list(type ="matrix_rows",
column ="Cities", colors = c(Boston = "green", NYC ="navy", LA = "purple"),
alpha = 0.5))))
##一次添加多种信息
accepted <- round(runif(17, min = 0, max = 1))
metadata <- cbind(metadata, accepted)
metadata[which(metadata$sets %in% c("Drama","Comedy", "Action", "Thriller", "Romance")),]
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist", column = "avgRottenTomatoesScore",assign = 20), list(type = "bool", column = "accepted", assign= 5, colors = c("#FF3333", "#006400")), list(type ="text", column = "Cities", assign = 5, colors = c(Boston ="green", NYC = "navy", LA = "purple")))))
##混合图的绘制,通过attribute.plots添加
upset(movies, set.metadata = list(data = metadata, plots =list(list(type = "hist",
column ="avgRottenTomatoesScore", assign = 20), list(type = "bool",column = "accepted",
assign = 5, colors =c("#FF3333", "#006400")), list(type = "text",column = "Cities",
assign = 5, colors =c(Boston = "green", NYC = "navy", LA ="purple")),
list(type ="matrix_rows", column = "Cities", colors = c(Boston ="green",
NYC ="navy", LA = "purple"), alpha = 0.5))), queries =list(list(query = intersects,
params =list("Drama"), color = "red", active = F), list(query =intersects,
params =list("Action", "Drama"), active = T), list(query =intersects,
params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
attribute.plots =list(gridrows = 45, plots = list(list(plot = scatter_plot,
x ="ReleaseDate", y = "AvgRating", queries = T), list(plot =scatter_plot,
x ="AvgRating", y = "Watches", queries = F)), ncols = 2),query.legend = "bottom")
##自定义绘图函数的混合绘图
myplot <- function(mydata, x, y) {
plot <- (ggplot(data =mydata, aes_string(x = x, y = y, colour = "color")) +
geom_point() + scale_color_identity()+ theme(plot.margin = unit(c(0,
0, 0, 0),"cm")))
}
another.plot <- function(data, x, y) {
data$decades <-round_any(as.integer(unlist(data[y])), 10, ceiling)
data <-data[which(data$decades >= 1970), ]
myplot <- (ggplot(data,aes_string(x = x)) + geom_density(aes(fill = factor(decades)),
alpha = 0.4) +theme(plot.margin = unit(c(0, 0, 0, 0), "cm"), legend.key.size =unit(0.4,
"cm")))
}
upset(movies, main.bar.color = "black", queries =list(list(query = intersects,
params =list("Drama"), color = "red", active = F), list(query =intersects,
params =list("Action", "Drama"), active = T), list(query =intersects,
params =list("Drama", "Comedy", "Action"), color ="orange", active = T)),
attribute.plots =list(gridrows = 45, plots = list(list(plot = myplot, x ="ReleaseDate",
y ="AvgRating", queries = T), list(plot = another.plot, x ="AvgRating",
y ="ReleaseDate", queries = F)), ncols = 2))
##增加箱线图
upset(movies, boxplot.summary = c("AvgRating","ReleaseDate"))
好了,大家多多学习
生活很好,有你更好