参考链接:https://r-graph-gallery.com/321-introduction-to-interactive-sankey-diagram-2.html
利用networkD3画桑基图时,可以使用两种输入数据格式:connection data frame(3列)或者incidence matrix(方阵)。
connection data frame一般有3列,第一列为source,第二列为target,第三列为赋予该connection的其他信息,例如该flow的值。
# Packages
library(networkD3)
library(dplyr)

# Flow table: one row per connection, giving the origin node, the
# destination node and the intensity of the flow between them.
links <- data.frame(
  source = c(
    "group_A", "group_A", "group_B", "group_C", "group_C", "group_E"
  ),
  target = c(
    "group_C", "group_D", "group_E", "group_F", "group_G", "group_H"
  ),
  value = c(2, 3, 2, 3, 1, 3)
)

# Node table: every distinct name that appears as a source or a target,
# in order of first appearance (sources first, then targets).
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)

# sankeyNetwork() is given zero-based row positions in `nodes` rather than
# node names, so translate each endpoint into its index minus one.
links$IDsource <- match(links[["source"]], nodes[["name"]]) - 1
links$IDtarget <- match(links[["target"]], nodes[["name"]]) - 1

# Colour grouping.
# Nodes: the first five nodes belong to group "a", the last three to "b".
nodes$group <- factor(rep(c("a", "b"), times = c(5, 3)))
# Links: the first three connections are "type_a", the last three "type_b".
links$group <- factor(rep(c("type_a", "type_b"), each = 3))

# D3 ordinal scale mapping every node/link group name to a colour.
my_color <- 'd3.scaleOrdinal() .domain(["a","b","type_a","type_b"]) .range(["#69b3a2","steelblue","#69b3a2","steelblue"])'

# Build the Sankey widget.
p <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  colourScale = my_color,
  NodeGroup = "group",
  LinkGroup = "group"
)

# Optional: write the widget to an HTML file.
# library(htmlwidgets)
# saveWidget(p, file=paste0( getwd(), "/HtmlWidget/sankeyBasic1.html"))
incidence matrix一般为一个方阵,行名和列名代表node,x行y列的数值代表从x流向y的connection的数值。对于这种数据类型,应当先转换成上面的长表(connection data frame)格式,然后再画图。
# Packages
library(networkD3)
library(dplyr)

# Build an incidence matrix. Flows go from the row name to the column name,
# so the connections are directed.
set.seed(1)
data <- matrix(sample(seq(0, 40), 49, replace = TRUE), 7, 7)
# Keep only the strongest flows; everything below 35 is treated as "no flow".
data[data < 35] <- 0
colnames(data) <- rownames(data) <- c(
  "group_A", "group_B", "group_C", "group_D", "group_E", "group_F", "group_G"
)

# Reshape the matrix into a long connection data frame with the columns
# source / target / value.
# This is done with base R only: rownames_to_column() and gather() live in
# tibble and tidyr, which are not attached here, so calling them would stop
# with "could not find function". as.data.frame(as.table(.)) walks the matrix
# column by column (row index varying fastest) and emits the row name, the
# column name and the cell value for every cell.
links <- as.data.frame(
  as.table(data),
  responseName = "value",
  stringsAsFactors = FALSE
)
names(links)[1:2] <- c("source", "target")
# Drop the empty cells: a zero means there is no connection to draw.
links <- links %>%
  filter(value != 0)

# Node table: every distinct name that appears as a source or a target,
# in order of first appearance (sources first, then targets).
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)

# sankeyNetwork() is given zero-based row positions in `nodes` rather than
# node names, so translate each endpoint into its index minus one.
links$IDsource <- match(links$source, nodes$name) - 1
links$IDtarget <- match(links$target, nodes$name) - 1

# Build the Sankey widget and display it.
p <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  sinksRight = FALSE
)
p

# Optional: write the widget to an HTML file.
# library(htmlwidgets)
# saveWidget(p, file=paste0( getwd(), "/HtmlWidget/sankeyBasic2.html"))