原文见http://www.win-vector.com/blog/2018/10/scatterplot-matrices-pair-plots-with-cdata-and-ggplot2/
散点图矩阵就是把数据集中的数值变量两两组合,分别绘制散点图。在基础R(graphics包)中,对应的绘图函数是pairs()。下面是用基础绘图函数绘制iris数据集的一个例子:
# Base-graphics scatterplot matrix of the four iris measurement columns.
# pch = 21 is a fillable circle; bg picks one fill colour per row by
# indexing the three-colour palette with the species' integer code.
pairs(
  iris[, 1:4],
  main = "Anderson's Iris Data -- 3 species",
  pch = 21,
  bg = c("#1b9e77", "#d95f02", "#7570b3")[as.integer(iris$Species)]
)
此外还有几种绘图方式,例如GGally包的ggpairs()和lattice包的splom()。
library(ggplot2)
library(GGally)
# GGally pair plot of the first four iris columns, coloured by species.
ggpairs(
  iris,
  mapping = aes(color = Species),
  columns = 1:4
) +
  ggtitle("Anderson's Iris Data -- 3 species")
library(lattice)
# Lattice scatterplot matrix of the four iris measurement columns,
# with points grouped by species.
splom(
  iris[, 1:4],
  groups = iris$Species,
  main = "Anderson's Iris Data -- 3 species"
)
下面用cdata包配合ggplot2来绘制散点图矩阵
首先是加载相关的R包
library(ggplot2)
library(cdata)
然后按照需求重塑数据
# Names of the first four columns of iris (the measurement columns).
meas_vars <- names(iris)[1:4]

# Control table for the reshape: one row per ordered (x, y) pairing of the
# measurement columns, keyed by pair_key = "<x name> <y name>".
controlTable <- local({
  # the data.frame() call strips the attributes from
  # the frame returned by expand.grid()
  pairings <- data.frame(
    expand.grid(meas_vars, meas_vars, stringsAsFactors = FALSE)
  )
  names(pairings) <- c("x", "y")
  # put the key column first, followed by the x and y columns
  data.frame(
    pair_key = paste(pairings$x, pairings$y),
    pairings,
    stringsAsFactors = FALSE
  )
})
controlTable
# pair_key x y
## 1 Sepal.Length Sepal.Length Sepal.Length Sepal.Length
## 2 Sepal.Width Sepal.Length Sepal.Width Sepal.Length
## 3 Petal.Length Sepal.Length Petal.Length Sepal.Length
## 4 Petal.Width Sepal.Length Petal.Width Sepal.Length
## 5 Sepal.Length Sepal.Width Sepal.Length Sepal.Width
## 6 Sepal.Width Sepal.Width Sepal.Width Sepal.Width
## 7 Petal.Length Sepal.Width Petal.Length Sepal.Width
## 8 Petal.Width Sepal.Width Petal.Width Sepal.Width
## 9 Sepal.Length Petal.Length Sepal.Length Petal.Length
## 10 Sepal.Width Petal.Length Sepal.Width Petal.Length
## 11 Petal.Length Petal.Length Petal.Length Petal.Length
## 12 Petal.Width Petal.Length Petal.Width Petal.Length
## 13 Sepal.Length Petal.Width Sepal.Length Petal.Width
## 14 Sepal.Width Petal.Width Sepal.Width Petal.Width
## 15 Petal.Length Petal.Width Petal.Length Petal.Width
## 16 Petal.Width Petal.Width Petal.Width Petal.Width
# Reshape iris with the control table: each input row becomes one output
# row per (x, y) pairing, with Species copied alongside.
iris_aug <- rowrecs_to_blocks(iris, controlTable, columnsToCopy = "Species")
head(iris_aug)
## Species pair_key x y
## 1 setosa Sepal.Length Sepal.Length 5.1 5.1
## 2 setosa Sepal.Width Sepal.Length 3.5 5.1
## 3 setosa Petal.Length Sepal.Length 1.4 5.1
## 4 setosa Petal.Width Sepal.Length 0.2 5.1
## 5 setosa Sepal.Length Sepal.Width 5.1 3.5
## 6 setosa Sepal.Width Sepal.Width 3.5 3.5
然后用facet_grid创建图形
# Recover the x- and y-variable names for every row from pair_key, which
# was built as "<x name> <y name>" (the iris column names contain no
# spaces, so splitting on a single space is unambiguous). Without this
# step iris_aug has no xv / yv columns and the code below fails.
key_parts <- strsplit(as.character(iris_aug$pair_key), split = " ", fixed = TRUE)
iris_aug$xv <- vapply(key_parts, function(p) p[[1]], character(1))
iris_aug$yv <- vapply(key_parts, function(p) p[[2]], character(1))

# reorder the key columns to be the same order
# as the base version above
iris_aug$xv <- factor(iris_aug$xv, levels = meas_vars)
iris_aug$yv <- factor(iris_aug$yv, levels = meas_vars)

# One facet per (yv, xv) pairing; scales = "free" lets each row/column of
# panels use the range of its own variable.
ggplot(iris_aug, aes(x = x, y = y)) +
  geom_point(aes(color = Species, shape = Species)) +
  facet_grid(yv ~ xv, labeller = label_both, scales = "free") +
  ggtitle("Anderson's Iris Data -- 3 species") +
  scale_color_brewer(palette = "Dark2") +
  ylab(NULL) +
  xlab(NULL)
用WVPlots包的PairPlot()函数(https://winvector.github.io/WVPlots/reference/PairPlot.html)也可以绘制同样的图形,而且相对简单一些。
library(WVPlots)
# Same pair plot via WVPlots: the four measurement columns of iris,
# with points grouped by Species.
PairPlot(
  d = iris,
  meas_vars = colnames(iris)[1:4],
  title = "Anderson's Iris Data -- 3 species",
  group_var = "Species"
)