直方图简介
直方图(Histogram),又称质量分布图,是一种统计报告图,由一系列高度不等的纵向条纹或线段组成,表示数据分布的情况。 一般用横轴(X轴)表示数据类型,纵轴(Y轴)表示分布(相应值的频数)情况。
绘制直方图,首先要对数据进行分组,然后统计每个分组内数据元的数量。 在平面直角坐标系中,横轴标出每个组的端点,纵轴表示频数,每个矩形的高代表对应的频数,称这样的统计图为频数分布直方图。一般要了解以下几个名词的概念:
组数:统计数据时,把数据按照不同的范围分成几个组,分成组的个数称为组数;
组距:每一组两个端点的差;
频数:落在每个分组内的数据元的数量(频数除以组距得到的是频数密度,而不是频数);
直方图的作用
1、显示各组频数或者数量分布情况;
2、显示各组间频数或数量的差异;
R绘制直方图
可以利用hist()函数进行绘制:
# Usage reference for hist(): the generic, followed by the default method
# with a short note on each argument.
hist(x, ...)

## Default S3 method:
hist(x,                                  # numeric vector of the values to be binned
     breaks = "Sturges",                 # how to bin: a vector of breakpoints, a single number of
                                         # cells (a suggestion only), the name of an algorithm
                                         # (e.g. "Sturges"), or a function computing either
     freq = NULL,                        # logical; TRUE plots counts per bin, FALSE plots
                                         # probability densities (total bar area is 1);
                                         # defaults to TRUE when the breaks are equidistant
     probability = !freq,                # logical alias for !freq: TRUE plots densities,
                                         # FALSE plots counts
     right = TRUE,                       # logical; TRUE makes the bins right-closed (left-open)
                                         # intervals, FALSE makes them left-closed
     density = NULL,                     # density of shading lines, in lines per inch;
                                         # NULL draws no shading lines
     angle = 45,                         # angle of the shading lines in degrees; only has an
                                         # effect together with `density`
     col = "lightgray",                  # colour used to fill the bars
     border = NULL,                      # colour of the bar borders; NULL uses the standard
                                         # foreground colour, NA draws no border
     main = paste("Histogram of", xname), # plot title
     xlim = range(breaks),               # x-axis range
     ylim = NULL,                        # y-axis range
     xlab = xname,                       # x-axis label
     ylab = yname,                       # y-axis label
     axes = TRUE,                        # logical; whether the axes are drawn
     plot = TRUE,                        # logical; TRUE draws the histogram, FALSE only returns
                                         # the computed breaks and counts without plotting
     labels = FALSE,                     # logical or character; if not FALSE, labels are drawn
                                         # on top of the bars
     nclass = NULL,                      # numeric; kept for S compatibility, equivalent to
                                         # giving `breaks` as a single number
     warn.unused = TRUE, ...)            # logical; with plot = FALSE, warn when graphical
                                         # arguments are supplied but cannot be used
案例
# Histogram examples on the built-in mtcars data set, adding one option at a
# time. Plots are laid out on a 3 x 2 grid; once the six panels of a page are
# filled, the next plot starts a new page. The previous layout is saved here
# and restored at the end of the script.
old_par <- par(mfrow = c(3, 2))

# Basic histogram with all defaults
hist(mtcars$mpg)

# Add `breaks`: a single number is a suggested number of bins
hist(mtcars$mpg,
     breaks = 15)

# Add `col`: fill colour of the bars
hist(mtcars$mpg,
     breaks = 15,
     col = "red")

# Add x- and y-axis labels
hist(mtcars$mpg,
     breaks = 15,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题")

# Add a plot title
hist(mtcars$mpg,
     breaks = 15,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题",
     main = "图片标题")

# Add `border`: colour of the bar outlines
hist(mtcars$mpg,
     breaks = 15,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题",
     main = "图片标题",
     border = "green")

# Overlay a kernel density curve. freq = FALSE puts densities on the y-axis
# so the histogram and the curve share the same scale.
hist(mtcars$mpg,
     breaks = 12,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题",
     main = "图片标题",
     border = "green",
     freq = FALSE)
lines(density(mtcars$mpg),
      col = "black",
      lwd = 5)

# Fill the bars with shading lines (`density` lines per inch) drawn at a
# 45-degree angle
hist(mtcars$mpg,
     breaks = 12,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题",
     main = "图片标题",
     border = "black",
     freq = FALSE,
     density = 12,
     angle = 45)
lines(density(mtcars$mpg),
      col = "black",
      lwd = 5)

# Same shaded histogram, now with a thinner density curve and a box drawn
# around the plot region
hist(mtcars$mpg,
     breaks = 12,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题",
     main = "图片标题",
     border = "black",
     freq = FALSE,
     density = 12,
     angle = 45)
# Add the density curve
lines(density(mtcars$mpg),
      col = "black",
      lwd = 3)
# Add the surrounding box
box()

# Add a label on top of each bar
hist(mtcars$mpg,
     breaks = 12,
     col = "red",
     xlab = "x轴标题",
     ylab = "y轴标题",
     main = "图片标题",
     border = "black",
     freq = FALSE,
     density = 12,
     angle = 45,
     labels = TRUE)
# Add the density curve
lines(density(mtcars$mpg),
      col = "black",
      lwd = 3)
# Add the surrounding box
box()

# Restore the plot layout that was active before the examples
par(old_par)
参考文献
[1] Robert I. Kabacoff (著). R语言实战(高涛/肖楠/陈钢 译). 北京: 人民邮电出版社.
[2] https://www.cnblogs.com/xudongliang/p/6913363.html
[3] https://zhuanlan.zhihu.com/p/259835459