pheatmap,全称 Pretty Heatmaps,直译为“漂亮的热图”,是一款非常优秀的热图绘制 R 包。网上已有很多可供参考的教程[1][2];初学 R 后,我也曾写过一篇教程[3][4],但那篇教程只是把形形色色的绘图参数堆砌罗列,缺乏条理,不利于后来者学习。
- 官方示例
根据对pheatmap pdf说明文档[5]示例的解读,参考其他网络教程,加以归纳整理,确定了绘制热图的基本元素和绘图框架。
- 构建数据集:创建数据、设置行名、列名
- 绘图参数:颜色、边框线、图例、热图块大小、标签字体大小、热图块文本、字符格式及颜色、自定义行/列名、标签方向、标题
- 构建注释参数:注释数据框、图例、注释颜色
- 聚类相关参数:归一化、聚类数目、行列聚类、聚类树高度、添加gap、分块聚类、聚类方法、自定义聚类
后面参数教程和绘图代码也是按照此流程、框架来进行演示:
由于图片较多,故只列出部分图片,其余可按代码运行依次生成
1. 构建数据集
# Load the plotting package used by every example below.
# The workspace is intentionally NOT cleared: rm(list = ls()) would delete the
# reader's own objects while leaving loaded packages and options untouched, so
# it does not give a clean session anyway. Restart R for a clean state instead.
library(pheatmap)
############# Build the data set ###############
# 20 x 10 matrix filled with standard-normal draws. The values are random, so
# call set.seed() beforehand if the figures must be reproducible.
test <- matrix(rnorm(200), 20, 10)
# Shift three sub-blocks upwards so the heatmap shows visible structure:
# rows 1-10 of the odd columns (+3), rows 11-20 of the even columns (+2),
# and rows 15-20 of the even columns once more (+4).
test[1:10, seq(1, 10, 2)] <- test[1:10, seq(1, 10, 2)] + 3
test[11:20, seq(2, 10, 2)] <- test[11:20, seq(2, 10, 2)] + 2
test[15:20, seq(2, 10, 2)] <- test[15:20, seq(2, 10, 2)] + 4
# Set the row and column names (paste0() instead of paste(..., sep = "")).
rownames(test) <- paste0("Gene", 1:20)
colnames(test) <- paste0("Test", 1:10)
2. 绘图参数
# Default plot.
pheatmap(test)
# Custom heatmap colour palette (50 interpolated colours).
pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50))
# Remove the cell borders. The argument is `border_color`; the original
# `border = FALSE` only worked through partial argument matching. The pheatmap
# documentation specifies NA for "no border".
pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50),
         border_color = NA)
# Set the cell border colour.
pheatmap(test, border_color = "red")
# Do not draw the legend.
pheatmap(test, legend = FALSE)
# legend_breaks sets the positions of the legend ticks and legend_labels the
# text shown at those ticks (both vectors have six entries here).
# `cluster_rows` is spelled out in full instead of the partial `cluster_row`.
pheatmap(test, cluster_rows = FALSE, legend_breaks = -1:4,
         legend_labels = c("0", "1e-4", "1e-3", "1e-2", "1e-1", "1"))
图片:Rplot01---Rplot06
- Rplot02.png
# Fix the width and height of every heatmap cell.
pheatmap(test, cellheight = 12, cellwidth = 15)
# fontsize sets the font size of the labels.
pheatmap(test, cellheight = 12, cellwidth = 15, fontsize = 8)
# Print each cell's numeric value inside the cell.
pheatmap(test, display_numbers = TRUE)
# Choose the format string and the colour of the printed numbers.
pheatmap(
  test,
  display_numbers = TRUE,
  number_format = "%.1e",
  number_color = "red"
)
# Fill the cells with custom text instead: "*" where the value exceeds 5 and
# an empty string elsewhere, rebuilt as a matrix with as many rows as `test`.
pheatmap(
  test,
  display_numbers = matrix(ifelse(test > 5, "*", ""), nrow = nrow(test))
)
# Equivalent: ifelse() on a matrix already returns a matrix of the same shape.
pheatmap(
  test,
  display_numbers = ifelse(test > 5, "*", "")
)
`ifelse(test > 5, "*", "")`:对 test 矩阵逐元素进行显著性判定,大于 5 的元素标记为 "*",其余为空字符串
`nrow(test)`:test 矩阵的行数
`matrix(ifelse(test > 5, "*", ""), nrow(test))`:将上述显著性标记重新构建为矩阵,行数与 test 的行数相同
图片:Rplot07---Rplot12
- Rplot07.png
- Rplot09.png
- Rplot11.png
# Hide both the row names and the column names. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
pheatmap(test, show_rownames = FALSE, show_colnames = FALSE)
# Custom row labels: blank for the first 17 rows, gene symbols for the last 3
# (20 labels in total, one per row of `test`).
labels_row <- c(rep("", 17), "Il10", "Il15", "Il1b")
pheatmap(test, labels_row = labels_row)
# Set the angle of the column labels.
pheatmap(test, angle_col = "45")
pheatmap(test, angle_col = "0")
# Add a title.
pheatmap(test, main = "The Pretty Heatmaps by myself")
图片:Rplot13---Rplot18
- Rplot17.png
3. 构建注释参数
############ Build the annotation data ###############
# Column annotation data frame: one row per column of the plotted matrix.
annotation_col <- data.frame(
  CellType = factor(rep(c("CT1", "CT2"), 5)),
  # Written out for all 10 rows instead of relying on data.frame() silently
  # recycling 1:5; the resulting values are identical.
  Time = rep(1:5, times = 2)
)
# The ROW names of the column-annotation data frame must match the COLUMN
# names of the plotted matrix.
rownames(annotation_col) <- paste0("Test", 1:10)
head(annotation_col)
# Row annotation data frame: one row per row of the plotted matrix
# (10 x Path1, 4 x Path2, 6 x Path3).
annotation_row <- data.frame(
  GeneClass = factor(rep(c("Path1", "Path2", "Path3"), c(10, 4, 6)))
)
# The row names of the row-annotation data frame must match the row names of
# the plotted matrix.
rownames(annotation_row) <- paste0("Gene", 1:20)
head(annotation_row)
# Show the column annotation only.
pheatmap(
  test,
  annotation_col = annotation_col
)
# Show both the column annotation and the row annotation.
pheatmap(
  test,
  annotation_row = annotation_row,
  annotation_col = annotation_col
)
# Same plot, with the annotation legend switched off.
pheatmap(
  test,
  annotation_row = annotation_row,
  annotation_col = annotation_col,
  annotation_legend = FALSE
)
图片:Rplot19---Rplot21
- Rplot19.png
- Rplot20.png
# Named list giving the colours of each annotation track. The list names are
# the column names of the annotation data frames.
ann_colors <- list(
  Time = c("white", "firebrick"),
  CellType = c(CT1 = "#1B9E77", CT2 = "#D95F02"),
  GeneClass = c(Path1 = "#7570B3", Path2 = "#E7298A", Path3 = "#66A61E")
)
# Use the custom colours for the column annotation.
pheatmap(test, annotation_col = annotation_col, annotation_colors = ann_colors,
         main = "Title")
# Use the custom colours for both the row and the column annotation.
pheatmap(test, annotation_col = annotation_col, annotation_row = annotation_row,
         annotation_colors = ann_colors)
# Specify only the CellType colours. The element is selected by name rather
# than by position so it still works if the list is reordered.
pheatmap(test, annotation_col = annotation_col,
         annotation_colors = ann_colors["CellType"])
图片:Rplot22---Rplot24
- Rplot23.png
4. 聚类参数
############ Clustering parameters ###############
# Scale the values row-wise before plotting.
pheatmap(test, scale = "row")
# Aggregate the rows into 4 k-means clusters before drawing.
pheatmap(test, kmeans_k = 4)
# Do not cluster the rows. The full argument name `cluster_rows` is used;
# `cluster_row` only worked through partial argument matching.
pheatmap(test, cluster_rows = FALSE)
# Heights of the row and column dendrograms (the default is 50).
pheatmap(test, treeheight_row = 30, treeheight_col = 50)
# No row clustering; insert gaps after rows 10 and 14.
pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE,
         gaps_row = c(10, 14))
# No row clustering; additionally cut the column dendrogram into 2 groups.
# `cutree_cols` is spelled out in full instead of the partial `cutree_col`.
pheatmap(test, annotation_col = annotation_col, cluster_rows = FALSE,
         gaps_row = c(10, 14),
         cutree_cols = 2)
图片:Rplot25---Rplot30
- Rplot29.png
- Rplot30.png
# Choose a different clustering (linkage) method.
pheatmap(test, clustering_method = "average", scale = "row")
# Choose the distance measure used for clustering the rows.
pheatmap(test, clustering_distance_rows = "correlation", scale = "row")
# Supply precomputed distance objects: Minkowski distances between the rows
# and between the columns of `test`.
drows <- dist(test, method = "minkowski")
dcols <- dist(t(test), method = "minkowski")
pheatmap(
  test,
  clustering_distance_cols = dcols,
  clustering_distance_rows = drows
)
# Callback that post-processes a clustering result.
#   hc  : an hclust object.
#   mat : the numeric matrix whose rows were clustered.
# Returns an hclust object whose leaves have been reordered using the first
# right-singular vector of t(mat) as weights (one weight per row of mat).
callback <- function(hc, mat) {
  leaf_weights <- svd(t(mat))$v[, 1]
  tree <- as.dendrogram(hc)
  reordered_tree <- reorder(tree, wts = leaf_weights)
  as.hclust(reordered_tree)
}
# Draw the heatmap, handing `callback` to pheatmap so it can modify the
# clustering before the plot is drawn.
pheatmap(
  test,
  clustering_callback = callback
)
图片:Rplot31---Rplot34
效果图:
按照数据-绘图参数-注释参数-聚类参数的顺序,思路清晰,可读性强,亦便于后续修改。
# Final figure. Arguments are grouped in the order data -> drawing ->
# annotation -> clustering, which keeps the call readable and easy to modify.
pheatmap(
  test,
  # Drawing parameters
  color = colorRampPalette(c("navy", "white", "firebrick3"))(256),
  cellwidth = 20,
  cellheight = 20,
  border_color = "grey60",
  fontsize = 8,
  display_numbers = matrix(ifelse(test > 5, "*", ""), nrow(test)),
  main = "The Pretty Heatmaps By Myself",
  # Annotation parameters
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  annotation_colors = ann_colors,
  # Clustering parameters
  scale = "row",
  treeheight_row = 20,
  treeheight_col = 30,
  cutree_cols = 2,
  cutree_rows = 2,
  clustering_method = "average"
)
- Rplot35.png
参考资料: