当需要展示多个数据集之间的关联并揭示可能存在的潜在模式时,ComplexHeatmap包就有了用武之地。下面提供一个实例。
1. 文件准备。
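需要4个文件:两个heatmap数据文件mRNA.txt和circRNA.txt,以及两个annotation(注释)文件anotation_con.txt和anotation_dis.txt(文件名须与下文代码中的拼写保持一致)。其中mRNA.txt和circRNA.txt里是基因的表达谱数值,anotation_con.txt里是连续型变量属性值,anotation_dis.txt里是离散型变量属性值。4个文件的第一列(行名)需要保持一致且顺序相同;此外,mRNA.txt和circRNA.txt的样本列也需要一一对应、顺序相同,因为heatmap2会直接沿用heatmap1的列聚类顺序。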
a. mRNA.txt
b. circRNA.txt
c. anotation_con.txt
d. anotation_dis.txt
2. 代码。
a. 加载包,读取表达谱文件,构建聚类树。
library(ComplexHeatmap)
library(circlize)

# Read a tab-separated expression table (first column = row names) and
# standardise every row with scale(). Returns a numeric matrix that keeps
# the row names and column names of the input file.
read_scaled_expression <- function(path) {
  expr <- read.table(
    path,
    header = TRUE, row.names = 1, sep = "\t", quote = "\"'",
    check.names = FALSE
  )
  # apply() yields one column per input row, hence the transpose. The
  # result carries no sample names, so they are restored explicitly.
  scaled <- t(apply(expr, 1, scale))
  colnames(scaled) <- colnames(expr)
  scaled
}

# mRNA matrix; the group label of a sample is its column name with the
# first run of digits removed.
mydata1 <- read_scaled_expression("mRNA.txt")
mydata1_colnames <- colnames(mydata1)
groupn1 <- sub("\\d+", "", mydata1_colnames, perl = TRUE)

# circRNA matrix, processed in exactly the same way.
mydata2 <- read_scaled_expression("circRNA.txt")
mydata2_colnames <- colnames(mydata2)
groupn2 <- sub("\\d+", "", mydata2_colnames, perl = TRUE)

# Column (sample) tree computed on the mRNA matrix only.
column_tree <- hclust(dist(t(mydata1)))

# One colour scale for both heatmaps: blue at the overall minimum, grey at
# the midpoint of the range, red at the overall maximum.
expr_range <- range(mydata1, mydata2)
colset <- colorRamp2(
  seq(expr_range[1], expr_range[2], length.out = 3),
  c("blue", "#EEEEEE", "red")
)
b. 配置heatmap1。行列分别聚类。
# Colour bar above heatmap 1: one rainbow colour per distinct sample group.
annot1 <- data.frame(Group1 = groupn1)
group1_levels <- unique(groupn1)
col_group1 <- list(
  Group1 = setNames(rainbow(length(group1_levels)), group1_levels)
)
ha1 <- HeatmapAnnotation(
  df = annot1, col = col_group1,
  show_legend = FALSE, show_annotation_name = FALSE
)

# Heatmap 1 (mRNA). Rows are clustered here (euclidean distance, average
# linkage). Columns use the precomputed column_tree, so no column
# distance/method is passed: those arguments have no effect once
# cluster_columns is an hclust object. column_dend_reorder = FALSE keeps
# the leaf order of the tree as it is.
H1 <- Heatmap(
  mydata1,
  name = "mRNA",
  column_title = "mRNA",
  col = colset,
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "average",
  cluster_columns = column_tree,
  column_dend_reorder = FALSE,
  show_row_names = TRUE,
  show_column_dend = TRUE,
  top_annotation = ha1,
  row_title_gp = gpar(fontsize = 8)
)
c. 配置heatmap2。不单独聚类,列按heatmap1的列聚类顺序排列。
# Heatmap 2 (circRNA) is not clustered on its own; its columns are put in
# the leaf order of the mRNA column tree. That only makes sense when both
# matrices hold the same number of samples.
sample_order <- column_tree$order
if (ncol(mydata2) != length(sample_order)) {
  stop(
    "circRNA matrix has ", ncol(mydata2), " columns but the mRNA column ",
    "tree has ", length(sample_order), " leaves",
    call. = FALSE
  )
}

# The annotation must be reordered together with the matrix columns,
# otherwise the group colour bar no longer lines up with the samples.
annot2 <- data.frame(Group2 = groupn2[sample_order])

# Colours are assigned by first appearance in the original column order,
# the same rule heatmap 1 uses, so identical group lists get identical
# colours in both annotation bars (legends are hidden for both).
group2_levels <- unique(groupn2)
col_group2 <- list(
  Group2 = setNames(rainbow(length(group2_levels)), group2_levels)
)
ha2 <- HeatmapAnnotation(
  df = annot2, col = col_group2,
  show_legend = FALSE, show_annotation_name = FALSE
)

H2 <- Heatmap(
  mydata2[, sample_order, drop = FALSE],
  name = "circRNA",
  column_title = "circRNA",
  col = colset,
  cluster_columns = FALSE,
  top_annotation = ha2,
  show_row_names = TRUE,
  show_heatmap_legend = TRUE
)
d. 配置连续型annotation的heatmap。
# Continuous per-row attributes (first file column = row names).
andata_con <- read.table(
  "anotation_con.txt",
  header = TRUE, row.names = 1, sep = "\t", quote = "\"'",
  check.names = FALSE
)

# Build a one-column heatmap for a numeric attribute, coloured from white
# (smallest value) to red (largest value).
continuous_annotation_heatmap <- function(values, title) {
  Heatmap(
    values,
    name = title,
    col = colorRamp2(range(values), c("white", "red"))
  )
}

# One heatmap for each of the first two attribute columns.
Han_con1 <- continuous_annotation_heatmap(
  andata_con[, 1], colnames(andata_con)[1]
)
Han_con2 <- continuous_annotation_heatmap(
  andata_con[, 2], colnames(andata_con)[2]
)
e. 配置离散型annotation的heatmap。
# Discrete per-row attributes (first file column = row names).
andata_dis <- read.table(
  "anotation_dis.txt",
  header = TRUE, row.names = 1, sep = "\t", quote = "\"'",
  check.names = FALSE
)

# One single-column heatmap for each of the first two attribute columns.
# No `col` is supplied, so the colours are left to ComplexHeatmap; each
# heatmap (and its legend) is named after the column it shows.
Han_dis1 <- Heatmap(andata_dis[, 1], name = colnames(andata_dis)[1])
Han_dis2 <- Heatmap(andata_dis[, 2], name = colnames(andata_dis)[2])
f. 配置并输出图片。
# Global ComplexHeatmap options: legend fonts and column-name font size.
ht_opt(
  legend_title_gp = gpar(fontsize = 12, fontface = "bold"),
  legend_labels_gp = gpar(fontsize = 10),
  heatmap_column_names_gp = gpar(fontsize = 8)
)

# Concatenate all heatmaps side by side: the two expression heatmaps first,
# then the continuous and the discrete attribute columns.
ht_list <- H1 + H2 + Han_con1 + Han_con2 + Han_dis1 + Han_dis2

# newpage = FALSE: no new graphics page is requested before drawing.
draw(
  ht_list,
  newpage = FALSE,
  auto_adjust = FALSE,
  column_title = "Comprehensive correspondence between mRNA and CircRNA",
  column_title_gp = gpar(fontsize = 15, fontface = "bold"),
  heatmap_legend_side = "right"
)