相关性热图的完美解决方案 – pheatmap包
install.packages('pheatmap')# 安装包,加载数据
library(pheatmap)
# Build a 20 x 10 test matrix from 200 standard-normal draws.
test <- matrix(rnorm(200), nrow = 20, ncol = 10)
# Rows 1-10, odd columns (1, 3, 5, 7, 9): shift every value up by 3.
test[1:10, seq(1, 10, by = 2)] <- 3 + test[1:10, seq(1, 10, by = 2)]
# Rows 11-20, even columns (2, 4, 6, 8, 10): shift every value up by 1.
test[11:20, seq(2, 10, by = 2)] <- 1 + test[11:20, seq(2, 10, by = 2)]
# Rows 15-20, even columns: shift up by a further 5. These rows were already
# shifted by 1 in the previous step, so the two shifts stack.
test[15:20, seq(2, 10, by = 2)] <- 5 + test[15:20, seq(2, 10, by = 2)]
> head(test)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 5.862158 1.0025110 4.643383 1.3927443 3.078703 -0.70255575
[2,] 2.170332 0.9238553 3.270871 -1.5441517 2.466412 2.79953282
[3,] 3.214988 0.8554705 3.535641 -0.1205533 3.057674 0.53514690
[4,] 2.112973 -0.3915936 3.574049 0.1224274 2.816445 0.02743305
[5,] 3.875925 0.1835678 4.237681 -1.1217373 4.015698 -1.19018925
[6,] 4.492317 -1.3624362 3.605374 -0.7925712 1.541456 0.70211275
[,7] [,8] [,9] [,10]
[1,] 2.412597 -0.68667529 2.693478 0.5390070
[2,] 3.499522 -0.57118915 2.724884 -0.3781316
[3,] 3.164220 -0.80261049 5.139696 0.3642769
[4,] 1.781402 -0.02899568 3.702517 -0.4853577
[5,] 3.435076 0.71499645 2.017988 -0.7940890
[6,] 2.561281 1.31073192 1.372816 -0.4561410
# Label the matrix: rows become gene1..gene20, columns become test1..test10.
dimnames(test) <- list(paste0("gene", 1:20), paste0("test", 1:10))
head(test)
test1 test2 test3 test4 test5 test6
gene1 5.862158 1.0025110 4.643383 1.3927443 3.078703 -0.70255575
gene2 2.170332 0.9238553 3.270871 -1.5441517 2.466412 2.79953282
gene3 3.214988 0.8554705 3.535641 -0.1205533 3.057674 0.53514690
gene4 2.112973 -0.3915936 3.574049 0.1224274 2.816445 0.02743305
gene5 3.875925 0.1835678 4.237681 -1.1217373 4.015698 -1.19018925
gene6 4.492317 -1.3624362 3.605374 -0.7925712 1.541456 0.70211275
test7 test8 test9 test10
gene1 2.412597 -0.68667529 2.693478 0.5390070
gene2 3.499522 -0.57118915 2.724884 -0.3781316
gene3 3.164220 -0.80261049 5.139696 0.3642769
gene4 1.781402 -0.02899568 3.702517 -0.4853577
gene5 3.435076 0.71499645 2.017988 -0.7940890
gene6 2.561281 1.31073192 1.372816 -0.4561410
# Plot directly, setting the heights of the row and column trees to 50.
pheatmap(test, treeheight_col = 50, treeheight_row = 50)
# Turn off column clustering and switch to a green-white-red palette
# interpolated to 1000 colours.
pheatmap(
  test,
  color = colorRampPalette(c("green", "white", "red"))(1000),
  cluster_cols = FALSE
)
# Add grouping information so that pheatmap can display row / column groups.
# Column annotations: CellType alternates X1/X2 across the ten columns and
# Time (1..5) is recycled twice; row names are test1..test10.
annotation_col <- data.frame(
  CellType = factor(rep(c("X1", "X2"), times = 5)),
  Time = 1:5,
  row.names = paste0("test", 1:10)
)
# Row annotations: GeneClass is P1 for the first 10 rows, P2 for the next 7
# and P3 for the last 3; row names are gene1..gene20.
annotation_row <- data.frame(
  GeneClass = factor(rep(c("P1", "P2", "P3"), times = c(10, 7, 3))),
  row.names = paste0("gene", 1:20)
)
# Draw the heatmap with both the row and the column annotation tracks.
pheatmap(
  test,
  annotation_row = annotation_row,
  annotation_col = annotation_col
)
## Use the annotation_colors argument to set the colours of each group.
# Each list name must match an annotation column name exactly, including case:
# "Time" and "CellType" are columns of annotation_col, "GeneClass" is the
# column of annotation_row. The key was previously spelled "cellType", which
# does not match the "CellType" column, so its X1/X2 colours were not applied.
ann_colors <- list(
  Time = c("white", "green"),
  CellType = c(X1 = "#1B9E77", X2 = "#D95F02"),
  GeneClass = c(P1 = "#7570B3", P2 = "#E7298A", P3 = "#66A61E")
)
# Draw the annotated heatmap again, this time with the custom group colours.
pheatmap(
  test,
  annotation_colors = ann_colors,
  annotation_row = annotation_row,
  annotation_col = annotation_col
)
# cutree_rows and cutree_cols split the heatmap into separate blocks according
# to the row and column clustering.
pheatmap(
  test,
  cutree_rows = 3,
  cutree_cols = 2,
  annotation_colors = ann_colors,
  annotation_row = annotation_row,
  annotation_col = annotation_col
)
# The numbers 3 and 2 were chosen by looking at the clustering trees.
# 查看pheatmap的聚类结果
> result <- pheatmap(test)
> summary(result)
Length Class Mode
tree_row 7 hclust list
tree_col 7 hclust list
kmeans 1 -none- logical
gtable 6 gtable list