首先要明白 plot_pseudotime_heatmap的原理,先调出它的源代码
function (cds_subset, cluster_rows = TRUE, hclust_method = "ward.D2",
num_clusters = 6, hmcols = NULL, add_annotation_row = NULL,
add_annotation_col = NULL, show_rownames = FALSE, use_gene_short_name = TRUE,
norm_method = c("log", "vstExprs"), scale_max = 3, scale_min = -3,
trend_formula = "~sm.ns(Pseudotime, df=3)", return_heatmap = FALSE,
cores = 1)
{
num_clusters <- min(num_clusters, nrow(cds_subset))
pseudocount <- 1
newdata <- data.frame(Pseudotime = seq(min(pData(cds_subset)$Pseudotime),
max(pData(cds_subset)$Pseudotime), length.out = 100))
#注意这里将拟时间数据简化运算了
m <- genSmoothCurves(cds_subset, cores = cores, trend_formula = trend_formula,
relative_expr = T, new_data = newdata)#在这里使用了平滑处理的函数
m = m[!apply(m, 1, sum) == 0, ]
norm_method <- match.arg(norm_method)
#这里对数据进一步标准化!!!1
if (norm_method == "vstExprs" && is.null(cds_subset@dispFitInfo[["blind"]]$disp_func) ==
FALSE) {
m = vstExprs(cds_subset, expr_matrix = m)
}
else if (norm_method == "log") {
m = log10(m + pseudocount)
}
m = m[!apply(m, 1, sd) == 0, ]
m = Matrix::t(scale(Matrix::t(m), center = TRUE))
m = m[is.na(row.names(m)) == FALSE, ]
m[is.nan(m)] = 0
m[m > scale_max] = scale_max
m[m < scale_min] = scale_min
heatmap_matrix <- m
row_dist <- as.dist((1 - cor(Matrix::t(heatmap_matrix)))/2)
row_dist[is.na(row_dist)] <- 1
if (is.null(hmcols)) {
bks <- seq(-3.1, 3.1, by = 0.1)
hmcols <- blue2green2red(length(bks) - 1)
}
else {
bks <- seq(-3.1, 3.1, length.out = length(hmcols))
}
ph <- pheatmap(heatmap_matrix, useRaster = T, cluster_cols = FALSE,
cluster_rows = cluster_rows, show_rownames = F, show_colnames = F,
clustering_distance_rows = row_dist, clustering_method = hclust_method,
cutree_rows = num_clusters, silent = TRUE, filename = NA,
breaks = bks, border_color = NA, color = hmcols)
if (cluster_rows) {
annotation_row <- data.frame(Cluster = factor(cutree(ph$tree_row,
num_clusters)))
}
else {
annotation_row <- NULL
}
if (!is.null(add_annotation_row)) {
old_colnames_length <- ncol(annotation_row)
annotation_row <- cbind(annotation_row, add_annotation_row[row.names(annotation_row),
])
colnames(annotation_row)[(old_colnames_length + 1):ncol(annotation_row)] <- colnames(add_annotation_row)
}
if (!is.null(add_annotation_col)) {
if (nrow(add_annotation_col) != 100) {
stop("add_annotation_col should have only 100 rows (check genSmoothCurves before you supply the annotation data)!")
}
annotation_col <- add_annotation_col
}
else {
annotation_col <- NA
}
if (use_gene_short_name == TRUE) {
if (is.null(fData(cds_subset)$gene_short_name) == FALSE) {
feature_label <- as.character(fData(cds_subset)[row.names(heatmap_matrix),
"gene_short_name"])
feature_label[is.na(feature_label)] <- row.names(heatmap_matrix)
row_ann_labels <- as.character(fData(cds_subset)[row.names(annotation_row),
"gene_short_name"])
row_ann_labels[is.na(row_ann_labels)] <- row.names(annotation_row)
}
else {
feature_label <- row.names(heatmap_matrix)
row_ann_labels <- row.names(annotation_row)
}
}
else {
feature_label <- row.names(heatmap_matrix)
if (!is.null(annotation_row))
row_ann_labels <- row.names(annotation_row)
}
row.names(heatmap_matrix) <- feature_label
if (!is.null(annotation_row))
row.names(annotation_row) <- row_ann_labels
colnames(heatmap_matrix) <- c(1:ncol(heatmap_matrix))
ph_res <- pheatmap(heatmap_matrix[, ], useRaster = T, cluster_cols = FALSE,
cluster_rows = cluster_rows, show_rownames = show_rownames,
show_colnames = F, clustering_distance_rows = row_dist,
clustering_method = hclust_method, cutree_rows = num_clusters,
annotation_row = annotation_row, annotation_col = annotation_col,
treeheight_row = 20, breaks = bks, fontsize = 6, color = hmcols,
border_color = NA, silent = TRUE, filename = NA)
grid::grid.rect(gp = grid::gpar("fill", col = NA))
grid::grid.draw(ph_res$gtable)
if (return_heatmap) {
return(ph_res)
}
}
根据上述代码,总结起来就是,plot_pseudotime_heatmap=matrix+genSmoothCurves+pheatmap
因此我们只需要将我们的表达矩阵先使用genSmoothCurves函数处理,然后使用pheatmap做出图即可。
以下是尝试的例子
1、先使用monocle2中的plot_psudotime_heatmap
plot_pseudotime_heatmap(x_monocle[1:10,],cluster_rows = F)
2、使用genSmoothCurves+pheatmap
b<-genSmoothCurves(x_monocle[1:10,],new_data = pData(x)[,3,drop=F])
pheatmap(b[,order(pData(x_monocle)[,3])],
color = colorRampPalette(c("green","black","red"))(100),
breaks = seq(-3,3,6/100),
show_rownames = F,
show_colnames = F,
cluster_rows = F,
cluster_cols = F,
scale = "row",
clustering_method = "ward.D2")
ps:
1、我没有去查包装的函数使用的调色板,不过对颜色的调整应该是下面的函数
bks <- seq(-3.1, 3.1, by = 0.1)
hmcols <- blue2green2red(length(bks) - 1)
#但是这个blue2green2red我没有查到!
2、注意这只是简单的重现,在原包装函数中还有对平滑处理后的数据的进一步标准化,比如对表达矩阵取对指数等
3、要复现一摸一样的图需要再细调整pheatmap的其它参数
4、之所以纠结这些是因为包装函数里能修改热图的参数太少了!!!!
在此补充(再次更细致地复现):
1、plot_pseudotime_heatmap=matrix+genSmoothCurves(该处拟时序被统一简化为100个数值)+log10(mat+1)+pheatmap(其中的blue2green2red函数经查为colorRamps包的函数)
2、重新复现包装函数的结果
(1)包装函数代码和结果
b=plot_pseudotime_heatmap(x_monocle[gene_towards_pseudotime_filter$gene_short_name,],return_heatmap = T)
(2)使用不同的函数复现上述结果
newdata<-data.frame(Pseudotime = seq(min(pData(x_monocle)$Pseudotime),
max(pData(x_monocle)$Pseudotime), length.out = 100))
genSmoothCurves_mat<-genSmoothCurves(x_monocle[gene_towards_pseudotime_filter$gene_short_name,],
new_data = newdata,
cores = 10)
pheatmap(log10(genSmoothCurves_mat[b$tree_row$order,]+1),
scale = "row",
cluster_rows = F,
cluster_cols = F,
#annotation_col = annocol,
#annotation_colors = annocolor,
show_rownames = F,
show_colnames = F,
color = hmcols,
breaks = bks)