R:为树状图的分支着色,同时保留颜色图例

问题描述 投票:0回答:2

目标:创建一个树状图,其分支按因子变量着色,并且结果图中包含图例,用于说明每种分支颜色对应因子变量的哪个取值。

我的数据前几列是因子变量(样本元数据),其后是用于创建树状图的数值数据:

> cleaned_mayo[1:5,1:20]
          patient                Source         Tissue RIN Diagnosis Gender  AgeAtDeath ApoE   FLOWCELL PMI N_unmapped N_multimapping N_noFeature N_ambiguous ENSG00000223972
1924_TCX 1924_TCX MayoBrainBank_Dickson TemporalCortex 5.6   Control      F 90_or_above   33 AC5R6PACXX   2    2773880        9656114     8225967     2876479               1
1926_TCX 1926_TCX MayoBrainBank_Dickson TemporalCortex 7.8   Control      F          88   33 AC44HKACXX   2    2279283       12410116     9503353     3600252               2
1935_TCX 1935_TCX MayoBrainBank_Dickson TemporalCortex 8.6   Control      F          88   33 AC5T2GACXX   3    3120169        8650081     9640468     4603751               0
1925_TCX 1925_TCX MayoBrainBank_Dickson TemporalCortex 6.6   Control      F          89   33 BC6178ACXX   4    2046886       10627577     7533671     3361385               1
1963_TCX 1963_TCX MayoBrainBank_Dickson TemporalCortex 9.7   Control      M 90_or_above   33 AC5T1WACXX   4    1810116        9611375     5343437     2983079               2
         ENSG00000227232 ENSG00000278267 ENSG00000243485 ENSG00000274890 ENSG00000237613
1924_TCX              80               7               1               0               0
1926_TCX             113              22               9               0               0
1935_TCX             181              21               2               0               0
1925_TCX              75               9               5               0               0
1963_TCX              73              14               1               0               0

数据维度为:161 x 60,739。利用这些数据,我得到了一个分支着色但没有图例的树状图,以及一个标签(而非分支)着色且带有图例的树状图。我想将两者结合起来。

创建带有彩色分支但没有图例的树状图:

# Create the dendrogram for visualization.
# Columns 1-14 hold sample metadata; expression data starts at column 15
# and runs to the last column, so the upper bound is taken from the data.
dend_expr <- cleaned_mayo[, 15:ncol(cleaned_mayo)] %>% # Isolate expression data
  scale() %>% # Normalize
  dist() %>% # Compute distance measure
  hclust() %>% # Cluster hierarchically
  as.dendrogram()

# Arrange the sample rows in the same order as the leaves of the tree
tree_labels <- cleaned_mayo[order.dendrogram(dend_expr), ]

# Color branches by diagnosis: store each leaf's diagnosis as the "col"
# edge parameter of the branch leading to it
dend_expr <- assign_values_to_leaves_edgePar(
  dend_expr,
  value = tree_labels$Diagnosis,
  edgePar = "col"
) %>%
  as.ggdend()

# Plot dendrogram horizontally, without leaf labels.
# TRUE/FALSE are spelled out because T and F are reassignable variables.
ggplot(dend_expr, horiz = TRUE, theme = NULL, labels = FALSE) +
  ggtitle("Mayo Cohort: Hierarchical Clustering of Patients Colored by Diagnosis")

使用彩色标签(不是分支)和图例创建树状图:

# Create the dendrogram for visualization.
# Columns 1-14 hold sample metadata; expression data starts at column 15.
dend_expr <- cleaned_mayo[, 15:ncol(cleaned_mayo)] %>% # Isolate expression data
  scale() %>% # Normalize
  dist() %>% # Compute distance measure
  hclust() %>% # Cluster hierarchically
  as.dendrogram()

# Extract plotting data (segments and leaf labels) from the dendrogram
tree_labels <- dendro_data(dend_expr, type = "rectangle")

# Attach the diagnosis to each leaf label. Only the join key and the colour
# variable are merged; carrying every expression column into the label table
# is unnecessary for the plot and makes the merge needlessly expensive.
tree_labels$labels <- merge(
  x = tree_labels$labels,
  y = cleaned_mayo[, c("patient", "Diagnosis")],
  by.x = "label",
  by.y = "patient"
)

ggplot() +
  geom_segment(
    data = segment(tree_labels),
    aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  # hjust is a constant, so it is set as a parameter rather than mapped in aes()
  geom_text(
    data = label(tree_labels),
    aes(x = x, y = y, label = label, colour = Diagnosis),
    hjust = 0,
    size = 3
  ) +
  #geom_point(data = label(tree_labels), aes(x=x, y=y), size=2, shape = 21) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  scale_colour_brewer(palette = "Dark2") +
  theme_dendro() +
  ggtitle("Mayo Cohort: Hierarchical Clustering of Patients Colored by Diagnosis")

各输出示例:彩色分支(无图例);带有图例的彩色标签

如有任何帮助,我们将不胜感激。谢谢!

r data-visualization dendrogram dendextend ggdendro
2个回答
1
投票

以下是如何实现所需着色的示例:

library(tidyverse)
library(ggdendro)
library(dendextend)

一些数据:

# Simulate 100 observations of 10 variables, then scale, compute distances,
# cluster hierarchically and convert the result to a dendrogram
dend_expr <- matrix(rnorm(1000), ncol = 10) %>%
  scale() %>%
  dist() %>%
  hclust() %>%
  as.dendrogram()

# Extract plotting data (segments and leaf labels) from the dendrogram
tree_labels <- dendro_data(dend_expr, type = "rectangle")

# Give every leaf a random two-level Diagnosis; the sample size follows the
# number of leaves instead of a hard-coded 100
tree_labels$labels <- cbind(
  tree_labels$labels,
  Diagnosis = as.factor(
    sample(1:2, nrow(tree_labels$labels), replace = TRUE)
  )
)

绘图:

# Leaf-level segments (those ending at height 0), each paired with the
# label row that shares its x position so Diagnosis is available for colour
leaf_segments <- tree_labels$segments %>%
  filter(yend == 0) %>%
  left_join(tree_labels$labels, by = "x")

ggplot() +
  # Whole tree in the default colour
  geom_segment(
    data = segment(tree_labels),
    mapping = aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  # Leaf branches drawn again on top, coloured by Diagnosis
  geom_segment(
    data = leaf_segments,
    mapping = aes(x = x, y = y.x, xend = xend, yend = yend, color = Diagnosis)
  ) +
  # Leaf labels, coloured by Diagnosis as well
  geom_text(
    data = label(tree_labels),
    mapping = aes(x = x, y = y, label = label, colour = Diagnosis, hjust = 0),
    size = 3
  ) +
  coord_flip() +
  scale_y_reverse(expand = c(0.2, 0)) +
  scale_colour_brewer(palette = "Dark2") +
  theme_dendro() +
  ggtitle("Mayo Cohort: Hierarchical Clustering of Patients Colored by Diagnosis")

enter image description here

关键在于我所做的第二个 geom_segment 调用:

# Keep only the segments that end at height 0, i.e. the leaf branches
tree_labels$segments %>%
     filter(yend == 0) %>%
     # Match each leaf segment to its row in the labels table via the shared x
     left_join(tree_labels$labels, by = "x")

先用 `yend == 0` 筛选出所有叶子对应的线段,再按 `x` 与 `tree_labels$labels` 进行左连接。


0
投票

补充另一个选择:ggalign 包。它可以将热图按分组拆分为多个分面,并确保分面之后树状图仍能正确对齐。

# NOTE(review): `mat` is used here before it is defined; it is created in the
# heatmap example further down, so run that definition first.
# Builds a stack layout from `mat` and adds a dendrogram whose colour is
# mapped to `branch`; `k = 3L` presumably cuts the tree into 3 groups —
# confirm against the ggalign documentation.
ggstack(mat) + align_dendro(aes(color = branch), k = 3L) +
  scale_x_reverse(expand = expansion()) +
  scale_y_continuous(position = "right") +
  theme(axis.text.y = element_text())

enter image description here

# 9 x 9 matrix of random normal values with named rows and columns
mat <- matrix(rnorm(81), nrow = 9)
rownames(mat) <- paste0("row", seq_len(nrow(mat)))
colnames(mat) <- paste0("column", seq_len(ncol(mat)))
# Heatmap of `mat`; the layers that follow are added in order with `+`.
# NOTE(review): in ggalign, `+` appears to apply to whichever plot or
# annotation was most recently activated — confirm against the ggalign docs.
ggheatmap(mat) +
  scale_fill_viridis_c() +
  # presumably switches to the top ("t") annotation — TODO confirm
  hmanno("t") +
  # Dendrogram with colour mapped to `branch`, k = 3L
  align_dendro(aes(color = branch), k = 3L) +
  labs(color = "top-branch") +
  # Bar plot built from rowSums of the data, filled by `.panel`
  ggalign(aes(y = value), data = rowSums) +
  geom_bar(stat = "identity", aes(fill = factor(.panel))) +
  scale_fill_brewer(name = NULL, palette = "Dark2") +
  # presumably switches to the left ("l") annotation — TODO confirm
  hmanno("l") +
  # Horizontal bar plot built from rowSums, filled by `.y`
  ggalign(aes(x = value), data = rowSums, size = 0.5) +
  geom_bar(
    aes(y = .y, fill = factor(.y)),
    stat = "identity",
    orientation = "y"
  ) +
  # Reverses the x axis; presumably of the bar plot added just above
  scale_x_reverse() +
  # Second dendrogram, k = 4L, with a relative ("null" unit) size
  align_dendro(aes(color = branch),
    size = unit(1, "null"),
    k = 4L
  ) +
  labs(color = "left-branch") +
  # Reverses the x axis; presumably of the dendrogram added just above
  scale_x_reverse()

enter image description here

最新问题
© www.soinside.com 2019 - 2025. All rights reserved.