使用 ggplot2 的冲积图 - 突出显示某些配对

问题描述 投票:0回答:1

我已经使用 ggplot2 绘制了冲积图,但我不知道如何只给最常见的那一对着色,即变量“CTaa_alpha”中的“CAGGFNYQLIW”与变量“CTaa_beta”中的“CASSVAGPNTEAFF”这一配对,同时让其余所有配对保持灰色。

我的代码如下:

# Example data for the alluvial plot: a 50-row tibble, one row per pair.
#   CTaa_alpha, CTaa_beta - character columns holding the two members of a pair
#   n                     - integer count for the pair; rows are listed from
#                           the largest n to the smallest
# NOTE(review): the structure()/row.names/class layout looks like dput()
# output, so this block can be pasted as-is to rebuild the data.
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW", 
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF", 
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF", 
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF", 
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF", 
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF", 
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF", 
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF", 
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF", 
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF", 
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF", 
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF", 
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF", 
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF", 
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF", 
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF", 
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF", 
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF", 
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF", 
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF", 
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF", 
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF", 
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF", 
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF", 
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF", 
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L, 
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L, 
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L, 
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df", 
"tbl", "data.frame"))


# Alluvial plot of the pairs in `a`: CTaa_alpha on the first axis, CTaa_beta
# on the second, with each flow weighted by n.
ggplot(
  data = a,
  mapping = aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # "green" sits inside aes(), so it is mapped as a constant data value rather
  # than used as a literal colour; the fill scale below chooses the colour.
  geom_alluvium(mapping = aes(fill = "green")) +
  geom_stratum() +
  # Label every stratum with its own value.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")

上面的代码生成了下面这张图:

enter image description here

正如你所看到的,它有点“混乱”,我希望能有一种方法来突出显示某个特定的配对(无论是第 1 行,即最频繁的配对,还是第 10 行,即第 10 个最频繁的配对)。欢迎任何见解!

r ggplot2 ggalluvial
1个回答
0
投票

要突出显示某些类别,您可以把一个条件映射到 `fill` 美学属性(aes)上,然后使用 `scale_fill_manual` 设置所需的颜色。例如,要突出显示前 3 个类别,可以这样做:

library(ggplot2)
library(ggalluvial)

# Highlight Top 3
# Collect the CTaa_alpha values of the 3 rows with the largest n. Flows whose
# CTaa_alpha is in this set are drawn in the highlight colour; change the 3
# (or assign specific values) to highlight a different selection.
.highlight <- head(a$CTaa_alpha[order(a$n, decreasing = TRUE)], 3)

ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # Map a logical condition (not a colour) to fill: TRUE for highlighted flows,
  # FALSE for everything else.
  geom_alluvium(aes(fill = CTaa_alpha %in% .highlight)) +
  geom_stratum() +
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  # Name each colour by the value it belongs to, so the assignment does not
  # depend on level order. With unnamed values, a plot in which every row is
  # highlighted (only TRUE present) would get the grey colour instead.
  scale_fill_manual(
    values = c("FALSE" = "grey65", "TRUE" = "steelblue")
  ) +
  theme_classic() +
  theme(legend.position = "none")

© www.soinside.com 2019 - 2024. All rights reserved.