我已经用 ggplot2 绘制了一张冲积图(alluvial plot),但不知道如何只给最常见的那一对着色——即变量“CTaa_alpha”中的“CAGGFNYQLIW”与变量“CTaa_beta”中的“CASSVAGPNTEAFF”组成的配对——同时让其余部分全部保持灰色。
我的代码如下:
# Example data as a dput-style literal: a 50-row tibble in which each row
# holds a CTaa_alpha string, a CTaa_beta string and an integer count n.
# Rows are listed in non-increasing order of n, so row 1 is the most
# frequent pairing.
a<- structure(list(CTaa_alpha = c("CAGGFNYQLIW", "CVVNRDDKIIF", "CAVRGDSNYQLIW",
"CVVNTRSNDYKLSF", "CAVQAAANAGKSTF", "CVVLNTGGFKTIF", "CAYVNNNDMRF",
"CATDANTGFQKLVF", "CAVRAPDQTGANNLFF", "CALREYGNKLVF", "CATDRDDKIIF",
"CAYRGGSNYKLTF", "CAMRELTSNTGKLIF", "CALISYNTDKLIF", "CALTPYGNNRLAF",
"CAAVPNAGNMLTF", "CAWEYGNKLVF", "CVVSVDYGQNFVF", "CAFYGQNFVF",
"CAPGMETSYDKVIF", "CAVTRNSDGQKLLF", "CAGASGGGSYIPTF", "CAVRDTHNTDKLIF",
"CAVNIHSGYALNF", "CAGVDTNAGKSTF", "CAPRDSNYQLIW", "CVVNAPSGNTPLVF",
"CALSELPYSSASKIIF", "CALGDGGATNKLIF", "CAVALSGYALNF", "CLVGDVTAGNKLTF",
"CAGPFSGGYNKLIF", "CATAPNYGGATNKLIF", "CAGITGGGNKLTF", "CAVTGAAYNTDKLIF",
"CALPPQKLVF", "CAVGDGQNFVF", "CILRIYQGGSEKLVF", "CAMREITGNTGKLIF",
"CAVSSSSGSARQLTF", "CASRYNFNKFYF", "CATREAGNMLTF", "CAVRENQAGTALIF",
"CAVTSPGANNLFF", "CAVSTPTGANSKLTF", "CAVSKSARQLTF", "CAVLSNDYKLSF",
"CAVRDGDYKLSF", "CAARGVYGNKLVF", "CALSEAPYGGATNKLIF"), CTaa_beta = c("CASSVAGPNTEAFF",
"CASSVGNRGGTDTQYF", "CASSLRQGPSYEQYF", "CASKPGTTSNQPQHF", "CSVAGTGVYNEQFF",
"CSVVPGGQGGYEQYF", "CASSSGGLDEQYF", "CATSIGGPPYEQYF", "CASSAGLAGGYEQYF",
"CASSSPGTTNEKLFF", "CASSLLAGGNNEQFF", "CASSLLQGPSSPLHF", "CASSLGGSSYEQYF",
"CASSLRDGHYGYTF", "CASSLRDSHYEQYF", "CASSQWMYSPNGYTF", "CSASFGDGGEGETQYF",
"CASSEGHRGGTDTQYF", "CASSLSGSPAYGYTF", "CASSGTGTGASGNEQFF", "CAWSRPLGYTF",
"CASSLVGAGANVLTF", "CASSRQAEAFF", "CASSLLAGGNNEQFF", "CASSSHYRGGTDTQYF",
"CASSEVGGSMETQYF", "CASSTDISSYNEQFF", "CASGLVQQGGTEAFF", "CASSLLPGLAGAGNEQFF",
"CASTPAVRDGNYEQYF", "CASGPGLQQTYGYTF", "CASSPDRTGEANNEQFF", "CASSLAKAGTGGEKLFF",
"CASGGTGPYNEQFF", "CSVEDPSSGSYEQYF", "CASSQYRGTEAFF", "CASSPGSSGSETQYF",
"CASSYSEVTEAFF", "CSARAGGWGTDTQYF", "CSATAYRTGAYEQYF", "CASRPERGHTDTQYF",
"CASSFEGGGTEAFF", "CASSQYRGTEAFF", "CASSTQGQSYTEAFF", "CASSVGLYSTDTQYF",
"CASSQDPTDQPQHF", "CASSSTEKDTQYF", "CSAFTGNTEAFF", "CASSYTGRPEQYF",
"CASSPGQGLLSGELFF"), n = c(268L, 145L, 142L, 109L, 95L, 84L,
60L, 60L, 56L, 55L, 53L, 52L, 51L, 49L, 48L, 48L, 45L, 42L, 36L,
34L, 33L, 32L, 32L, 32L, 31L, 31L, 28L, 28L, 27L, 27L, 27L, 26L,
26L, 26L, 25L, 25L, 23L, 22L, 22L, 20L, 20L, 20L, 20L, 19L, 19L,
19L, 18L, 18L, 17L, 17L)), row.names = c(NA, -50L), class = c("tbl_df",
"tbl", "data.frame"))
# Alluvial plot of `a`: CTaa_alpha on the first axis, CTaa_beta on the
# second, with each flow's width taken from `n`.
ggplot(
  data = a,
  mapping = aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # Inside aes(), "green" is mapped as a constant one-level variable, so the
  # actual fill colour is chosen by the viridis scale below and every flow
  # ends up with the same colour.
  geom_alluvium(mapping = aes(fill = "green")) +
  geom_stratum() +
  # Label each stratum box with its category name.
  geom_text(
    mapping = aes(label = after_stat(stratum)),
    stat = "stratum"
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_viridis_d() +
  theme_classic() +
  theme(legend.position = "none")
上面的代码生成了下面这张图:
正如你所看到的,这张图有点“乱”,我希望有办法突出显示某个特定的配对(无论是第 1 行这个最频繁的配对,还是第 10 行这个第 10 频繁的配对)。欢迎任何见解!
要突出显示某些类别,可以把一个条件映射到 `fill` 美学(aes)上,然后用 `scale_fill_manual` 设置所需的颜色。例如,要突出显示前 3 个类别,可以这样做:
library(ggplot2)
library(ggalluvial)

# Highlight the top 3: take the CTaa_alpha values of the three rows of `a`
# with the largest n. `drop = TRUE` makes subset() return a plain vector
# rather than a one-column data frame.
top_alpha <- a[order(a$n, decreasing = TRUE), ] |>
  head(3) |>
  subset(select = CTaa_alpha, drop = TRUE)

ggplot(
  data = a,
  aes(axis1 = CTaa_alpha, axis2 = CTaa_beta, y = n)
) +
  # Map a logical condition to fill: TRUE for flows whose CTaa_alpha is one
  # of the highlighted values, FALSE for everything else.
  geom_alluvium(aes(fill = CTaa_alpha %in% top_alpha)) +
  geom_stratum() +
  # Label each stratum box with its category name.
  geom_text(
    stat = "stratum",
    aes(label = after_stat(stratum))
  ) +
  scale_x_discrete(
    limits = c("CDR3_alpha", "CDR3_beta"),
    expand = c(0.15, 0.05)
  ) +
  scale_fill_manual(
    # Name the colours by condition value instead of relying on position:
    # with unnamed values, if only one of FALSE/TRUE occurs in the data the
    # first colour would be assigned to whichever level is present.
    values = c("FALSE" = "grey65", "TRUE" = "steelblue")
  ) +
  theme_classic() +
  theme(legend.position = "none")