我在 R 中有一个使用
ggsankey
的桑基图,我想要一个填充流的渐变颜色。我按照 this 示例创建了一个示例图。
library(ggsankey)
library(tidyverse)
s1 <- sample(x= c("Single",
"Married",
"Married with kids",
"Married Without kids"),
size = 100,
replace=TRUE)
s2 <- sample(x= c("Male",
"Female"),
size = 100,
replace=TRUE)
s3 <- sample(x= c("Happy",
"Not Happy"),
size = 100,
replace=TRUE)
d <- data.frame(cbind(s1,s2,s3))
names(d) <- c('Relationship',
'Gender',
'Outcome')
df <- d%>%
make_long(Relationship,
Gender,
Outcome)
然后我创建一个自定义颜色向量。
new_colors <- c(RColorBrewer::brewer.pal(name = "Set3", n = 8))
new_colors <- setNames(new_colors, levels(as.factor(df$node)))
我策划。
pl <- ggplot(df, aes(x = x,
next_x = next_x,
node = node,
next_node = next_node,
fill = factor(node))) +
geom_sankey(flow.alpha = 0.5,
node.color = "black",
show.legend = TRUE) +
scale_fill_manual(values = new_colors)
例如,当流量从“单一”变为“男性”时,我希望填充物的颜色从粉色变为紫色,依此类推。我正在尝试使用
scale_fill_gradient()
,但我不知道如何有条件地指定 low =
和 high =
(即基于 node
)。
ggplot 中没有可用的单个多边形的渐变填充,尽管此功能将在下一版本(v 3.5.0)中提供给扩展开发人员。
scale_fill_gradient
函数不执行渐变填充,而是将对象的填充颜色映射到分级比例。使用数百个多边形在当前 ggplot 版本中是可以给出渐变填充的外观,但这需要相当多的工作才能实现。
例如,此函数将采用数据帧
d
格式的数据帧,并仅使用geom_rect
创建具有渐变填充的桑基:
library(tidyverse)
make_sankey <- function(
dat,
colors = RColorBrewer::brewer.pal(name = "Set3",
n = length(unique(unlist(dat))))
) {
sig <- function(x1, x2, y1, y2) {
y1 + (y2 - y1) * pnorm(seq(x1, x2, len = 500), (x1 + x2)/2, (x2 - x1)/6)
}
df_c <- data.frame(var = levels(factor(unlist(dat))), color = colors)
df_n <- do.call("rbind", lapply(seq_along(dat), function(i) {
gap <- nrow(dat) / 10
table(dat[i]) |>
as.data.frame() |>
cbind(xpos = i) |>
setNames(c("Var", "Freq", "xpos")) |>
within({
ymin <- c(0, head(cumsum(Freq + gap), -1))
ymax <- cumsum(Freq + c(0, rep(gap, length(Freq) - 1)))
}) |>
within({
ymin <- ymin - 0.5 * max(ymax)
ymax <- ymax - 0.5 * max(ymax)
xmin <- xpos - 0.05
xmax <- xpos + 0.05
xpos <- NULL
})
}))
df_f <- do.call("rbind", lapply(head(seq_along(dat), -1), function(i) {
table(dat[[i]], dat[[i + 1]]) |> as.data.frame()
})) |>
left_join(df_c, by = c(Var1 = "var")) |>
left_join(df_c, by = c(Var2 = "var"),
suffix = c("_left", "_right")) |>
left_join(select(df_n, -Freq, -ymax), by = c(Var1 = "Var")) |>
left_join(select(df_n, -Freq, -ymax), by = c(Var2 = "Var"),
suffix = c("_left", "_right")) |>
mutate(ymin_left = ymin_left + c(0, head(cumsum(Freq), -1)),
ymax_left = ymin_left + Freq, .by = Var1) |>
mutate(ymin_right = ymin_right + c(0, head(cumsum(Freq), -1)),
ymax_right = ymin_right + Freq, .by = Var2) |>
rowwise() |>
reframe(color = colorRampPalette(c(color_left, color_right))(500),
xmin = seq(xmax_left, xmin_right, length = 500) - 0.001,
xmax = xmin + 0.002,
ymin = sig(xmax_left, xmin_right, ymin_left, ymin_right),
ymax = sig(xmax_left, xmin_right, ymax_left, ymax_right))
df_n <- df_n |> left_join(df_c, by = c(Var = "var"))
ggplot(df_f, aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)) +
geom_rect(aes(fill = color), color = NA, alpha = 0.5) +
scale_fill_identity() +
ggnewscale::new_scale_fill() +
geom_rect(data = df_n, aes(fill = color), color = "black") +
scale_fill_identity(NULL, guide = guide_legend(),
labels = ~ df_c$var[match(.x, df_c$color)]) +
scale_x_continuous(breaks = seq_along(dat), labels = names(dat),
minor_breaks = NULL)
}
使用非常简单:
make_sankey(d) + theme_minimal(base_size = 20)