我正在尝试使用 `ggplot2` 绘制若干带有密度曲线、均值线和计数标签的分组直方图,如下所示。
library(ggplot2)
library(dplyr)

# Load the built-in data set and turn `gear` into a factor so it can drive
# the fill/colour scales and the facets.
data("mtcars")
mtcars[["gear"]] <- as.factor(mtcars[["gear"]])

# One row per gear level: number of rows and mean of `qsec`.
mtcars_summ <- summarise(
  mtcars,
  count = n(),
  mean = mean(qsec, na.rm = TRUE),
  .by = gear
)

ggplot() +
  # Background histogram of every car. The `gear` column is removed from this
  # layer's data, so the layer is not split by the facetting variable.
  geom_histogram(
    mapping = aes(x = qsec),
    data = mtcars[names(mtcars) != "gear"],
    bins = 10, alpha = 0.2
  ) +
  # Per-gear histogram drawn on top of the background.
  geom_histogram(
    mapping = aes(x = qsec, fill = gear, colour = gear),
    data = mtcars,
    bins = 10, alpha = 0.5
  ) +
  # Density curve scaled to counts so it shares the histogram's y axis.
  geom_density(
    mapping = aes(
      x = qsec, y = after_stat(count),
      fill = gear, colour = gear
    ),
    data = mtcars,
    alpha = 0.01
  ) +
  # Dashed vertical line at each group's mean.
  geom_vline(
    mapping = aes(xintercept = mean, colour = gear),
    data = mtcars_summ,
    linetype = "dashed"
  ) +
  # Group size printed in the top-right corner of each panel.
  geom_text(
    mapping = aes(
      label = paste("n =", count), colour = gear,
      hjust = 1.5, vjust = 1
    ),
    data = mtcars_summ,
    x = Inf, y = Inf
  ) +
  facet_wrap(facets = ~gear)
但是在我的某些数据集中,某个组的数据会全部缺失。在这种情况下,图形就会变得混乱。
# Reproduce the problem: blank out every `qsec` value of the gear == 5 group.
mtcars$qsec[mtcars$gear == 5] <- NA

# Recompute the per-gear summary; `count` now tallies non-missing values only.
mtcars_summ <- summarise(
  mtcars,
  count = sum(!is.na(qsec)),
  mean = mean(qsec, na.rm = TRUE),
  .by = gear
)

# Same plot as before, now with one group that has no usable data.
ggplot() +
  # Background histogram of every car (data without the `gear` column).
  geom_histogram(
    mapping = aes(x = qsec),
    data = mtcars[names(mtcars) != "gear"],
    bins = 10, alpha = 0.2
  ) +
  # Per-gear histogram.
  geom_histogram(
    mapping = aes(x = qsec, fill = gear, colour = gear),
    data = mtcars,
    bins = 10, alpha = 0.5
  ) +
  # Density curve scaled to counts.
  geom_density(
    mapping = aes(
      x = qsec, y = after_stat(count),
      fill = gear, colour = gear
    ),
    data = mtcars,
    alpha = 0.01
  ) +
  # Dashed vertical line at each group's mean.
  geom_vline(
    mapping = aes(xintercept = mean, colour = gear),
    data = mtcars_summ,
    linetype = "dashed"
  ) +
  # Group size printed in the top-right corner of each panel.
  geom_text(
    mapping = aes(
      label = paste("n =", count), colour = gear,
      hjust = 1.5, vjust = 1
    ),
    data = mtcars_summ,
    x = Inf, y = Inf
  ) +
  facet_wrap(facets = ~gear)
在确定填充(fill)和颜色(colour)标度时,`geom_histogram` 完全忽略了数据全部缺失的那个组。
如何才能使颜色和分组顺序与没有缺失数据时的图保持一致?
为了防止缺失的类别被从填充标度(fill scale)中删除,请添加 `+ scale_fill_discrete(drop = FALSE)`。此外,从 ggplot2 3.5.0 起,还必须在 `geom_histogram` 中添加 `show.legend = TRUE`,才能为缺失的类别显示图例键:
library(ggplot2)

# Print the installed version; the `show.legend = TRUE` workaround below is
# needed from ggplot2 3.5.0 onwards.
packageVersion("ggplot2")

ggplot() +
  # Background histogram of every car (data without the `gear` column).
  geom_histogram(
    mapping = aes(x = qsec),
    data = mtcars[names(mtcars) != "gear"],
    bins = 10, alpha = 0.2
  ) +
  # Per-gear histogram; `show.legend = TRUE` forces this layer into the legend
  # so that the level without data still gets a legend key.
  geom_histogram(
    mapping = aes(x = qsec, fill = gear, colour = gear),
    data = mtcars,
    bins = 10, alpha = 0.5,
    show.legend = TRUE
  ) +
  # Density curve scaled to counts.
  geom_density(
    mapping = aes(
      x = qsec, y = after_stat(count),
      fill = gear, colour = gear
    ),
    data = mtcars,
    alpha = 0.01
  ) +
  # Dashed vertical line at each group's mean.
  geom_vline(
    mapping = aes(xintercept = mean, colour = gear),
    data = mtcars_summ,
    linetype = "dashed"
  ) +
  # Group size printed in the top-right corner of each panel.
  geom_text(
    mapping = aes(
      label = paste("n =", count), colour = gear,
      hjust = 1.5, vjust = 1
    ),
    data = mtcars_summ,
    x = Inf, y = Inf
  ) +
  # Keep unused factor levels in the fill scale so colours stay aligned with
  # the plot that has no missing group.
  scale_fill_discrete(drop = FALSE) +
  facet_wrap(facets = ~gear)