ggplot2 中按组绘制直方图和密度图时,某个组的数据全部缺失

问题描述 投票:0回答:1

我正在尝试使用

ggplot2
绘制几个具有密度、平均值和计数的分组直方图,如下所示。

library(ggplot2)
library(dplyr)

# Copy the built-in mtcars data set into the workspace.
data("mtcars")
# Store gear as a factor; it is used below as the discrete fill, colour and
# facet variable.
mtcars[["gear"]] <- as.factor(mtcars[["gear"]])

# One row per gear level: the number of rows in the group and the mean of
# qsec (NA values are ignored when averaging).
mtcars_summ <- summarise(
  mtcars,
  count = n(),
  mean = mean(qsec, na.rm = TRUE),
  .by = gear
)


# Histograms of qsec faceted by gear. Layers, in drawing order:
#   1. grey histogram of all rows; the gear column is removed from this
#      layer's data, so it is not split by the facet variable
#   2. histogram coloured by gear
#   3. density curve per gear, rescaled to counts via after_stat(count)
#   4. dashed vertical line at each per-gear mean
#   5. "n = <count>" label anchored at the top-right corner (x = y = Inf)
ggplot() +
  geom_histogram(aes(qsec), mtcars[names(mtcars) != "gear"],
                 bins = 10, alpha = 0.2) +
  geom_histogram(aes(qsec, fill = gear, colour = gear), mtcars,
                 bins = 10, alpha = 0.5) +
  geom_density(aes(qsec, after_stat(count), fill = gear, colour = gear),
               mtcars, alpha = 0.01) +
  geom_vline(aes(xintercept = mean, colour = gear), mtcars_summ,
             linetype = "dashed") +
  geom_text(aes(label = paste("n =", count), colour = gear,
                hjust = 1.5, vjust = 1),
            mtcars_summ, x = Inf, y = Inf) +
  facet_wrap(vars(gear))

enter image description here

但是对于我的一些数据集,整个组都缺少数据。在这种情况下,图就会变得混乱。

# Blank out every qsec value in the gear == 5 group to simulate a group
# that has no data at all.
mtcars$qsec[mtcars$gear == 5] <- NA

# Recompute the per-gear summary. count now tallies only non-missing qsec
# values, so a group whose qsec is entirely NA gets count 0.
mtcars_summ <- summarise(
  mtcars,
  count = sum(!is.na(qsec)),
  mean = mean(qsec, na.rm = TRUE),
  .by = gear
)


# Same faceted plot as before, now drawn from the data in which one gear
# group has only missing qsec values. Layers, in drawing order:
#   1. grey histogram of all rows (gear column removed from the layer data)
#   2. histogram coloured by gear
#   3. density curve per gear, rescaled to counts via after_stat(count)
#   4. dashed vertical line at each per-gear mean
#   5. "n = <count>" label anchored at the top-right corner (x = y = Inf)
ggplot() +
  geom_histogram(aes(qsec), mtcars[names(mtcars) != "gear"],
                 bins = 10, alpha = 0.2) +
  geom_histogram(aes(qsec, fill = gear, colour = gear), mtcars,
                 bins = 10, alpha = 0.5) +
  geom_density(aes(qsec, after_stat(count), fill = gear, colour = gear),
               mtcars, alpha = 0.01) +
  geom_vline(aes(xintercept = mean, colour = gear), mtcars_summ,
             linetype = "dashed") +
  geom_text(aes(label = paste("n =", count), colour = gear,
                hjust = 1.5, vjust = 1),
            mtcars_summ, x = Inf, y = Inf) +
  facet_wrap(vars(gear))


geom_histogram
在确定填充和颜色标度时完全忽略了缺少数据的组。

如何使颜色和组顺序与那些没有丢失数据的图一致?

enter image description here

r ggplot2 visualization histogram missing-data
1个回答
0
投票

为了防止缺失的类别从填充标度中被丢弃,请添加

+ scale_fill_discrete(drop = FALSE)
。此外,自
ggplot2 >= 3.5.0
起,我们必须添加
show.legend=TRUE
(在
geom_histogram
中)以获得缺失类别的图例键:

library(ggplot2)

# Report the installed ggplot2 version; the show.legend = TRUE setting below
# matters for ggplot2 >= 3.5.0.
packageVersion("ggplot2")

# Faceted histogram/density plot that keeps the empty gear level:
#   * show.legend = TRUE on the coloured histogram requests a legend key for
#     every level, including the one without data
#   * scale_fill_discrete(drop = FALSE) keeps unused factor levels in the
#     fill scale
ggplot() +
  # Grey histogram of all rows; gear is removed from this layer's data.
  geom_histogram(aes(qsec), mtcars[names(mtcars) != "gear"],
                 bins = 10, alpha = 0.2) +
  # Histogram coloured by gear.
  geom_histogram(aes(qsec, fill = gear, colour = gear), mtcars,
                 bins = 10, alpha = 0.5, show.legend = TRUE) +
  # Density per gear, rescaled to counts.
  geom_density(aes(qsec, after_stat(count), fill = gear, colour = gear),
               mtcars, alpha = 0.01) +
  # Dashed line at each per-gear mean.
  geom_vline(aes(xintercept = mean, colour = gear), mtcars_summ,
             linetype = "dashed") +
  # "n = <count>" label anchored at the top-right corner.
  geom_text(aes(label = paste("n =", count), colour = gear,
                hjust = 1.5, vjust = 1),
            mtcars_summ, x = Inf, y = Inf) +
  scale_fill_discrete(drop = FALSE) +
  facet_wrap(vars(gear))

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.