在 ggplot2 的箱线图-小提琴图组合中忽略异常值

问题描述 投票:0回答:1

我正在尝试用 ggplot2 绘制箱线图与小提琴图的组合图,但找不到办法让 `geom_violin` 忽略异常值;而 `geom_boxplot` 中的异常值可以通过 `outlier.shape = NA` 隐藏。结果,小提琴的尾部一直延伸到 y 轴的顶部。

这是我的数据:

> dput(data)
structure(list(Group = c("A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", "B", 
"B", "B", "B", "B", "B", "B", "B", "B", "B", "B"), Type = c("1", 
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2"), Value = c(1245.2261, 2886.96, 3572.6615, 
2011.1111, 3321.2025, 229.5533, 14.1449, 135.291, 54.4526, 36.0926, 
74.5434, 86.335, 131.4279, 105.4935, 14.5906, 1.503, 2.7716, 
42.381, 88.9701, 869.6742, 316.855, 32.9683, 6.4267, 52.2946, 
164.4073, 54.2387, 37.5134, 71.2792, 145.958, 114.6187, 36.133, 
1.8108, 67.9746, 39.8386, 382.5043, 40.1728, 37.1252, 288.6866, 
25.085, 21.8553, 15.0067, 143.8127, 16.8865, 26.8421, 8.8349, 
188.1872, 42.2323, 64.2163, 56.7453, 85.4888, 29.6905, 6.1148, 
43.0328, 158.0811, 90.4613, 217.033, 111.5344, 271.5655, 195.7022, 
79.7093, 6.0458, 116.6274, 43.6644, 72.4189, 89.9063, 37.6572, 
294.5133, 46.8855, 16.7959, 50.4155, 39.6882, 18.7457, 12.728, 
40.2756, 129.6219, 190.0905, 796.7611, 30.1724, 14.8736, 551.2666, 
18.2315, 57.9076, 129.7094, 158.1109, 256.6553, 79.6724, 75.2056, 
7.2661, 18.7643, 79.4748, 445.5713, 9.9553, 106.6388, 50.0596, 
56.4002, 157.1143, 9.805, 117.2691, 8.9047, 3.6258, 387.132, 
56.8996, 40.7247, 1117.4439, 79.4742, 224.0688, 134.8485, 8.4794, 
23.1996, 65.2439, 389.3144, 294.4159, 671.4736, 541.8969, 64.3243, 
25.0634, 7.727, 20.8132, 149.3634, 160.7447, 114.1869, 38.4615, 
28.502, 34.0532, 15.0038, 1028.626, 166.3813, 24.7788, 306.6516, 
204.0348, 18.1818, 77.4041, 24.1017, 96.4706, 59.4937, 23.2078, 
3.192, 37.8065, 40.8055, 8.3577, 7.4273, 66.426, 1548.8338, 3.6242, 
92.264, 42.8195, 282.1101, 104.0848, 42.5784, 9.9258, 63.8066, 
99.6852, 26.5864, 270.322, 121.4097, 32.6258, 287.2582, 7.4627, 
851.5289, 156.0563, 324.1189, 101.5936, 5.618, 114.3788, 54.6875, 
96.5594, 446.1059, 95.1883, 30.3678, 48.2655, 61.4182, 66.5381, 
4.0973, 8.1744, 2.7192, 0.3697, 0.3681, 0.7488, 0, 7.9272, 1.1391, 
1.4375, 0.7535, 0.8256, 1.0323, 0.9053, 2.7822, 0.6899, 3.037, 
2.423, 0.7045, 6.1298, 1.7498, 10.5565, 0.684, 2.1433, 1.5334, 
1.7043, 1.3783, 0.6146, 8.9179, 1.3879, 4.2004, 2.0747, 0.3508, 
4.4362, 0.7214, 1.2232, 4.1245, 17.8295, 240.18, 61.0013, 0.813, 
69.9786, 0.4346, 1.624, 30.4569, 4.4143, 5.3119, 0.4459, 0, 1.1484, 
3.7614, 2.863, 1452.5581, 3.7736, 1.7705, 10.6081, 2743.5433, 
6.019, 0.4851, 2.4719, 7.5529, 0, 6.9739, 1.5783, 0, 1.3115, 
2.7701, 2.6135, 0, 0.9915, 4.0413, 2.3496, 1.796, 0.8745, 5.6391, 
0.2803, 3202.3684, 19.5453, 17.5439, 1.831, 4.1848, 0.7547, 0, 
0.5253, 0, 38.07, 0.6656, 5.6184, 0, 1.6858, 0.4801, 0.6676, 
3.0412)), row.names = c(NA, -265L), class = c("tbl_df", "tbl", 
"data.frame"))

和代码:

# Box-violin plot of Value per Group, with one violin/box pair per Type.
ggplot(
  data,
  mapping = aes(x = Group, y = Value, fill = Type)
) +
  # Violin layer; it is left out of the legend.
  geom_violin(
    colour = "black",
    scale = "width",
    width = 0.5,
    show.legend = FALSE
  ) +
  # Narrow boxplots dodged by 0.5; outlier points are not drawn
  # (outlier.shape = NA) and this layer supplies the legend keys.
  geom_boxplot(
    colour = "black",
    lwd = 0.5,
    width = 0.1,
    outlier.shape = NA,
    position = position_dodge(width = 0.5),
    show.legend = TRUE
  ) +
  # Manual fill colours for the two Type levels.
  scale_fill_manual(
    name = "Type",
    values = c("1" = "red", "2" = "forestgreen"),
    breaks = c("1", "2")
  ) +
  # Compare the Type levels with method "wilcox.test" and print the
  # significance label.
  stat_compare_means(
    mapping = aes(group = Type),
    method = "wilcox.test",
    label = "p.signif",
    size = 10,
    vjust = 0.5
  ) +
  # Axis breaks are derived from the full Value range, outliers included.
  scale_y_continuous(breaks = pretty(data$Value, n = 8)) +
  labs(x = "Group", y = "Value") +
  theme_bw() +
  theme(
    # Panel: no grid lines, no background, black border.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    panel.border = element_rect(colour = "black"),
    # Axes: black ticks and bold black tick labels.
    axis.ticks = element_line(colour = "black"),
    axis.text.x = element_text(
      face = "bold", colour = "black", size = 12, vjust = 0.5
    ),
    axis.text.y = element_text(face = "bold", colour = "black", size = 12),
    # Titles.
    axis.title = element_text(face = "bold", size = 15),
    legend.title = element_text(face = "bold", size = 15)
  ) +
  guides(
    fill = guide_legend(
      title = "Type",
      override.aes = list(size = 10)
    )
  )

我试过这个(this)方法,但对我不起作用。我希望把小提琴的尾部限制在箱线图须线的两端(最小值/最大值)之内。

r ggplot2 boxplot outliers violin-plot
1个回答
0
投票

这是一种可行的做法:箱线图仍使用原始的 `Value` 列,另外为小提琴图新建一列,并把其中所有异常值设为 NA。请记住,您始终可以调整数据以适合您的用例!

library(dplyr)
library(ggplot2)
library(ggpubr)
data <- structure(...)

#' Replace boxplot-style outliers with NA
#'
#' Values lying more than `coef` interquartile ranges below the first
#' quartile or above the third quartile of `x` are set to `NA`. With the
#' default `coef = 1.5` this is the conventional 1.5 * IQR rule.
#'
#' @param x A numeric vector. Missing values are ignored when computing the
#'   quartiles and are returned unchanged.
#' @param coef A single number: the multiple of the IQR that defines the
#'   lower and upper fences. Defaults to 1.5.
#' @return A vector of the same length as `x` with outliers replaced by `NA`.
replace_outliers <- function(x, coef = 1.5) {
    stopifnot(is.numeric(coef), length(coef) == 1L)
    # na.rm = TRUE: quantile() stops with an error on missing values otherwise.
    quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    spread <- quartiles[2] - quartiles[1]
    lower_fence <- quartiles[1] - coef * spread
    upper_fence <- quartiles[2] + coef * spread
    # which() discards NA comparisons, so only real outliers are indexed.
    x[which(x < lower_fence | x > upper_fence)] <- NA
    x
}

# Blank the outliers within each Group x Type combination so that the violins
# are drawn from the remaining values only; the boxplots and the significance
# test keep using the untouched Value column.
data %>%
    group_by(Group, Type) %>%
    mutate(Value_NA = replace_outliers(Value)) %>% # replace outliers with NA
    ungroup() %>% # grouping was only needed for the per-group outlier fences
    ggplot(aes(x = Group,
        fill = Type)) +
    geom_violin(aes(y = Value_NA),
        width = 0.5,
        scale = "width",
        color = "black",
        na.rm = TRUE, # the NAs are deliberate: drop them without a warning
        show.legend = FALSE) +
    geom_boxplot(aes(y = Value),
        position = position_dodge(width = 0.5),
        width = 0.1,
        color = "black",
        lwd = 0.5,
        outlier.shape = NA, # outlier points are not drawn
        show.legend = TRUE) +
    scale_fill_manual(name = "Type",
        breaks = c("1", "2"),
        values = c("1" = "red",
            "2" = "forestgreen")) +
    # The test runs on the original Value column, outliers included.
    stat_compare_means(aes(y = Value, group = Type),
        method = "wilcox.test",
        label = "p.signif",
        vjust = 0.5,
        size = 10) +
    scale_y_continuous(breaks = pretty(data$Value, n = 8)) +
    xlab("Group") +
    ylab("Value") +
    theme_bw() +
    theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_rect(color = "black"),
        panel.background = element_blank(),
        axis.ticks = element_line(color = "black"),
        axis.text.x = element_text(size = 12,
            color = "black",
            face = "bold",
            vjust = 0.5),
        axis.text.y = element_text(size = 12,
            color = "black",
            face = "bold"),
        axis.title = element_text(size = 15,
            face = "bold"),
        legend.title = element_text(size = 15,
            face = "bold")) +
    guides(fill = guide_legend(title = "Type",
        override.aes = list(size = 10)))

创建于 2024-05-14,使用 reprex v2.1.0

© www.soinside.com 2019 - 2024. All rights reserved.