在 ggplot2() 箱线图中添加具有相同 x 值的两组之间的显着性条

问题描述 投票:0回答:1

我使用内置的

ToothGrowth
数据集作为玩具数据来使用包
ggplot2
制作箱线图,并且还尝试添加显着性条形图。我使用包
ggpubr
让它工作,我在其中比较 x 值。下面是仅比较
dose
的误差线的屏幕截图,但我想计算每个
supp
值之间的显着性。例如,当仅查看
dose
= 0.5 时,
len
OJ
中的
VC
值在统计上是否显着不同。我有比较每个
OJ
中的
VC
dose
的 p 值,但不知道如何绘制它们

我已经包含了完全重现此数据的所有代码,现在我正在尝试将

pval_0.5
pval_1
pval_2
添加到该图中。我不知道如何指定
geom_signif()
来比较样本。

https://i.sstatic.net/oE7g79A4.png

下面是制作带有显着性条的原始图(非常手动格式化):


ToothGrowth$dose <- as.factor(ToothGrowth$dose)
head(ToothGrowth)

# Filter based on len
len_0.5 <- ToothGrowth |>
            dplyr::filter(dose == "0.5") |>
            dplyr::select(len)

len_1 <- ToothGrowth |>
            dplyr::filter(dose == "1") |>
            dplyr::select(len)

len_2 <- ToothGrowth |>
            dplyr::filter(dose == "2") |>
            dplyr::select(len)

# Get p-values
stat_0.5v1 <- t.test(len_0.5, len_1,
                     var = F)

stat_0.5v2 <- t.test(len_0.5, len_2,
                     var = F)

stat_1v2 <- t.test(len_0.5, len_2,
                     var = F)

pval_0.5v1 <- stat_0.5v1$p.value
pval_0.5v2 <- stat_0.5v2$p.value
pval_1v2 <- stat_1v2$p.value

手动设置ggplot2参数


t.test_compare <- list(c("0.5", "1"), 
                       c("0.5", "2"), 
                       c("1", "2"))

t.test_annot <- c(signif(pval_0.5v1, digits = 3), 
                  signif(pval_0.5v2, digits = 3), 
                  signif(pval_1v2, digits = 3))

t.test_ypos <- c(max(ToothGrowth$len)*1.05, 
                 max(ToothGrowth$len)*1.15, 
                 max(ToothGrowth$len)*1.25)

绘图


library(ggpubr)

p <- ggplot(data = ToothGrowth,
            mapping = aes(x = dose,
                          y = len,
                          fill = supp)) +
      geom_boxplot(position = position_dodge(1)) +
      theme_classic() +
      geom_signif(data = ToothGrowth, 
                  comparisons = t.test_compare,
                  map_signif_level = T,
                  annotations = t.test_annot,
                  y_position = t.test_ypos)
    
p

获取更多 p 值以比较

OJ
VC


len_0.5.oj <- ToothGrowth |>
            dplyr::filter(dose == "0.5", supp == "OJ") |>
            dplyr::select(len)

len_0.5.vc <- ToothGrowth |>
            dplyr::filter(dose == "0.5", supp == "VC") |>
            dplyr::select(len)

len_1.oj <- ToothGrowth |>
            dplyr::filter(dose == "1", supp == "OJ") |>
            dplyr::select(len)

len_1.vc <- ToothGrowth |>
            dplyr::filter(dose == "1", supp == "VC") |>
            dplyr::select(len)

len_2.oj <- ToothGrowth |>
            dplyr::filter(dose == "2", supp == "OJ") |>
            dplyr::select(len)

len_2.vc <- ToothGrowth |>
            dplyr::filter(dose == "2", supp == "VC") |>
            dplyr::select(len)

stat_0.5 <- t.test(len_0.5.oj, len_0.5.vc,
                   var = F)

stat_1 <- t.test(len_1.oj, len_1.vc,
                   var = F)

stat_2 <- t.test(len_2.oj, len_2.vc,
                   var = F)

pval_0.5 <- stat_0.5$p.value

pval_1 <- stat_1$p.value

pval_2 <- stat_2$p.value

r ggplot2 boxplot ggpubr
1个回答
0
投票

首先从统计学的角度来看。我不会计算一堆 2 个样本 t.tests,而是依赖方差分析模型来解释总体变异性。话虽如此,当然您可以按照自己喜欢的方式计算 p 值,只需调整下面代码中的 p 值即可。 library(dplyr) library(emmeans) library(ggplot2) library(ggsignif) ToothGrowth$dose <- as.factor(ToothGrowth$dose) ## lm model which accounts for `dose` and `supp` tg_mod <- lm(len ~ dose * supp, data = ToothGrowth) ## use emmeans to get the contrasts emm <- emmeans(tg_mod, ~ supp | dose) ## depending on your use case you want to adjust for multiplicity ## (remove the `adjust` argument) (con <- contrast(emm, method = "pairwise", adjust = "none") %>% as_tibble() %>% mutate(x_pos = as.integer(dose)) %>% inner_join(ToothGrowth %>% summarize(y_pos = max(len), .by = dose) %>% mutate(y_pos = y_pos + 5), "dose") %>% mutate(p.value = signif(p.value, digits = 3L))) # # A tibble: 3 × 9 # contrast dose estimate SE df t.ratio p.value x_pos y_pos # <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> # 1 OJ - VC 0.5 5.25 1.62 54 3.23 0.00209 1 26.5 # 2 OJ - VC 1 5.93 1.62 54 3.65 0.00059 2 32.3 # 3 OJ - VC 2 -0.0800 1.62 54 -0.0493 0.961 3 38.9

现在我们已经计算出 p 值,我们可以将它们添加到图中。但参数 
comparisons

不行,因为我们使用了躲避箱线图,所以我们需要交替使用

xmin
xmax
ggplot(data = ToothGrowth,
       mapping = aes(x = dose,
                     y = len,
                     fill = supp)) +
  geom_boxplot(position = position_dodge(1)) +
  theme_classic() +
  geom_signif(xmin = con %>% pull(x_pos) - .25,
              xmax = con %>% pull(x_pos) + .25,
              annotations = con %>% pull(p.value),
              y_position = con %>% pull(y_pos))

A boxplot with dose on the x-Axis and dodges boxplots (according to supp). A significance bracket is added on top of each doe group with the p-value as annotation

© www.soinside.com 2019 - 2024. All rights reserved.