在 ggplot2() 箱线图中添加具有相同 x 值的两组之间的显着性条

Question

我使用内置的

ToothGrowth

数据集作为玩具数据来使用包

ggplot2

制作箱线图，并且还尝试添加显着性条形图。我使用包

ggpubr

让它工作，我在其中比较 x 值。下面是仅比较

dose

的误差线的屏幕截图，但我想计算每个

supp

值之间的显着性。例如，当仅查看

dose

= 0.5 时，

len

和

OJ

中的

VC

值在统计上是否显着不同。我有比较每个

OJ

中的

VC

和

dose

的 p 值，但不知道如何绘制它们

我已经包含了完全重现此数据的所有代码，现在我正在尝试将

pval_0.5

、

pval_1

和

pval_2

添加到该图中。我不知道如何指定

geom_signif()

来比较样本。

下面是制作带有显着性条的原始图（非常手动格式化）：


ToothGrowth$dose <- as.factor(ToothGrowth$dose)
head(ToothGrowth)

# Filter based on len
len_0.5 <- ToothGrowth |>
            dplyr::filter(dose == "0.5") |>
            dplyr::select(len)

len_1 <- ToothGrowth |>
            dplyr::filter(dose == "1") |>
            dplyr::select(len)

len_2 <- ToothGrowth |>
            dplyr::filter(dose == "2") |>
            dplyr::select(len)

# Get p-values
stat_0.5v1 <- t.test(len_0.5, len_1,
                     var = F)

stat_0.5v2 <- t.test(len_0.5, len_2,
                     var = F)

stat_1v2 <- t.test(len_0.5, len_2,
                     var = F)

pval_0.5v1 <- stat_0.5v1$p.value
pval_0.5v2 <- stat_0.5v2$p.value
pval_1v2 <- stat_1v2$p.value

手动设置ggplot2参数


t.test_compare <- list(c("0.5", "1"), 
                       c("0.5", "2"), 
                       c("1", "2"))

t.test_annot <- c(signif(pval_0.5v1, digits = 3), 
                  signif(pval_0.5v2, digits = 3), 
                  signif(pval_1v2, digits = 3))

t.test_ypos <- c(max(ToothGrowth$len)*1.05, 
                 max(ToothGrowth$len)*1.15, 
                 max(ToothGrowth$len)*1.25)

绘图


library(ggpubr)

p <- ggplot(data = ToothGrowth,
            mapping = aes(x = dose,
                          y = len,
                          fill = supp)) +
      geom_boxplot(position = position_dodge(1)) +
      theme_classic() +
      geom_signif(data = ToothGrowth, 
                  comparisons = t.test_compare,
                  map_signif_level = T,
                  annotations = t.test_annot,
                  y_position = t.test_ypos)
    
p

获取更多 p 值以比较

OJ

和
VC


len_0.5.oj <- ToothGrowth |>
            dplyr::filter(dose == "0.5", supp == "OJ") |>
            dplyr::select(len)

len_0.5.vc <- ToothGrowth |>
            dplyr::filter(dose == "0.5", supp == "VC") |>
            dplyr::select(len)

len_1.oj <- ToothGrowth |>
            dplyr::filter(dose == "1", supp == "OJ") |>
            dplyr::select(len)

len_1.vc <- ToothGrowth |>
            dplyr::filter(dose == "1", supp == "VC") |>
            dplyr::select(len)

len_2.oj <- ToothGrowth |>
            dplyr::filter(dose == "2", supp == "OJ") |>
            dplyr::select(len)

len_2.vc <- ToothGrowth |>
            dplyr::filter(dose == "2", supp == "VC") |>
            dplyr::select(len)

stat_0.5 <- t.test(len_0.5.oj, len_0.5.vc,
                   var = F)

stat_1 <- t.test(len_1.oj, len_1.vc,
                   var = F)

stat_2 <- t.test(len_2.oj, len_2.vc,
                   var = F)

pval_0.5 <- stat_0.5$p.value

pval_1 <- stat_1$p.value

pval_2 <- stat_2$p.value

Answer 1

首先从统计学的角度来看。我不会计算一堆 2 个样本 t.tests，而是依赖方差分析模型来解释总体变异性。话虽如此，当然您可以按照自己喜欢的方式计算 p 值，只需调整下面代码中的 p 值即可。 library(dplyr) library(emmeans) library(ggplot2) library(ggsignif) ToothGrowth$dose <- as.factor(ToothGrowth$dose) ## lm model which accounts for `dose` and `supp` tg_mod <- lm(len ~ dose * supp, data = ToothGrowth) ## use emmeans to get the contrasts emm <- emmeans(tg_mod, ~ supp | dose) ## depending on your use case you want to adjust for multiplicity ## (remove the `adjust` argument) (con <- contrast(emm, method = "pairwise", adjust = "none") %>% as_tibble() %>% mutate(x_pos = as.integer(dose)) %>% inner_join(ToothGrowth %>% summarize(y_pos = max(len), .by = dose) %>% mutate(y_pos = y_pos + 5), "dose") %>% mutate(p.value = signif(p.value, digits = 3L))) # # A tibble: 3 × 9 # contrast dose estimate SE df t.ratio p.value x_pos y_pos # <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> # 1 OJ - VC 0.5 5.25 1.62 54 3.23 0.00209 1 26.5 # 2 OJ - VC 1 5.93 1.62 54 3.65 0.00059 2 32.3 # 3 OJ - VC 2 -0.0800 1.62 54 -0.0493 0.961 3 38.9

现在我们已经计算出 p 值，我们可以将它们添加到图中。但参数

comparisons

不行，因为我们使用了躲避箱线图，所以我们需要交替使用

xmin

和

xmax

：

ggplot(data = ToothGrowth,
       mapping = aes(x = dose,
                     y = len,
                     fill = supp)) +
  geom_boxplot(position = position_dodge(1)) +
  theme_classic() +
  geom_signif(xmin = con %>% pull(x_pos) - .25,
              xmax = con %>% pull(x_pos) + .25,
              annotations = con %>% pull(p.value),
              y_position = con %>% pull(y_pos))

在 ggplot2() 箱线图中添加具有相同 x 值的两组之间的显着性条

问题描述投票：0回答：1

1个回答

最新问题

在 ggplot2() 箱线图中添加具有相同 x 值的两组之间的显着性条

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1