我使用内置的
ToothGrowth
数据集作为玩具数据来使用包 ggplot2
制作箱线图,并且还尝试添加显着性条形图。我使用包 ggpubr
让它工作,我在其中比较 x 值。下面是仅比较 dose
的误差线的屏幕截图,但我想计算每个 supp
值之间的显着性。例如,当仅查看 dose
= 0.5 时,len
和 OJ
中的 VC
值在统计上是否显着不同。我有比较每个 OJ
中的 VC
和 dose
的 p 值,但不知道如何绘制它们
我已经包含了完全重现此数据的所有代码,现在我正在尝试将
pval_0.5
、pval_1
和 pval_2
添加到该图中。我不知道如何指定 geom_signif()
来比较样本。
下面是制作带有显着性条的原始图(非常手动格式化):
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
head(ToothGrowth)
# Filter based on len
len_0.5 <- ToothGrowth |>
dplyr::filter(dose == "0.5") |>
dplyr::select(len)
len_1 <- ToothGrowth |>
dplyr::filter(dose == "1") |>
dplyr::select(len)
len_2 <- ToothGrowth |>
dplyr::filter(dose == "2") |>
dplyr::select(len)
# Get p-values
stat_0.5v1 <- t.test(len_0.5, len_1,
var = F)
stat_0.5v2 <- t.test(len_0.5, len_2,
var = F)
stat_1v2 <- t.test(len_0.5, len_2,
var = F)
pval_0.5v1 <- stat_0.5v1$p.value
pval_0.5v2 <- stat_0.5v2$p.value
pval_1v2 <- stat_1v2$p.value
手动设置ggplot2参数
t.test_compare <- list(c("0.5", "1"),
c("0.5", "2"),
c("1", "2"))
t.test_annot <- c(signif(pval_0.5v1, digits = 3),
signif(pval_0.5v2, digits = 3),
signif(pval_1v2, digits = 3))
t.test_ypos <- c(max(ToothGrowth$len)*1.05,
max(ToothGrowth$len)*1.15,
max(ToothGrowth$len)*1.25)
绘图
library(ggpubr)
p <- ggplot(data = ToothGrowth,
mapping = aes(x = dose,
y = len,
fill = supp)) +
geom_boxplot(position = position_dodge(1)) +
theme_classic() +
geom_signif(data = ToothGrowth,
comparisons = t.test_compare,
map_signif_level = T,
annotations = t.test_annot,
y_position = t.test_ypos)
p
获取更多 p 值以比较
OJ
和 VC
len_0.5.oj <- ToothGrowth |>
dplyr::filter(dose == "0.5", supp == "OJ") |>
dplyr::select(len)
len_0.5.vc <- ToothGrowth |>
dplyr::filter(dose == "0.5", supp == "VC") |>
dplyr::select(len)
len_1.oj <- ToothGrowth |>
dplyr::filter(dose == "1", supp == "OJ") |>
dplyr::select(len)
len_1.vc <- ToothGrowth |>
dplyr::filter(dose == "1", supp == "VC") |>
dplyr::select(len)
len_2.oj <- ToothGrowth |>
dplyr::filter(dose == "2", supp == "OJ") |>
dplyr::select(len)
len_2.vc <- ToothGrowth |>
dplyr::filter(dose == "2", supp == "VC") |>
dplyr::select(len)
stat_0.5 <- t.test(len_0.5.oj, len_0.5.vc,
var = F)
stat_1 <- t.test(len_1.oj, len_1.vc,
var = F)
stat_2 <- t.test(len_2.oj, len_2.vc,
var = F)
pval_0.5 <- stat_0.5$p.value
pval_1 <- stat_1$p.value
pval_2 <- stat_2$p.value
首先从统计学的角度来看。我不会计算一堆 2 个样本 t.tests,而是依赖方差分析模型来解释总体变异性。话虽如此,当然您可以按照自己喜欢的方式计算 p 值,只需调整下面代码中的 p 值即可。
library(dplyr)
library(emmeans)
library(ggplot2)
library(ggsignif)
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
## lm model which accounts for `dose` and `supp`
tg_mod <- lm(len ~ dose * supp, data = ToothGrowth)
## use emmeans to get the contrasts
emm <- emmeans(tg_mod, ~ supp | dose)
## depending on your use case you want to adjust for multiplicity
## (remove the `adjust` argument)
(con <- contrast(emm, method = "pairwise", adjust = "none") %>%
as_tibble() %>%
mutate(x_pos = as.integer(dose)) %>%
inner_join(ToothGrowth %>%
summarize(y_pos = max(len), .by = dose) %>%
mutate(y_pos = y_pos + 5),
"dose") %>%
mutate(p.value = signif(p.value, digits = 3L)))
# # A tibble: 3 × 9
# contrast dose estimate SE df t.ratio p.value x_pos y_pos
# <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl>
# 1 OJ - VC 0.5 5.25 1.62 54 3.23 0.00209 1 26.5
# 2 OJ - VC 1 5.93 1.62 54 3.65 0.00059 2 32.3
# 3 OJ - VC 2 -0.0800 1.62 54 -0.0493 0.961 3 38.9
现在我们已经计算出 p 值,我们可以将它们添加到图中。但参数
comparisons
不行,因为我们使用了躲避箱线图,所以我们需要交替使用
xmin
和 xmax
:ggplot(data = ToothGrowth,
mapping = aes(x = dose,
y = len,
fill = supp)) +
geom_boxplot(position = position_dodge(1)) +
theme_classic() +
geom_signif(xmin = con %>% pull(x_pos) - .25,
xmax = con %>% pull(x_pos) + .25,
annotations = con %>% pull(p.value),
y_position = con %>% pull(y_pos))