我的箱线图出现了一个奇怪的问题:用 ggboxplot() 和 stat_pvalue_manual() 绘制的 p 值标注线超出了绘图区域。
我用相同的代码、不同的数据创建了下面的箱线图,它显示正确。
但是当我用另一个数据集运行相同的代码时,显著性标注线超出了图表。
这是我的代码:
# Reproducible example: the significance bracket added by stat_pvalue_manual()
# is drawn beyond the right edge of the box plot.
library(tidyverse)
library(ggpubr)
library(rstatix)

# source data
# Note: `Type` is a factor with four levels (T1, C1, C2, C3), but only T1 and
# C2 (integer codes 1 and 3) occur in these 40 rows.
t1c2_.5_all <- structure(list(Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), levels = c("T1", "C1", "C2", "C3"), class = "factor"), Year = c(2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
2018, 2019, 2020, 2021, 2022, 2023), buff_dist = c(0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
NDVI_Avg3 = c(NA, 6395.874774, 6567.268516, 6625.956351,
6396.39154166667, 6329.32849466667, 6327.09833166667, 6431.656085,
6376.82626833333, 6399.35978466667, 6457.27028166667, 6530.29056866667,
6504.67240266667, 6665.66358833333, 6590.31482566667, 6680.14639133333,
6509.14329666667, 6504.518513, 6448.14020766667, NA, NA,
4509.88554533333, 4563.733277, 4755.027259, 4346.68286633333,
4471.81173166667, 4241.25849033333, 4387.160754, 4318.91640866667,
4412.97042333333, 4442.207563, 4346.34542466667, 4386.22608033333,
4727.431841, 4709.10905166667, 4936.07561766667, 4694.84953633333,
4616.632718, 4293.052728, NA)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))

# pairwise testing
t1c2_.5_all.pw <- t1c2_.5_all %>%
  pairwise_t_test(NDVI_Avg3 ~ Type, p.adjust.method = "bonferroni")
t1c2_.5_all.pw

# add the bracket coordinates for the p-values; the `x` given here must be the
# same variable that is used as `x` in the box plot
t1c2_.5_all.pw <- t1c2_.5_all.pw %>%
  add_xy_position(x = "Type")
t1c2_.5_all.pw

# the box plot
# label = "p.adj" would show the adjusted p-values instead of the stars
# hide.ns = TRUE would hide comparisons that are not significant
t1c2_.5_all.box <- ggboxplot(t1c2_.5_all, x = "Type", y = "NDVI_Avg3") +
  ggtitle(label = "PWP Watersheds (all.5yr) - All Years ") +
  stat_pvalue_manual(
    t1c2_.5_all.pw,
    label = "p.adj.signif", tip.length = 0, step.increase = 0.1
  ) +
  labs(
    subtitle = get_test_label(t1c2_.5_all.pw, detailed = TRUE),
    caption = get_pwc_label(t1c2_.5_all.pw)
  )
t1c2_.5_all.box

# pass the plot explicitly instead of relying on ggsave()'s last_plot() default
ggsave(
  "t1c2_.5km_allyears_boxplot.png",
  plot = t1c2_.5_all.box, width = 6, height = 5
)
我是否需要在 ggboxplot() 之后的 stat_pvalue_manual() 调用中添加某个参数?
我过去也用这段代码比较过多个箱线图,效果非常好!只是在比较某些成对的箱线图时遇到了问题。
提前感谢您的帮助!
我没能找出问题的确切原因——可能是 add_xy_position() 中的一个 bug——但一个简单的解决办法是自己指定 xmax(即把它从 3 改为 2):
# Workaround reprex: overwrite the bracket x coordinates returned by
# add_xy_position() so that the bracket ends on the second box.
library(tidyverse)
library(ggpubr)
library(rstatix)
#>
#> Attaching package: 'rstatix'
#> The following object is masked from 'package:stats':
#>
#> filter

# source data: `Type` has four levels (T1, C1, C2, C3), but only T1 and C2
# (integer codes 1 and 3) occur in these 40 rows
t1c2_.5_all <- structure(list(Type = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L), levels = c("T1", "C1", "C2", "C3"), class = "factor"), Year = c(2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015,
2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2004, 2005, 2006,
2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
2018, 2019, 2020, 2021, 2022, 2023), buff_dist = c(0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5),
NDVI_Avg3 = c(NA, 6395.874774, 6567.268516, 6625.956351,
6396.39154166667, 6329.32849466667, 6327.09833166667, 6431.656085,
6376.82626833333, 6399.35978466667, 6457.27028166667, 6530.29056866667,
6504.67240266667, 6665.66358833333, 6590.31482566667, 6680.14639133333,
6509.14329666667, 6504.518513, 6448.14020766667, NA, NA,
4509.88554533333, 4563.733277, 4755.027259, 4346.68286633333,
4471.81173166667, 4241.25849033333, 4387.160754, 4318.91640866667,
4412.97042333333, 4442.207563, 4346.34542466667, 4386.22608033333,
4727.431841, 4709.10905166667, 4936.07561766667, 4694.84953633333,
4616.632718, 4293.052728, NA)), row.names = c(NA, -40L), class = c("tbl_df",
"tbl", "data.frame"))
#pairwise testing
t1c2_.5_all.pw <- t1c2_.5_all %>%
  pairwise_t_test(NDVI_Avg3 ~ Type, p.adjust.method = "bonferroni")
t1c2_.5_all.pw
#> # A tibble: 1 × 9
#> .y. group1 group2 n1 n2 p p.signif p.adj p.adj.signif
#> * <chr> <chr> <chr> <int> <int> <dbl> <chr> <dbl> <chr>
#> 1 NDVI_Avg3 T1 C2 20 20 2.14e-29 **** 2.14e-29 ****
#add the pvalues to the graph by plotting its position on the x axis
t1c2_.5_all.pw <- t1c2_.5_all.pw %>%
  add_xy_position(x = "Type")
# what is xmax?
t1c2_.5_all.pw$xmax
#> [1] 3
# 3 is the index of "C2" among all four factor levels, but only two groups are
# present, so the bracket has to end at x = 2.
# NOTE(review): this assumes the plot's discrete x axis drops the unused levels
# "C1"/"C3" (ggplot2's default) -- confirm.
# Rather than hard-coding 2, look up each group's position among the levels
# that actually occur; for this data it yields xmin = 1 and xmax = 2.
used_levels <- levels(droplevels(t1c2_.5_all$Type))
t1c2_.5_all.pw$xmin <- as.numeric(match(t1c2_.5_all.pw$group1, used_levels))
t1c2_.5_all.pw$xmax <- as.numeric(match(t1c2_.5_all.pw$group2, used_levels))
#the box plot
#label = "p.adj" would provide the values
#hide.ns = TRUE would hide values that are not significant
t1c2_.5_all.box <- ggboxplot(t1c2_.5_all, x = "Type", y = "NDVI_Avg3") +
  ggtitle(label = "PWP Watersheds (all.5yr) - All Years ") +
  stat_pvalue_manual(
    t1c2_.5_all.pw,
    label = "p.adj.signif", tip.length = 0, step.increase = 0.1
  ) +
  labs(
    subtitle = get_test_label(t1c2_.5_all.pw, detailed = TRUE),
    caption = get_pwc_label(t1c2_.5_all.pw)
  )
t1c2_.5_all.box
#> Warning: Removed 4 rows containing non-finite outside the scale range
#> (`stat_boxplot()`).
# pass the plot explicitly instead of relying on ggsave()'s last_plot() default
ggsave(
  "t1c2_.5km_allyears_boxplot.png",
  plot = t1c2_.5_all.box, width = 6, height = 5
)
创建于 2024 年 12 月 13 日,使用 reprex v2.1.0
这能解决你的问题吗?