我的箱线图发生了一些奇怪的事情。从 stat_pvalue_manual() 和 ggboxplot() 显示的 pvalue 线正在脱离图表

问题描述 投票:0回答:1

我的箱线图出现了奇怪的现象:用 ggboxplot() 和 stat_pvalue_manual() 绘制的 p 值标注线超出了图形范围。

我使用相同的代码但不同的数据创建了下面的箱线图,并且它显示正确

box plot displaying correctly

但是当我用另一个数据集运行相同的代码时,显著性标注线超出了图表范围。

这是我的代码:

# Source data: 40 rows, i.e. the years 2004-2023 for each of two `Type` groups.
t1c2_.5_all <- structure(
  list(
    # Only "T1" and "C2" occur in the data; "C1" and "C3" are kept as
    # (unused) factor levels, exactly as in the original object.
    Type = factor(
      rep(c("T1", "C2"), each = 20L),
      levels = c("T1", "C1", "C2", "C3")
    ),
    # Stored as double, not integer.
    Year = as.numeric(rep(2004:2023, times = 2L)),
    buff_dist = rep(0.5, times = 40L),
    NDVI_Avg3 = c(
      # Type "T1", 2004-2023 (first and last year are missing)
      NA,
      6395.874774, 6567.268516, 6625.956351,
      6396.39154166667, 6329.32849466667, 6327.09833166667,
      6431.656085, 6376.82626833333, 6399.35978466667,
      6457.27028166667, 6530.29056866667, 6504.67240266667,
      6665.66358833333, 6590.31482566667, 6680.14639133333,
      6509.14329666667, 6504.518513, 6448.14020766667,
      NA,
      # Type "C2", 2004-2023 (first and last year are missing)
      NA,
      4509.88554533333, 4563.733277, 4755.027259,
      4346.68286633333, 4471.81173166667, 4241.25849033333,
      4387.160754, 4318.91640866667, 4412.97042333333,
      4442.207563, 4346.34542466667, 4386.22608033333,
      4727.431841, 4709.10905166667, 4936.07561766667,
      4694.84953633333, 4616.632718, 4293.052728,
      NA
    )
  ),
  row.names = c(NA, -40L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Pairwise t-tests of NDVI_Avg3 between the `Type` groups (Bonferroni-adjusted).
t1c2_.5_all.pw <- t1c2_.5_all %>%
  pairwise_t_test(
    NDVI_Avg3 ~ Type,
    p.adjust.method = "bonferroni"
  )
t1c2_.5_all.pw


# Attach plotting coordinates for the p-value brackets.
# `x` here must name the same variable as `x` in ggboxplot() below.
t1c2_.5_all.pw <- t1c2_.5_all.pw %>%
  add_xy_position(x = "Type")
t1c2_.5_all.pw

# Box plot with the significance brackets, test summary and comparison caption.
# Options for stat_pvalue_manual():
#   label = "p.adj"  shows the adjusted p-values instead of the stars
#   hide.ns = TRUE   hides comparisons that are not significant
t1c2_.5_all.box <- ggboxplot(t1c2_.5_all, x = "Type", y = "NDVI_Avg3") +
  stat_pvalue_manual(
    t1c2_.5_all.pw,
    label = "p.adj.signif",
    tip.length = 0,
    step.increase = 0.1
  ) +
  labs(
    title = "PWP Watersheds (all.5yr) - All Years ",
    subtitle = get_test_label(t1c2_.5_all.pw, detailed = TRUE),
    caption = get_pwc_label(t1c2_.5_all.pw)
  )

# Draw the plot, then save it (ggsave() writes the last plot displayed).
t1c2_.5_all.box
ggsave("t1c2_.5km_allyears_boxplot.png", width = 6, height = 5)

但是运行这段代码后,得到的图只是接近我想要的效果。 boxplot not displaying correctly

我需要在 ggboxplot() 的 stat_pvalue_manual() 部分包含一个参数吗?

我过去也用这段代码比较过多个箱线图,效果非常好!只是在成对比较某些箱线图时遇到了问题。

enter image description here

提前感谢您的帮助!

ggplot2 plot ggboxplot
1个回答
0
投票

我无法找出问题的原因(可能是 `add_xy_position()` 中的错误),但一个简单的解决方法是自己指定 xmax(即将其从 3 更改为 2):

library(tidyverse)
library(ggpubr)
library(rstatix)
#> 
#> Attaching package: 'rstatix'
#> The following object is masked from 'package:stats':
#> 
#>     filter

# Example data: 40 rows, i.e. the years 2004-2023 for each of two `Type` groups.
t1c2_.5_all <- structure(
  list(
    # Only "T1" and "C2" occur in the data; "C1" and "C3" are kept as
    # (unused) factor levels, exactly as in the original object.
    Type = factor(
      rep(c("T1", "C2"), each = 20L),
      levels = c("T1", "C1", "C2", "C3")
    ),
    # Stored as double, not integer.
    Year = as.numeric(rep(2004:2023, times = 2L)),
    buff_dist = rep(0.5, times = 40L),
    NDVI_Avg3 = c(
      # Type "T1", 2004-2023 (first and last year are missing)
      NA,
      6395.874774, 6567.268516, 6625.956351,
      6396.39154166667, 6329.32849466667, 6327.09833166667,
      6431.656085, 6376.82626833333, 6399.35978466667,
      6457.27028166667, 6530.29056866667, 6504.67240266667,
      6665.66358833333, 6590.31482566667, 6680.14639133333,
      6509.14329666667, 6504.518513, 6448.14020766667,
      NA,
      # Type "C2", 2004-2023 (first and last year are missing)
      NA,
      4509.88554533333, 4563.733277, 4755.027259,
      4346.68286633333, 4471.81173166667, 4241.25849033333,
      4387.160754, 4318.91640866667, 4412.97042333333,
      4442.207563, 4346.34542466667, 4386.22608033333,
      4727.431841, 4709.10905166667, 4936.07561766667,
      4694.84953633333, 4616.632718, 4293.052728,
      NA
    )
  ),
  row.names = c(NA, -40L),
  class = c("tbl_df", "tbl", "data.frame")
)

# Pairwise t-tests of NDVI_Avg3 between the `Type` groups (Bonferroni-adjusted).
t1c2_.5_all.pw <- t1c2_.5_all %>%
  pairwise_t_test(NDVI_Avg3 ~ Type, p.adjust.method = "bonferroni")
t1c2_.5_all.pw
#> # A tibble: 1 × 9
#>   .y.       group1 group2    n1    n2        p p.signif    p.adj p.adj.signif
#> * <chr>     <chr>  <chr>  <int> <int>    <dbl> <chr>       <dbl> <chr>       
#> 1 NDVI_Avg3 T1     C2        20    20 2.14e-29 ****     2.14e-29 ****

# Attach plotting coordinates for the p-value brackets.
# `x` must name the same variable that is used as `x` in ggboxplot().
t1c2_.5_all.pw <- t1c2_.5_all.pw %>% add_xy_position(x = "Type")

# what is xmax?
t1c2_.5_all.pw$xmax
#> [1] 3

# `Type` declares four levels ("T1", "C1", "C2", "C3") but only "T1" and "C2"
# are observed. The bracket end is placed at 3 ("C2" is the 3rd declared
# level), while the plot shows only two boxes, so "C2" sits at x = 2
# (presumably because the discrete x axis drops unused levels).
# Instead of hard-coding the corrected value, recompute both bracket ends from
# the levels that are actually present, so this works for any pair of groups.
observed_levels <- levels(droplevels(t1c2_.5_all$Type))
t1c2_.5_all.pw$xmin <- match(t1c2_.5_all.pw$group1, observed_levels)
t1c2_.5_all.pw$xmax <- match(t1c2_.5_all.pw$group2, observed_levels)

# Box plot with the significance brackets, test summary and comparison caption.
# Options for stat_pvalue_manual():
#   label = "p.adj"  shows the adjusted p-values instead of the stars
#   hide.ns = TRUE   hides comparisons that are not significant
t1c2_.5_all.box <- ggboxplot(t1c2_.5_all, x = "Type", y = "NDVI_Avg3") +
  stat_pvalue_manual(
    t1c2_.5_all.pw,
    label = "p.adj.signif",
    tip.length = 0,
    step.increase = 0.1
  ) +
  labs(
    title = "PWP Watersheds (all.5yr) - All Years ",
    subtitle = get_test_label(t1c2_.5_all.pw, detailed = TRUE),
    caption = get_pwc_label(t1c2_.5_all.pw)
  )

# Draw the plot; the rows with missing NDVI_Avg3 trigger the warning below.
t1c2_.5_all.box
#> Warning: Removed 4 rows containing non-finite outside the scale range
#> (`stat_boxplot()`).

# Save the plot just displayed.
ggsave("t1c2_.5km_allyears_boxplot.png", width = 6, height = 5)

创建于 2024 年 12 月 13 日,使用 reprex v2.1.0

这能解决你的问题吗?

© www.soinside.com 2019 - 2024. All rights reserved.