在 R 中绘制时间序列数据的累积均值的累积不确定性图

问题描述 投票:0回答:1

我偶然发现了一项艰巨的任务,即显示温室气体通量时间序列上具有累积不确定性的累积图。我的原始数据集由 30 分钟的流量数据组成,然后使用 cumsum() 函数将其聚合为每小时的数据。

我的代码如下所示:

### ---- Binning Net emission to times. Averaging half-hourly to Hourly Data 
str(gp_NEE  )
    tibble [1,270 × 18] (S3: tbl_df/tbl/data.frame)
     $ DateTime: POSIXct[1:1270], format: "2024-06-11 00:00:00" "2024-06-11 00:30:00" ...
     $ daytime : num [1:1270] 0 0 0 0 0 0 0 0 0 0 ...
     $ NEE     : num [1:1270] 1.831 0.702 2.017 8.33 3.872 ...

gp_NEE_hour <- gp_NEE %>% 
        mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
        select(-c(DateTime)) %>%  # removing Date and Time columns: cannot be averaged
        group_by(hourlyDate) %>% 
        summarize(
                across(everything(),mean))

    ## make cumulative data 
    
    gp_NEE_hour<- gp_NEE_hour %>%
            mutate(
                cum_NEE = cumsum(replace_na(NEE, 0)))
    
    str(gp_NEE_hour)
            tibble [635 × 19] (S3: tbl_df/tbl/data.frame)
             $ hourlyDate: POSIXct[1:635], format: "2024-06-11 00:00:00" "2024-06-11 01:00:00" "2024-06-11 02:00:00" "2024-06-11 03:00:00" ...
             $ daytime   : num [1:635] 0 0 0 0 0 0 1 1 1 1 ...
             $ NEE       : num [1:635] 1.27 5.17 2.58 NA 1.62 ...
             $ cum_NEE   : num [1:635] 1.27 6.44 9.02 9.02 10.64 ...


    cumplot <- gp_NEE_hour %>%
                ggplot( aes(x=hourlyDate, y=cum_NEE)) +
                        geom_line( color="grey") +
                        geom_point(shape=21, color="black", fill="#69b3a2", size=1) +
                        theme_bw() +theme(legend.position="none", 
            text = element_text(size=15))+
        labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~g~m^{-2}~~jam^{-1}~")"))+xlab ( "Waktu")
    cumplot

结果图是这样的: My Cumulative Flux Curve

我可以在 dplyr 中使用

summarise()
编写代码来计算 stdev,但无法通过累积平均值传播它。我很难找到 R 包或 tidyverse 函数来开发这样的图(https://doi.org/10.1038/s41561-021-00785-2):

Cumulative NEE with cumulative flux uncertainty

提前谢谢您。

编辑。

我尝试改进编码,但不知何故,

geom_ribbon()
无法显示不确定性。

gp_NEE_hour_no_NA <-gp_NEE %>% 
                    mutate(NEE_Mgha = NEE * 0.24) %>% 
                    select(DateTime, NEE_Mgha) %>% 
                    mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
                    group_by(hourlyDate) %>% 
                    summarise(
                            meanH_NEE = mean(NEE_Mgha), 
                            sd_NEE = sd(NEE_Mgha), 
                            n = n(),
                            se_NEE = sd_NEE / sqrt(n),.groups = "drop") %>%
                    mutate(cum_NEE = cumsum(replace_na(meanH_NEE, 0)))%>% 
                    mutate(cum_se_NEE = sqrt(cumsum(se_NEE^2)))
                    
str(gp_NEE_hour_no_NA)
        tibble [635 × 7] (S3: tbl_df/tbl/data.frame)
         $ hourlyDate: POSIXct[1:635], format:  ...
         $ meanH_NEE : num [1:635] 0.304 1.242 0.619 NA 0.39 ...
         $ sd_NEE    : num [1:635] 0.1915 1.0714 0.4392 NA 0.0646 ...
         $ n         : int [1:635] 2 2 2 2 2 2 2 2 2 2 ...
         $ se_NEE    : num [1:635] 0.1354 0.7576 0.3106 NA 0.0457 ...
         $ cum_NEE   : num [1:635] 0.304 1.546 2.164 2.164 2.554 ...
         $ cum_se_NEE: num [1:635] 0.135 0.77 0.83 NA NA ...

plot_NEE2 <-  ggplot(data = gp_NEE_hour_no_NA, aes(x = hourlyDate, y = cum_NEE)) +                  
                geom_line() + 
                geom_ribbon(aes(ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE), alpha = .5,
                            fill = "darkseagreen3", color = "transparent") +                    
                #geom_point(aes(y = mean),color="aquamarine4")+
                ylim(-1.5, 1.5) + theme(text = element_text(size=15))+
                labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~Mg~ha^{-1}~~hari^{-1}~")"))+xlab("Waktu") + 
                theme_bw()
plot_NEE2
r ggplot2 dplyr cumulative-sum timeserieschart
1个回答
0
投票

是的,经过多次修改,我意识到我没有足够的数据来开发累积平均值的错误。因此,我使用时间受限的滚动窗口来填补空白。之后,代码成功运行。

# gap-fill All variables using MDV

EFgp_NEE_mdv <- gp_NEE  %>%
  group_by(Time) %>%
  mutate(across(c(NEE, H, LE, Rg, Tair, PPFD, VPD, Ustar, rH, Tsoil, SWC), 
                ~ ifelse(is.na(.), 
                         mean(.[which(DateTime >= DateTime - 604800 & DateTime <= DateTime + 604800)], na.rm = TRUE), 
                        .))) %>%
  ungroup()
      
write_xlsx(EFgp_NEE_mdv, "D:/Licor7500/EFgp_NEE_mdv.xlsx") # test if the process run well. Inspect the result = OK !! 

# plot NEE over Time

### ---- Binning Net emission to times. Averaging half-hourly to Hourly Data 

gp_NEE_hour <- EFgp_NEE_mdv %>% 
        mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
        select(-c(DateTime, Date, Time)) %>%  # removing Date and Time columns: cannot be averaged
        group_by(hourlyDate) %>% 
        summarize(
                across(everything(),mean)) %>%
        mutate(
                Date = format(hourlyDate, format = "%Y/%m/%d"), #summoning Date again from hourlyDate
                Time = format(hourlyDate, format = "%H:%M:%S"))%>% # summoning Time again from hourlyDate


      ungroup()

str(gp_NEE_hour)
        tibble [867 × 18] (S3: tbl_df/tbl/data.frame)
         $ hourlyDate: POSIXct[1:867], format: "2024-06-10 14:00:00" "2024-06-10 15:00:00" "2024-06-10 16:00:00" ...
         $ day       : num [1:867] 10 10 10 10 10 10 10 10 10 10 ...
         $ month     : num [1:867] 6 6 6 6 6 6 6 6 6 6 ...
         $ year      : num [1:867] 2024 2024 2024 2024 2024 ...
         $ daytime   : num [1:867] 1 1 1 1 0 0 0 0 0 0 ...
         $ NEE       : num [1:867] 0.636 1.902 1.506 1.101 0.804 ...
                 
         
plot1 <- ggplot(gp_NEE_hour, aes(Time, NEE,colour = Time)) +
    geom_jitter(alpha = 0.40)+ 
    geom_boxplot(col = "grey40", alpha = 0.40) +
    geom_smooth(aes(group = 1, col = "yellow"), method="lm", formula =  y ~ poly(x, 6))+
    scale_size_manual(values = 3, guide = FALSE)+
     ylim(-5, 7.5)+
    theme_bw()+
    theme(legend.position="none", 
        text = element_text(size=15), 
        axis.title.x=element_blank(), 
        axis.text.x = element_text(angle = 90))+
    labs(y=expression(CO[2]~Flux~Netto~~"("~g~m^{-2}~~jam^{-1}~")"))
plot1
    

## make cumulative data 

gp_NEE_hour_cum <-EFgp_NEE_mdv %>% 
                    mutate(NEE_Mgha = NEE * 0.01) %>% 
                    select(DateTime, NEE_Mgha) %>% 
                    mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
                    group_by(hourlyDate) %>% 
                    summarise(
                            meanH_NEE = mean(NEE_Mgha, na.rm = TRUE), 
                            sd_NEE = sd(NEE_Mgha, na.rm = TRUE), 
                            n = sum(!is.na(NEE_Mgha))
                                ) %>% 
                    mutate(se_NEE = sd_NEE / sqrt(n)) %>% 
                    mutate(
                            cum_NEE = cumsum(replace_na(meanH_NEE, 0)), 
                            cum_se_NEE = sqrt(cumsum(se_NEE^2))                         
    )
                    
str(gp_NEE_hour_cum)
        tibble [867 × 7] (S3: tbl_df/tbl/data.frame)
         $ hourlyDate: POSIXct[1:867], format:  ...
         $ meanH_NEE : num [1:867] 0.153 0.456 0.361 0.264 0.193 ...
         $ sd_NEE    : num [1:867] 0.4305 0.2814 0.3762 0.0306 0.1345 ...
         $ n         : int [1:867] 2 2 2 2 2 2 2 2 2 2 ...
         $ se_NEE    : num [1:867] 0.3044 0.199 0.266 0.0216 0.0951 ...
         $ cum_NEE   : num [1:867] 0.153 0.609 0.97 1.235 1.428 ...
         $ cum_se_NEE: num [1:867] 0.304 0.364 0.451 0.451 0.461 ...
         
         
plot_NEE2 <- ggplot(data = gp_NEE_hour_cum, aes(x = hourlyDate, y = cum_NEE)) + 
  geom_line(aes(group = 1), size = 1.5, color = "black", stroke = 2) + 
  geom_ribbon(aes(y = cum_NEE, ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE), 
              alpha = 0.5, fill = "darkseagreen3") + 
  # geom_point(aes(y = mean), color = "aquamarine4") + 
  theme_bw() + 
  ylim(0,8)+
  theme(text = element_text(size = 15)) + 
  labs(y = expression(CO[2] ~ Flux ~ Netto ~~ " (" ~ Mg ~ ha^{-1} ~~ jam ^{-1} ~ ")"), 
       x = "")+ 
  scale_x_datetime(breaks = seq(min(gp_NEE_hour_cum$hourlyDate), max(gp_NEE_hour_cum$hourlyDate), by = "1 days"), 
                   labels = function(x) format(x, "%Y-%m-%d %H:%M"))+ 
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10))

plot_NEE2

最后是漂亮的图表 Cumulative Fluxes

我把这个留给研究温室气体通量的新科学家。

© www.soinside.com 2019 - 2024. All rights reserved.