我偶然发现了一项艰巨的任务,即显示温室气体通量时间序列上具有累积不确定性的累积图。我的原始数据集由 30 分钟的流量数据组成,然后使用 cumsum() 函数将其聚合为每小时的数据。
我的代码如下所示:
### ---- Binning Net emission to times. Averaging half-hourly to Hourly Data
str(gp_NEE )
tibble [1,270 × 18] (S3: tbl_df/tbl/data.frame)
$ DateTime: POSIXct[1:1270], format: "2024-06-11 00:00:00" "2024-06-11 00:30:00" ...
$ daytime : num [1:1270] 0 0 0 0 0 0 0 0 0 0 ...
$ NEE : num [1:1270] 1.831 0.702 2.017 8.33 3.872 ...
gp_NEE_hour <- gp_NEE %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
select(-c(DateTime)) %>% # removing Date and Time columns: cannot be averaged
group_by(hourlyDate) %>%
summarize(
across(everything(),mean))
## make cumulative data
gp_NEE_hour<- gp_NEE_hour %>%
mutate(
cum_NEE = cumsum(replace_na(NEE, 0)))
str(gp_NEE_hour)
tibble [635 × 19] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:635], format: "2024-06-11 00:00:00" "2024-06-11 01:00:00" "2024-06-11 02:00:00" "2024-06-11 03:00:00" ...
$ daytime : num [1:635] 0 0 0 0 0 0 1 1 1 1 ...
$ NEE : num [1:635] 1.27 5.17 2.58 NA 1.62 ...
$ cum_NEE : num [1:635] 1.27 6.44 9.02 9.02 10.64 ...
cumplot <- gp_NEE_hour %>%
ggplot( aes(x=hourlyDate, y=cum_NEE)) +
geom_line( color="grey") +
geom_point(shape=21, color="black", fill="#69b3a2", size=1) +
theme_bw() +theme(legend.position="none",
text = element_text(size=15))+
labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~g~m^{-2}~~jam^{-1}~")"))+xlab ( "Waktu")
cumplot
我可以在 dplyr 中使用
summarise()
编写代码来计算 stdev,但无法通过累积平均值传播它。我很难找到 R 包或 tidyverse 函数来开发这样的图(https://doi.org/10.1038/s41561-021-00785-2):
提前谢谢您。
编辑。
我尝试改进编码,但不知何故,
geom_ribbon()
无法显示不确定性。
gp_NEE_hour_no_NA <-gp_NEE %>%
mutate(NEE_Mgha = NEE * 0.24) %>%
select(DateTime, NEE_Mgha) %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
group_by(hourlyDate) %>%
summarise(
meanH_NEE = mean(NEE_Mgha),
sd_NEE = sd(NEE_Mgha),
n = n(),
se_NEE = sd_NEE / sqrt(n),.groups = "drop") %>%
mutate(cum_NEE = cumsum(replace_na(meanH_NEE, 0)))%>%
mutate(cum_se_NEE = sqrt(cumsum(se_NEE^2)))
str(gp_NEE_hour_no_NA)
tibble [635 × 7] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:635], format: ...
$ meanH_NEE : num [1:635] 0.304 1.242 0.619 NA 0.39 ...
$ sd_NEE : num [1:635] 0.1915 1.0714 0.4392 NA 0.0646 ...
$ n : int [1:635] 2 2 2 2 2 2 2 2 2 2 ...
$ se_NEE : num [1:635] 0.1354 0.7576 0.3106 NA 0.0457 ...
$ cum_NEE : num [1:635] 0.304 1.546 2.164 2.164 2.554 ...
$ cum_se_NEE: num [1:635] 0.135 0.77 0.83 NA NA ...
plot_NEE2 <- ggplot(data = gp_NEE_hour_no_NA, aes(x = hourlyDate, y = cum_NEE)) +
geom_line() +
geom_ribbon(aes(ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE), alpha = .5,
fill = "darkseagreen3", color = "transparent") +
#geom_point(aes(y = mean),color="aquamarine4")+
ylim(-1.5, 1.5) + theme(text = element_text(size=15))+
labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~Mg~ha^{-1}~~hari^{-1}~")"))+xlab("Waktu") +
theme_bw()
plot_NEE2
是的,经过多次修改,我意识到我没有足够的数据来开发累积平均值的错误。因此,我使用时间受限的滚动窗口来填补空白。之后,代码成功运行。
# gap-fill All variables using MDV
EFgp_NEE_mdv <- gp_NEE %>%
group_by(Time) %>%
mutate(across(c(NEE, H, LE, Rg, Tair, PPFD, VPD, Ustar, rH, Tsoil, SWC),
~ ifelse(is.na(.),
mean(.[which(DateTime >= DateTime - 604800 & DateTime <= DateTime + 604800)], na.rm = TRUE),
.))) %>%
ungroup()
write_xlsx(EFgp_NEE_mdv, "D:/Licor7500/EFgp_NEE_mdv.xlsx") # test if the process run well. Inspect the result = OK !!
# plot NEE over Time
### ---- Binning Net emission to times. Averaging half-hourly to Hourly Data
gp_NEE_hour <- EFgp_NEE_mdv %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
select(-c(DateTime, Date, Time)) %>% # removing Date and Time columns: cannot be averaged
group_by(hourlyDate) %>%
summarize(
across(everything(),mean)) %>%
mutate(
Date = format(hourlyDate, format = "%Y/%m/%d"), #summoning Date again from hourlyDate
Time = format(hourlyDate, format = "%H:%M:%S"))%>% # summoning Time again from hourlyDate
ungroup()
str(gp_NEE_hour)
tibble [867 × 18] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:867], format: "2024-06-10 14:00:00" "2024-06-10 15:00:00" "2024-06-10 16:00:00" ...
$ day : num [1:867] 10 10 10 10 10 10 10 10 10 10 ...
$ month : num [1:867] 6 6 6 6 6 6 6 6 6 6 ...
$ year : num [1:867] 2024 2024 2024 2024 2024 ...
$ daytime : num [1:867] 1 1 1 1 0 0 0 0 0 0 ...
$ NEE : num [1:867] 0.636 1.902 1.506 1.101 0.804 ...
plot1 <- ggplot(gp_NEE_hour, aes(Time, NEE,colour = Time)) +
geom_jitter(alpha = 0.40)+
geom_boxplot(col = "grey40", alpha = 0.40) +
geom_smooth(aes(group = 1, col = "yellow"), method="lm", formula = y ~ poly(x, 6))+
scale_size_manual(values = 3, guide = FALSE)+
ylim(-5, 7.5)+
theme_bw()+
theme(legend.position="none",
text = element_text(size=15),
axis.title.x=element_blank(),
axis.text.x = element_text(angle = 90))+
labs(y=expression(CO[2]~Flux~Netto~~"("~g~m^{-2}~~jam^{-1}~")"))
plot1
## make cumulative data
gp_NEE_hour_cum <-EFgp_NEE_mdv %>%
mutate(NEE_Mgha = NEE * 0.01) %>%
select(DateTime, NEE_Mgha) %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
group_by(hourlyDate) %>%
summarise(
meanH_NEE = mean(NEE_Mgha, na.rm = TRUE),
sd_NEE = sd(NEE_Mgha, na.rm = TRUE),
n = sum(!is.na(NEE_Mgha))
) %>%
mutate(se_NEE = sd_NEE / sqrt(n)) %>%
mutate(
cum_NEE = cumsum(replace_na(meanH_NEE, 0)),
cum_se_NEE = sqrt(cumsum(se_NEE^2))
)
str(gp_NEE_hour_cum)
tibble [867 × 7] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:867], format: ...
$ meanH_NEE : num [1:867] 0.153 0.456 0.361 0.264 0.193 ...
$ sd_NEE : num [1:867] 0.4305 0.2814 0.3762 0.0306 0.1345 ...
$ n : int [1:867] 2 2 2 2 2 2 2 2 2 2 ...
$ se_NEE : num [1:867] 0.3044 0.199 0.266 0.0216 0.0951 ...
$ cum_NEE : num [1:867] 0.153 0.609 0.97 1.235 1.428 ...
$ cum_se_NEE: num [1:867] 0.304 0.364 0.451 0.451 0.461 ...
plot_NEE2 <- ggplot(data = gp_NEE_hour_cum, aes(x = hourlyDate, y = cum_NEE)) +
geom_line(aes(group = 1), size = 1.5, color = "black", stroke = 2) +
geom_ribbon(aes(y = cum_NEE, ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE),
alpha = 0.5, fill = "darkseagreen3") +
# geom_point(aes(y = mean), color = "aquamarine4") +
theme_bw() +
ylim(0,8)+
theme(text = element_text(size = 15)) +
labs(y = expression(CO[2] ~ Flux ~ Netto ~~ " (" ~ Mg ~ ha^{-1} ~~ jam ^{-1} ~ ")"),
x = "")+
scale_x_datetime(breaks = seq(min(gp_NEE_hour_cum$hourlyDate), max(gp_NEE_hour_cum$hourlyDate), by = "1 days"),
labels = function(x) format(x, "%Y-%m-%d %H:%M"))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10))
plot_NEE2
我把这个留给研究温室气体通量的新科学家。