我最初创建了一个 for 循环来计算许多变量,这些变量有时依赖于先前的迭代,例如[i] 和 [i-1]。
为了提高较大数据集的性能,我尝试使用矢量化并生成了以下代码,速度更快。我希望使用此代码的数据框的长度约为 200,000 个观察值。无论如何,我可以进一步提高这段代码的性能吗?希望以后能跑不同的场景,这样也会增加计算时间。
感谢您的帮助!
# Load required packages
library(dplyr)
library(purrr)
# Set seed for reproducibility
set.seed(42)
# Generate random data for storm
n <- 10000 # number of rows
storm <- data.frame(
date_time = seq.POSIXt(from = as.POSIXct("2021-01-01"), by = "day", length.out = n),
temp = runif(n, min = -10, max = 35),
Rain = runif(n, min = 0, max = 50),
PET = runif(n, min = 0, max = 10),
Qin_m3 = runif(n, min = 0, max = 100)
)
# Placeholder functions
TSA_depth.fn <- function(volume, TSA_dimensions) {
# Example behavior: return volume divided by some constant
return(volume / TSA_dimensions)
}
soil_infiltration.fn <- function(volume, MRC_df, MRC_name, pipe_base_vol) {
# Example behavior: return volume times some fraction
return(volume * 0.1)
}
outlet_pipe.fn <- function(diameter, waterDepth, pipe_base, Cd, maxTSA_height) {
# Example behavior: return some function of the inputs
return(diameter * waterDepth * Cd)
}
# Constants
maxTSA_area <- 1000
TSA_dimensions <- 50
pipe_diameter <- 0.5
pipe_base_m <- 0.1
Cd <- 0.6
maxTSA_height <- 2.5
maxTSA_volume <- 2000
MRC_df <- data.frame() # Example placeholder
MRC_name <- "example"
pipe_base_vol <- 0.1
# Initial data preparation
TSA_model <- storm %>%
select(date_time, temp, Rain, PET, Qin_m3) %>%
mutate(Qin_m3 = replace(Qin_m3, 1, 0))
# Initialize new columns with 0 and precompute PET_m3
TSA_model <- TSA_model %>%
mutate(S = 0, dS = 0, depth = 0, PET_m3 = round((PET / 1000) * maxTSA_area, digits = 3),
soil_m3 = 0, pipe_m3 = 0, overflow_m3 = 0, Qout = 0)
# Function to calculate the values for the rows
calculate_values <- function(df) {
n <- nrow(df)
for (i in 2:n) {
df$dS[i] <- df$Qin_m3[i] - df$Qout[i - 1]
df$S[i] <- df$S[i - 1] + df$dS[i]
df$depth[i] <- TSA_depth.fn(volume = df$S[i], TSA_dimensions = TSA_dimensions)
soil_infiltration <- soil_infiltration.fn(volume = df$S[i], MRC_df = MRC_df, MRC_name = MRC_name, pipe_base_vol = pipe_base_vol)
df$soil_m3[i] <- max(soil_infiltration - df$PET_m3[i], 0)
df$soil_m3[i] <- ifelse(df$S[i] - df$soil_m3[i] < 0, df$S[i], df$soil_m3[i])
pipe_outflow <- outlet_pipe.fn(diameter = pipe_diameter, waterDepth = df$depth[i], pipe_base = pipe_base_m, Cd = Cd, maxTSA_height = maxTSA_height)
df$pipe_m3[i] <- max(pipe_outflow, 0)
df$overflow_m3[i] <- ifelse(df$S[i] > maxTSA_volume,
max(df$S[i] - maxTSA_volume - df$pipe_m3[i] - df$soil_m3[i] - df$PET_m3[i], 0),
0)
df$Qout[i] <- df$PET_m3[i] + df$soil_m3[i] + df$pipe_m3[i] + df$overflow_m3[i]
df$Qout[i] <- ifelse(df$S[i] - df$Qout[i] < 0, df$S[i], df$Qout[i])
}
return(df)
}
# Calculate the new values
TSA_model <- calculate_values(TSA_model)
# Display the updated TSA_model
str(TSA_model)
summary(TSA_model)
library(dplyr)
TSA_model |>
mutate(
dS = Qin_m3 - lag(Qout, default = 0L),
S = lag(S) + dS,
depth = TSA_depth.fn(S, TSA_dimensions),
soil_infiltration = soil_infiltration.fn(S, MRC_df = MRC_df, MRC_name = MRC_name, pipe_base_vol = pipe_base_vol),
soil_m3 = max(soil_infiltration - PET_m3, 0),
soil_m3 = ifelse(S - soil_m3 < 0, S, soil_m3),
pipe_outflow = outlet_pipe.fn(pipe_diameter, waterDepth = depth, pipe_base = pipe_base_m, Cd = Cd, maxTSA_height = maxTSA_height),
pipe_m3 = max(pipe_outflow, 0),
overflow_m3 = ifelse(S[i] > maxTSA_volume,
max(S[i] - maxTSA_volume - pipe_m3 - soil_m3 - PET_m3[i], 0),
0),
Qout = PET_m3 + soil_m3 + pipe_m3 + overflow_m3,
Qout = ifelse(S - Qout < 0, S, Qout)
)