提高 R 中迭代的性能

问题描述 投票:0回答:1

我最初创建了一个 for 循环来计算许多变量,这些变量有时依赖于先前的迭代,例如[i] 和 [i-1]。

为了提高较大数据集的性能,我尝试使用矢量化并生成了以下代码,速度更快。我希望使用此代码的数据框的长度约为 200,000 个观察值。无论如何,我可以进一步提高这段代码的性能吗?希望以后能跑不同的场景,这样也会增加计算时间。

感谢您的帮助!

# Load required packages
library(dplyr)
library(purrr)

# Set seed for reproducibility
set.seed(42)

# Generate random data for storm
n <- 10000  # number of rows
storm <- data.frame(
  date_time = seq.POSIXt(from = as.POSIXct("2021-01-01"), by = "day", length.out = n),
  temp = runif(n, min = -10, max = 35),
  Rain = runif(n, min = 0, max = 50),
  PET = runif(n, min = 0, max = 10),
  Qin_m3 = runif(n, min = 0, max = 100)
)

# Placeholder functions
TSA_depth.fn <- function(volume, TSA_dimensions) {
  # Example behavior: return volume divided by some constant
  return(volume / TSA_dimensions)
}

soil_infiltration.fn <- function(volume, MRC_df, MRC_name, pipe_base_vol) {
  # Example behavior: return volume times some fraction
  return(volume * 0.1)
}

outlet_pipe.fn <- function(diameter, waterDepth, pipe_base, Cd, maxTSA_height) {
  # Example behavior: return some function of the inputs
  return(diameter * waterDepth * Cd)
}

# Constants
maxTSA_area <- 1000
TSA_dimensions <- 50
pipe_diameter <- 0.5
pipe_base_m <- 0.1
Cd <- 0.6
maxTSA_height <- 2.5
maxTSA_volume <- 2000
MRC_df <- data.frame()  # Example placeholder
MRC_name <- "example"
pipe_base_vol <- 0.1

# Initial data preparation
TSA_model <- storm %>%
  select(date_time, temp, Rain, PET, Qin_m3) %>%
  mutate(Qin_m3 = replace(Qin_m3, 1, 0))

# Initialize new columns with 0 and precompute PET_m3
TSA_model <- TSA_model %>%
  mutate(S = 0, dS = 0, depth = 0, PET_m3 = round((PET / 1000) * maxTSA_area, digits = 3), 
         soil_m3 = 0, pipe_m3 = 0, overflow_m3 = 0, Qout = 0)

# Function to calculate the values for the rows
calculate_values <- function(df) {
  n <- nrow(df)
  for (i in 2:n) {
    df$dS[i] <- df$Qin_m3[i] - df$Qout[i - 1]
    df$S[i] <- df$S[i - 1] + df$dS[i]
    df$depth[i] <- TSA_depth.fn(volume = df$S[i], TSA_dimensions = TSA_dimensions)
    
    soil_infiltration <- soil_infiltration.fn(volume = df$S[i], MRC_df = MRC_df, MRC_name = MRC_name, pipe_base_vol = pipe_base_vol)
    df$soil_m3[i] <- max(soil_infiltration - df$PET_m3[i], 0)
    df$soil_m3[i] <- ifelse(df$S[i] - df$soil_m3[i] < 0, df$S[i], df$soil_m3[i])
    
    pipe_outflow <- outlet_pipe.fn(diameter = pipe_diameter, waterDepth = df$depth[i], pipe_base = pipe_base_m, Cd = Cd, maxTSA_height = maxTSA_height)
    df$pipe_m3[i] <- max(pipe_outflow, 0)
    
    df$overflow_m3[i] <- ifelse(df$S[i] > maxTSA_volume,
                                max(df$S[i] - maxTSA_volume - df$pipe_m3[i] - df$soil_m3[i] - df$PET_m3[i], 0),
                                0)
    
    df$Qout[i] <- df$PET_m3[i] + df$soil_m3[i] + df$pipe_m3[i] + df$overflow_m3[i]
    df$Qout[i] <- ifelse(df$S[i] - df$Qout[i] < 0, df$S[i], df$Qout[i])
  }
  return(df)
}

# Calculate the new values
TSA_model <- calculate_values(TSA_model)

# Display the updated TSA_model
str(TSA_model)
summary(TSA_model)
r performance loops iteration vectorization
1个回答
0
投票
library(dplyr)

TSA_model |>
  mutate(
    dS = Qin_m3 - lag(Qout, default = 0L),
    S = lag(S) + dS,
    depth = TSA_depth.fn(S, TSA_dimensions),
    
    soil_infiltration = soil_infiltration.fn(S, MRC_df = MRC_df, MRC_name = MRC_name, pipe_base_vol = pipe_base_vol),
    soil_m3 = max(soil_infiltration - PET_m3, 0),
    soil_m3 = ifelse(S - soil_m3 < 0, S, soil_m3),
    
    pipe_outflow = outlet_pipe.fn(pipe_diameter, waterDepth = depth, pipe_base = pipe_base_m, Cd = Cd, maxTSA_height = maxTSA_height),
    pipe_m3 = max(pipe_outflow, 0),
    
    overflow_m3 = ifelse(S[i] > maxTSA_volume,
                         max(S[i] - maxTSA_volume - pipe_m3 - soil_m3 - PET_m3[i], 0),
                         0),
    Qout = PET_m3 + soil_m3 + pipe_m3 + overflow_m3,
    Qout = ifelse(S - Qout < 0, S, Qout)
  )
© www.soinside.com 2019 - 2024. All rights reserved.