在 tidyverse 中,我经常发现自己写出很长的 mutate 调用链,例如:
... |> mutate(...) |> mutate(...) |> mutate(...) |> mutate(...) |> ...
有更紧凑的写法吗?
示例(请向下滚动):
library(tidyverse)
# Simulation constants ----

# Number of simulated samples and number of rows per sample.
REPEATS <- 100
SAMPLE_SIZE <- 617
# Total number of simulated rows across all samples.
N <- REPEATS * SAMPLE_SIZE

# Baseline probability of a secure outcome, and the same value as log-odds.
BASELINE_SECURE_P <- 0.6
LOG_BASELINE_SECURE_ODDS <- log(BASELINE_SECURE_P / (1 - BASELINE_SECURE_P))

# Change in log-odds per daycare hour: log(2) spread over 3561 hours.
# NOTE(review): the origin of 3561 is not shown here -- confirm.
DAYCARE_LOG_OR_PER_HOUR <- log(2.0) / 3561

WEEKS_PER_MONTH <- 52 / 12

# Care arrangements and their sampling weights.
CARE_TYPES <- c(
  "Mother", "Father", "Grandparent", "In-Home", "Child-Care Home", "Daycare"
)
# NOTE(review): these weights sum to 1.2, not 1. sample() accepts weights that
# do not sum to one, so the effective probabilities are each weight / 1.2 --
# confirm that this is intended.
CARE_TYPE_P <- c(.24, .15, .15, .15, .15, .36)

# Non-"B" categories and their relative frequencies.
ACD <- c("A", "C", "D")
ACD_FREQ <- c(55, 197, 187) # Frequencies from 2001 Table 3
ACD_P <- ACD_FREQ / sum(ACD_FREQ)
# Build the simulated data set: REPEATS samples of SAMPLE_SIZE rows each.
#
# All derived columns are created in a single mutate() call. mutate()
# evaluates its arguments in order, so each expression may use columns
# defined earlier in the same call; the random draws therefore happen in the
# same order as they would in a chain of separate mutate() calls.
df <- data.frame(
  sample_no = rep(seq_len(REPEATS), each = SAMPLE_SIZE),
  care_type = as.factor(
    sample(CARE_TYPES, N, prob = CARE_TYPE_P, replace = TRUE)
  ),
  starting_age = runif(N, 0, 36)
) |>
  mutate(
    # "Mother" rows get 0 hours; all other rows get a normal draw
    # (mean 30, sd 15) with negative values clamped to 0. n() is the current
    # row count, so the draw length always matches the data.
    nonmaternal_hours_per_week = ifelse(
      care_type == "Mother", 0, pmax(0, rnorm(n(), 30, 15))
    ),
    # Only "Daycare" rows contribute daycare hours.
    daycare_hours_per_week = ifelse(
      care_type == "Daycare", nonmaternal_hours_per_week, 0
    ),
    # Weekly hours scaled by WEEKS_PER_MONTH and by (36 - starting_age);
    # presumably starting_age is in months and 36 is the end age -- confirm.
    nonmaternal_total_hours =
      nonmaternal_hours_per_week * WEEKS_PER_MONTH * (36 - starting_age),
    daycare_total_hours =
      daycare_hours_per_week * WEEKS_PER_MONTH * (36 - starting_age),
    # Baseline log-odds reduced linearly with total daycare hours.
    secure_log_or =
      LOG_BASELINE_SECURE_ODDS - DAYCARE_LOG_OR_PER_HOUR * daycare_total_hours,
    # Inverse logit: plogis(x) is exp(x) / (1 + exp(x)) without the overflow
    # risk of evaluating exp(x) twice.
    secure_p = plogis(secure_log_or),
    # One Bernoulli draw per row with success probability secure_p.
    is_secure = rbinom(n(), 1, secure_p),
    # Choose one of A, C, D attachment at random
    acd_random = sample(ACD, n(), prob = ACD_P, replace = TRUE),
    # "B" when is_secure is 1, otherwise the randomly drawn A/C/D label.
    ssp_abcd = as.factor(ifelse(is_secure, "B", acd_random))
  )
`mutate`(以及其他 {dplyr} 动词)允许在一次调用中写入多个以逗号分隔的操作(请参阅官方示例),例如:
# Schematic: several columns defined in a single mutate() call.
some_dataframe |>
  mutate(
    var_1 = ...,
    var_2 = ...,
    ...
  )
更方便的是,后面的表达式可以直接引用同一次调用中前面刚创建的列:
# Schematic: var_2 reuses var_1, which is created earlier in the same call.
some_dataframe |>
  mutate(
    var_1 = ...,
    var_2 = var_1 * 42,
    ...
  )