我的原始数据中有 4 个不同的变量(全部以前缀 raw_ 开头),后面是时间/金钱和学校/社交的组合。在我的真实数据中,我对变量的每个值都有更多选项,因此最终会变成相当长的代码块,必须迭代 4 次。
必须有一种简单的方法可以在 R 中执行相当于 Stata 中的 foreach 循环(我更常使用它来进行数据清理),以将 4 次迭代压缩为 1 次。
在 Stata 中,循环看起来像这样:
* For each time/money x school/social suffix, derive a numeric olderch_<suffix>
* variable from the answer text stored in raw_<suffix>.
foreach suffix in hours_school hours_social money_school money_social {
    * Creating the variable with an -if- qualifier leaves every other row missing (.)
    gen olderch_`suffix' = 90 if raw_`suffix' == "90% or more 10% or less"
    replace olderch_`suffix' = 10 if raw_`suffix' == "10% or less 90% or more"
}
这是我当前版本的 R 代码,我希望对其进行类似的压缩:
# Recode each raw_* answer string into a numeric olderch_* column.
# The four recodes are independent of one another, so they share one mutate().
# case_when() has no fallback branch here, so rows matching neither answer
# string come out as NA.
d <- d %>%
  mutate(
    olderch_hours_school = case_when(
      raw_hours_school == "90% or more 10% or less" ~ 90,
      raw_hours_school == "10% or less 90% or more" ~ 10
    ),
    olderch_hours_social = case_when(
      raw_hours_social == "90% or more 10% or less" ~ 90,
      raw_hours_social == "10% or less 90% or more" ~ 10
    ),
    olderch_money_school = case_when(
      raw_money_school == "90% or more 10% or less" ~ 90,
      raw_money_school == "10% or less 90% or more" ~ 10
    ),
    olderch_money_social = case_when(
      raw_money_social == "90% or more 10% or less" ~ 90,
      raw_money_social == "10% or less 90% or more" ~ 10
    )
  )
dplyr::across()
(docs) 允许您在数据框中的多个列上应用函数。您可以通过添加 ~
来创建 lambda 函数,并使用 .
代表每一列,从而调整您的 case_when
语句。
您可以提供所有正在转换的列的向量,或使用选择助手,例如
everything()
。
示例所用的玩具数据在示例之后给出。
library(dplyr)
# Inspect the toy data before recoding; the printed output below shows
# 10 rows and 4 character columns holding the raw answer strings.
glimpse(df)
#> Rows: 10
#> Columns: 4
#> $ hours_school <chr> "90% or more 10% or less", "90% or more 10% or less", "90…
#> $ hours_social <chr> "10% or less 90% or more", "10% or less 90% or more", "10…
#> $ money_school <chr> "90% or more 10% or less", "10% or less 90% or more", "90…
#> $ money_social <chr> "90% or more 10% or less", "10% or less 90% or more", "90…
# Apply the same recode to an explicit vector of columns.
# Inside the lambda, `.` stands for whichever column is currently being
# processed; the result replaces that column (same names in the output).
df |>
  mutate(
    across(
      .cols = c(hours_school, hours_social, money_school, money_social),
      .fns = ~ case_when(
        . == "90% or more 10% or less" ~ 90,
        . == "10% or less 90% or more" ~ 10
      )
    )
  )
#> hours_school hours_social money_school money_social
#> 1 90 10 90 90
#> 2 90 10 10 10
#> 3 90 10 90 90
#> 4 10 90 90 10
#> 5 90 10 90 10
#> 6 10 90 90 90
#> 7 10 10 10 90
#> 8 10 90 10 90
#> 9 90 90 90 90
#> 10 90 90 10 10
# Same recode, but select the columns with the everything() helper
# instead of listing them by name.
df |>
  mutate(
    across(
      .cols = everything(),
      .fns = ~ case_when(
        . == "90% or more 10% or less" ~ 90,
        . == "10% or less 90% or more" ~ 10
      )
    )
  )
#> hours_school hours_social money_school money_social
#> 1 90 10 90 90
#> 2 90 10 10 10
#> 3 90 10 90 90
#> 4 10 90 90 10
#> 5 90 10 90 10
#> 6 10 90 90 90
#> 7 10 10 10 90
#> 8 10 90 10 90
#> 9 90 90 90 90
#> 10 90 90 10 10
# Toy data: four character columns of 10 randomly drawn answer strings each.
# The seed is fixed so the example output is reproducible.
set.seed(123)
df <- local({
  answers <- c("90% or more 10% or less", "10% or less 90% or more")
  columns <- c("hours_school", "hours_social", "money_school", "money_social")
  # One draw per column, taken in column order, so the random number stream
  # is consumed in the same sequence as four consecutive sample() calls.
  draws <- lapply(columns, function(col) {
    sample(answers, size = 10, replace = TRUE)
  })
  as.data.frame(setNames(draws, columns))
})