我有一个与此类似的数据集:
# Example data: one row per participant (5 rows). Each variable family is
# stored wide, with the measurement index 1-6 embedded in the column name.
# NA marks a missing value; the mt6_* columns are entirely NA.
dataset <- data.frame(
  Participant.Id = 1:5,
  # x series
  x1 = c(10L, 20L, 30L, 40L, 50L),
  x2 = c(15L, 25L, 35L, 45L, 55L),
  x3 = c(20L, 25L, NA, 45L, NA),
  x4 = c(25L, 30L, NA, 50L, NA),
  x5 = c(NA, 35L, NA, 55L, NA),
  x6 = c(NA, 35L, NA, NA, NA),
  # y series
  y1 = c(10L, 20L, 30L, 40L, 50L),
  y2 = c(15L, 25L, 35L, 45L, 55L),
  y3 = c(20L, 25L, NA, 45L, NA),
  y4 = c(25L, 30L, NA, 50L, NA),
  y5 = c(NA, 35L, NA, 55L, NA),
  y6 = c(NA, 35L, NA, NA, NA),
  # z series
  z1 = c(10L, 20L, 30L, 40L, 50L),
  z2 = c(15L, 25L, 35L, 45L, 55L),
  z3 = c(20L, 25L, NA, 45L, NA),
  z4 = c(25L, 30L, NA, 50L, NA),
  z5 = c(NA, 35L, NA, 55L, NA),
  z6 = c(NA, 35L, NA, NA, NA),
  # Volumes named mt<measurement>_<fruit>_vol: oranges
  mt1_oranges_vol = c(100L, 200L, 300L, 400L, 500L),
  mt2_oranges_vol = c(110L, 210L, 310L, 410L, 510L),
  mt3_oranges_vol = c(120L, 220L, NA, 420L, 520L),
  mt4_oranges_vol = c(130L, 230L, NA, 430L, NA),
  mt5_oranges_vol = c(NA, 240L, NA, NA, NA),
  mt6_oranges_vol = c(NA, NA, NA, NA, NA),
  # pears
  mt1_pears_vol = c(101L, 201L, 301L, 401L, 501L),
  mt2_pears_vol = c(111L, 211L, 311L, 411L, 511L),
  mt3_pears_vol = c(121L, 221L, NA, 421L, 521L),
  mt4_pears_vol = c(131L, 231L, NA, 431L, NA),
  mt5_pears_vol = c(NA, 241L, NA, NA, NA),
  mt6_pears_vol = c(NA, NA, NA, NA, NA),
  # apples
  mt1_apples_vol = c(102L, 202L, 302L, 402L, 502L),
  mt2_apples_vol = c(112L, 212L, 312L, 412L, 512L),
  mt3_apples_vol = c(122L, 222L, NA, 422L, 522L),
  mt4_apples_vol = c(132L, 232L, NA, 432L, NA),
  mt5_apples_vol = c(NA, 242L, NA, NA, NA),
  mt6_apples_vol = c(NA, NA, NA, NA, NA)
)
我需要为每次测量创建一个合计列,即 mt1_apples_vol + mt1_pears_vol + mt1_oranges_vol 的总和、mt2_apples_vol + mt2_pears_vol + mt2_oranges_vol 的总和,依此类推。
目前我计算为:
# Add one total-volume column per measurement: the row-wise sum of every
# column whose name starts with that measurement's prefix (e.g. "mt1_").
# `.` is the data frame piped in by %>%, so each selection sees only the
# columns that existed before this mutate() call.
# na.rm = FALSE: a row's total is NA if any contributing column is NA.
dataset <- dataset %>%
  mutate(
    mt1_total_vol = rowSums(select(., starts_with("mt1_")), na.rm = FALSE),
    mt2_total_vol = rowSums(select(., starts_with("mt2_")), na.rm = FALSE),
    mt3_total_vol = rowSums(select(., starts_with("mt3_")), na.rm = FALSE)
  )
但是,将来可能会添加更多测量。因此我希望它迭代 mt_range:
# Measurement indices to iterate over.
mt_range <- seq_len(6L)
但我不知道该如何编写代码,使其针对 mt_range 中的每个值选出对应的变量并创建新的合计列。
如果先把数据转换为长格式,您会发现这会容易得多:
# Reshape every "mt<k>_<fruit>_vol" column to long format, then sum the
# volumes per participant and measurement number.
dataset |>
  tidyr::pivot_longer(
    cols = starts_with("mt"),
    values_to = "vol",
    # Rewrite "mt<k>_<fruit>_vol" as "<fruit>,<k>" so it can be split on ",".
    # \\d+ (not \\d) keeps this working once <k> reaches two digits (mt10_...).
    names_transform = \(x) gsub("^mt(\\d+)_(\\w+)_vol$", "\\2,\\1", x)
  ) |>
  tidyr::separate_wider_delim(name, ",", names = c("fruit", "fruit_num")) |>
  group_by(Participant.Id, fruit_num) |>
  summarise(
    # na.rm = TRUE skips missing volumes, so an all-NA group sums to 0.
    total_vol = sum(vol, na.rm = TRUE)
  )
# # A tibble: 30 × 3
# # Groups: Participant.Id [5]
# Participant.Id fruit_num total_vol
# <int> <chr> <int>
# 1 1 1 303
# 2 1 2 333
# 3 1 3 363
# 4 1 4 393
# 5 1 5 0
# 6 1 6 0
您可以先选出所需的列,再按列名前缀将它们拆分成一个列表,然后对每一组依次计算 `rowSums`。
以下是一个 base R 方案:
# Base R: sum the "mt" columns row-wise within each measurement prefix and
# append the resulting total columns to the original data.
dfmt <- dataset[startsWith(names(dataset), "mt")]
dfmt |>
  # Group columns by the text before the first underscore ("mt1", "mt2", ...);
  # the group label becomes the name of the new total column.
  split.default(paste0(gsub("_.*", "", names(dfmt)), "_total_vol")) |>
  # rowSums keeps its default na.rm = FALSE, so any NA makes the total NA.
  lapply(rowSums) |>
  list2DF() |>
  (\(totals) cbind(dataset, totals))()