R:按组汇总,并根据条件计算各变量的平均值

问题描述 投票:0回答:1

我需要创建一个表,显示变量 DT06_1-DT06_19 各自的平均值(注意:我只有变量 DT06_1-DT06_3、DT06_7、DT06_10-DT06_19)。平均值应按组计算,分组变量 insuff_suff_res_cat 中共有四个组(注意:并非每个参与者在四个类别中都有取值;参与者没有取值的类别,在数据中就没有对应的行)。最后,仅当相应的 DT01 变量的值为“1”而不是“0”时,才应把该行计入对应 DT06 变量的平均值(例如 DT06_2 对应 DT01_2)。总之:对于变量“insuff_suff_res_cat”的每个组,仅在相应的 DT01 变量 ==“1”时,计算每个 DT06 变量的平均值。

我的数据集:

# Example data set (dput output), bound to `df` so that the code that follows
# can refer to it. One row per participant and resource category:
#   - ID: participant identifier
#   - insuff_suff_res_cat: grouping variable with four categories
#   - DT01_1 ... DT01_19: "0"/"1" flags stored as character
#   - DT06_*: values stored mostly as character, with NA where missing
#     (only DT06_1-3, DT06_7 and DT06_10-19 exist)
df <- structure(list(ID = c("poHYvL", "FgKAdE", "34HEg4", "poHYvL", 
"FgKAdE", "34HEg4", "Y0FPVt", "uXaA9j", "nIogJg", "9tAGOE", "zlq6KJ", 
"Y0FPVt", "uXaA9j", "nIogJg"), insuff_suff_res_cat = c("not_enough_resources", 
"not_enough_resources", "not_enough_resources", "not_enough_resources_help", 
"not_enough_resources_help", "not_enough_resources_help", "enough_resources", 
"enough_resources", "enough_resources", "enough_resources", "enough_resources", 
"enough_resources_help", "enough_resources_help", "enough_resources_help"
), DT01_1 = c("1", "0", "0", "1", "0", "0", "1", "0", "1", "0", 
"0", "1", "0", "0"), DT01_2 = c("0", "0", "0", "0", "1", "0", 
"1", "0", "1", "0", "1", "1", "0", "1"), DT01_3 = c("0", "0", 
"0", "0", "0", "0", "1", "0", "1", "0", "0", "1", "0", "0"), 
    DT01_4 = c("0", "1", "0", "1", "1", "0", "1", "0", "1", "0", 
    "1", "1", "0", "0"), DT01_5 = c("1", "1", "0", "1", "1", 
    "0", "1", "0", "0", "1", "0", "1", "0", "0"), DT01_6 = c("0", 
    "1", "0", "0", "1", "0", "1", "1", "0", "0", "0", "1", "0", 
    "0"), DT01_7 = c("1", "1", "0", "0", "1", "0", "1", "1", 
    "0", "0", "0", "1", "1", "0"), DT01_8 = c("0", "1", "0", 
    "0", "1", "0", "1", "0", "0", "0", "0", "1", "0", "0"), DT01_9 = c("0", 
    "0", "1", "0", "0", "0", "1", "0", "1", "0", "0", "1", "0", 
    "1"), DT01_10 = c("0", "1", "0", "1", "1", "0", "1", "1", 
    "0", "0", "1", "1", "1", "0"), DT01_11 = c("0", "0", "1", 
    "1", "0", "1", "1", "1", "0", "1", "0", "1", "1", "0"), DT01_12 = c("0", 
    "0", "1", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", 
    "0"), DT01_13 = c("0", "1", "0", "0", "1", "1", "1", "0", 
    "0", "0", "0", "1", "0", "0"), DT01_14 = c("0", "1", "0", 
    "0", "0", "0", "1", "1", "0", "0", "1", "1", "1", "0"), DT01_15 = c("0", 
    "0", "0", "0", "0", "0", "0", "0", "0", "1", "0", "0", "0", 
    "0"), DT01_16 = c("0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0"), DT01_17 = c("0", "0", "0", 
    "0", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1"), DT01_18 = c("0", 
    "0", "0", "0", "0", "0", "0", "0", "1", "0", "1", "0", "0", 
    "1"), DT01_19 = c("0", "0", "0", "0", "0", "0", "0", "0", 
    "0", "0", "0", "0", "0", "0"), DT06_1 = c(NA, NA, NA, NA, 
    NA, NA, 300, NA, NA, NA, NA, 300, NA, NA), DT06_2 = c(NA, 
    NA, NA, NA, NA, NA, "0", NA, "40", NA, NA, "0", NA, "20"), 
    DT06_3 = c(NA, NA, NA, NA, NA, NA, "0", NA, NA, NA, NA, "0", 
    NA, NA), DT06_7 = c(NA, "0", NA, NA, "0", NA, "0", "0", NA, 
    NA, NA, "0", "0", NA), DT06_10 = c(NA, "0", NA, NA, "0", 
    NA, "0", "0", NA, NA, "20", "0", "0", NA), DT06_11 = c(NA, 
    NA, "0", NA, NA, "0", "0", "0", NA, "40", NA, "0", "0", NA
    ), DT06_12 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA), DT06_13 = c(NA, "0", NA, NA, "0", NA, "0", NA, 
    NA, NA, NA, "0", NA, NA), DT06_14 = c(NA, NA, NA, NA, NA, 
    NA, "50", "0", NA, NA, "109", "60", "0", NA), DT06_15 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, "21", NA, NA, NA, NA), DT06_16 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), DT06_17 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), DT06_18 = c(NA, 
    NA, NA, NA, NA, NA, NA, NA, "60", NA, "20", NA, NA, "55"), 
    DT06_19 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-14L))

期望的结果(数字是虚构的):this picture shows my desired outcome

我已经尝试应用汇总命令,但我真的找不到将其连接到我的条件的解决方案。任何帮助表示赞赏。

r mean countif summarize mutate
1个回答
0
投票

使用

dplyr
tidyr

  1. 将 DT06* 列转换为数字
  2. 仅过滤 DT01_1 == "1" 的行
  3. 按 insuff_suff_res_cat 中的组汇总以“DT06”开头的所有列
  4. 先转换为长格式(pivot_longer),再转换为宽格式(pivot_wider)
library(dplyr)
library(tidyr)

# Goal: for every group in insuff_suff_res_cat, compute the mean of each
# DT06_k column using only the rows whose matching DT01_k flag equals "1".
#
# Filtering rows on DT01_1 alone would apply the first flag to every DT06
# column (e.g. it drops the row holding DT06_15 = 21, whose own flag DT01_15
# is "1"). Instead, each DT06_k is masked by its own DT01_k before averaging.

# Pair every existing DT06_k column with its DT01_k flag column.
dt06_cols <- grep("^DT06_", names(df), value = TRUE)
dt01_cols <- sub("^DT06_", "DT01_", dt06_cols)
stopifnot(all(dt01_cols %in% names(df)))

# Convert DT06_k to numeric and set values to NA wherever the DT01_k flag is
# not "1". `%in%` never returns NA, so a missing flag counts as "not selected".
masked <- df
masked[dt06_cols] <- Map(
  \(value, flag) ifelse(flag %in% "1", as.numeric(value), NA_real_),
  df[dt06_cols],
  df[dt01_cols]
)

# Group means (NaN when a group has no usable value), then reshape so that
# variables are rows and groups are columns.
result <- masked |>
  summarise(
    across(all_of(dt06_cols), \(x) mean(x, na.rm = TRUE)),
    .by = insuff_suff_res_cat
  ) |>
  pivot_longer(-insuff_suff_res_cat) |>
  pivot_wider(
    id_cols = name,
    names_from = insuff_suff_res_cat,
    values_from = value
  )

result
# # A tibble: 14 × 5
#    name    not_enough_resources not_enough_resources_help enough_resources enough_resources_help
#    <chr>                  <dbl>                     <dbl>            <dbl>                 <dbl>
#  1 DT06_1                   NaN                       NaN           300                      300
#  2 DT06_2                   NaN                       NaN            20                       10
#  3 DT06_3                   NaN                       NaN             0                        0
#  4 DT06_7                     0                         0             0                        0
#  5 DT06_10                    0                         0             6.67                     0
#  6 DT06_11                    0                         0            13.3                      0
#  7 DT06_12                  NaN                       NaN           NaN                      NaN
#  8 DT06_13                    0                         0             0                        0
#  9 DT06_14                  NaN                       NaN            53                       30
# 10 DT06_15                  NaN                       NaN            21                      NaN
# 11 DT06_16                  NaN                       NaN           NaN                      NaN
# 12 DT06_17                  NaN                       NaN           NaN                      NaN
# 13 DT06_18                  NaN                       NaN            40                       55
# 14 DT06_19                  NaN                       NaN           NaN                      NaN
© www.soinside.com 2019 - 2024. All rights reserved.