ID AT07 DT01_1 DT01_2 DT01_3 DT01_4 DT01_5 DT01_6 DT01_7 DT01_8 DT01_9 DT01_10 DT01_11 DT01_12 DT01_12.1
1 vD0FPVt AT07_1 1 1 1 1 1 1 1 1 1 1 1 0 0
2 UwwHYWL AT07_1 0 0 0 0 1 0 0 0 0 0 0 0 0
3 Lw7HM6J AT07_1 0 0 0 0 0 0 1 1 0 1 0 0 0
4 Llpww1k AT07_1 0 0 0 0 0 0 0 0 0 1 0 0 0
5 OwXwA9j AT07_1 0 0 0 0 0 0 1 0 0 1 1 0 0
大家好, 您能否帮我计算值“1”(分别针对每个列 DT01)的百分比并在组级别上聚合,其中组为 AT07_1 到 AT07_11。 变量DT01的范围为DT01_1至DT01_19。 谢谢。 下图再次显示了当前数据集以及我想要产生的结果。图像中的百分比值是发明的。
这是我迄今为止尝试过的(但不经济):
library(dplyr)
resLong <- resLong %>%
group_by(AT07) %>%
mutate(DT01_1_percent1 = label_percent()(sum(DT01_1 == 1)/n())) %>%
mutate(DT01_2_percent1 = label_percent()(sum(DT01_2 == 1)/n())) %>%
mutate(DT01_3_percent1 = label_percent()(sum(DT01_3 == 1)/n())) %>%
mutate(DT01_4_percent1 = label_percent()(sum(DT01_4 == 1)/n())) %>%
mutate(DT01_5_percent1 = label_percent()(sum(DT01_5 == 1)/n())) %>%
mutate(DT01_6_percent1 = label_percent()(sum(DT01_6 == 1)/n())) %>%
mutate(DT01_7_percent1 = label_percent()(sum(DT01_7 == 1)/n())) %>%
mutate(DT01_8_percent1 = label_percent()(sum(DT01_8 == 1)/n())) %>%
mutate(DT01_9_percent1 = label_percent()(sum(DT01_9 == 1)/n())) %>%
mutate(DT01_10_percent1 = label_percent()(sum(DT01_10 == 1)/n())) %>%
mutate(DT01_11_percent1 = label_percent()(sum(DT01_11 == 1)/n())) %>%
mutate(DT01_12_percent1 = label_percent()(sum(DT01_12 == 1)/n())) %>%
mutate(DT01_13_percent1 = label_percent()(sum(DT01_13 == 1)/n())) %>%
mutate(DT01_14_percent1 = label_percent()(sum(DT01_14 == 1)/n())) %>%
mutate(DT01_15_percent1 = label_percent()(sum(DT01_15 == 1)/n())) %>%
mutate(DT01_16_percent1 = label_percent()(sum(DT01_16 == 1)/n())) %>%
mutate(DT01_17_percent1 = label_percent()(sum(DT01_17 == 1)/n())) %>%
mutate(DT01_18_percent1 = label_percent()(sum(DT01_18 == 1)/n())) %>%
mutate(DT01_19_percent1 = label_percent()(sum(DT01_19 == 1)/n()))
resLong <- resLong[,-c(1:21)]
library(dplyr)
resLong <- distinct(resLong)
这是:
# library(tidyverse)
library(dplyr)
# You're looking for `summarise` + `across` -------------------------------
aux_out <- summarise(
aux_in,
.by = AT07, # group column (one or more)
across( # applies over multiple columns
starts_with("DT"), # all columns starting with "DT"
\(x) sum(x)/n() # `sum()` works if there's only `1` and `0`
)) # Multiply by `100` if you want in `%`
输出:
> aux_out
# A tibble: 3 × 14
AT07 DT01_1 DT01_2 DT01_3 DT01_4 DT01_5 DT01_6 DT01_7 DT01_8 DT01_9 DT01_10 DT01_11 DT01_12 DT01_12.1
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AT07_1 0.2 0.2 0.2 0.2 0.4 0.2 0.6 0.4 0.2 0.8 0.4 0.2 0
2 AT07_2 0.5 0.5 0.5 0.5 1 0.5 0.5 0.5 1 0.5 0.5 0 0.5
3 AT07_3 1 0.667 0.333 0.667 0 0.333 0.667 0.333 0.333 0.667 0.333 0.333 0.667
玩具数据:
library(dplyr)
# Toy data ----------------------------------------------------------------
aux_in <- tibble::tribble(
~ID, ~AT07, ~DT01_1, ~DT01_2, ~DT01_3, ~DT01_4, ~DT01_5, ~DT01_6, ~DT01_7, ~DT01_8, ~DT01_9, ~DT01_10, ~DT01_11, ~DT01_12, ~DT01_12.1,
"vD0FPVt", "AT07_1", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
"UwwHYWL", "AT07_1", 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
"Lw7HM6J", "AT07_1", 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0,
"Llpww1k", "AT07_1", 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
"OwXwA9j", "AT07_1", 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0,
"_D0FPVt", "AT07_2", 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
"_wwHYWL", "AT07_2", 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
"_w7HM6J", "AT07_3", 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0,
"_lpww1k", "AT07_3", 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1,
"_wXwA9j", "AT07_3", 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1)
创建于 2024-05-10,使用 reprex v2.1.0