计算特定变量上每列和每组值“1”的百分比

Question

       ID   AT07 DT01_1 DT01_2 DT01_3 DT01_4 DT01_5 DT01_6 DT01_7 DT01_8 DT01_9 DT01_10 DT01_11 DT01_12 DT01_12.1
1 vD0FPVt AT07_1      1      1      1      1      1      1      1      1      1       1       1       0         0
2 UwwHYWL AT07_1      0      0      0      0      1      0      0      0      0       0       0       0         0
3 Lw7HM6J AT07_1      0      0      0      0      0      0      1      1      0       1       0       0         0
4 Llpww1k AT07_1      0      0      0      0      0      0      0      0      0       1       0       0         0
5 OwXwA9j AT07_1      0      0      0      0      0      0      1      0      0       1       1       0         0

大家好，能否帮我计算每个 DT01 列中值“1”所占的百分比，并按组进行汇总？分组变量为 AT07，取值从 AT07_1 到 AT07_11；DT01 变量共 19 列，从 DT01_1 到 DT01_19。谢谢。下图再次展示了当前的数据集以及我希望得到的结果，图中的百分比数值是虚构的。

这是我目前尝试过的做法（但写法过于繁琐）：

# Asker's original attempt: within each AT07 group, compute the share of rows
# where a DT01_k column equals 1, using one mutate() call per column.
# Because mutate() is used, each group-level value is repeated on every row
# of its group.
# NOTE(review): label_percent() is presumably scales::label_percent(); no
# library(scales) call is visible in this snippet -- confirm it is attached.
library(dplyr)
resLong <- resLong %>%
   group_by(AT07) %>%
   mutate(DT01_1_percent1 = label_percent()(sum(DT01_1 == 1)/n())) %>%
   mutate(DT01_2_percent1 = label_percent()(sum(DT01_2 == 1)/n())) %>%
   mutate(DT01_3_percent1 = label_percent()(sum(DT01_3 == 1)/n())) %>%
   mutate(DT01_4_percent1 = label_percent()(sum(DT01_4 == 1)/n())) %>%
   mutate(DT01_5_percent1 = label_percent()(sum(DT01_5 == 1)/n())) %>%
   mutate(DT01_6_percent1 = label_percent()(sum(DT01_6 == 1)/n())) %>%
   mutate(DT01_7_percent1 = label_percent()(sum(DT01_7 == 1)/n())) %>%
   mutate(DT01_8_percent1 = label_percent()(sum(DT01_8 == 1)/n())) %>%
   mutate(DT01_9_percent1 = label_percent()(sum(DT01_9 == 1)/n())) %>%
   mutate(DT01_10_percent1 = label_percent()(sum(DT01_10 == 1)/n())) %>%
   mutate(DT01_11_percent1 = label_percent()(sum(DT01_11 == 1)/n())) %>%
   mutate(DT01_12_percent1 = label_percent()(sum(DT01_12 == 1)/n())) %>%
   mutate(DT01_13_percent1 = label_percent()(sum(DT01_13 == 1)/n())) %>%
   mutate(DT01_14_percent1 = label_percent()(sum(DT01_14 == 1)/n())) %>%
   mutate(DT01_15_percent1 = label_percent()(sum(DT01_15 == 1)/n())) %>%
   mutate(DT01_16_percent1 = label_percent()(sum(DT01_16 == 1)/n())) %>% 
   mutate(DT01_17_percent1 = label_percent()(sum(DT01_17 == 1)/n())) %>%
   mutate(DT01_18_percent1 = label_percent()(sum(DT01_18 == 1)/n())) %>%
   mutate(DT01_19_percent1 = label_percent()(sum(DT01_19 == 1)/n()))
 
  
# Drop the first 21 columns by position.
# NOTE(review): presumably these are ID, AT07 and the 19 raw DT01 columns,
# leaving only the *_percent1 columns -- confirm against the real column order.
# No ungroup() precedes this line, so resLong is still grouped by AT07 here.
resLong <- resLong[,-c(1:21)]

# Collapse the repeated per-row values down to the unique rows.
library(dplyr)
resLong <- distinct(resLong)

Answer 1

可以这样做：

# library(tidyverse)
library(dplyr)

# You're looking for `summarise` + `across` -------------------------------
# Collapses `aux_in` to one row per AT07 group; every selected column becomes
# the fraction of rows in that group whose value is exactly 1.
aux_out <- summarise(
  aux_in,
  .by = AT07,           # group column (one or more); `.by` needs dplyr >= 1.1
  across(               # applies over multiple columns
    starts_with("DT"),  # all columns starting with "DT"
    \(x) mean(x == 1)   # share of 1s; stays correct if other codes appear
  ))                    # Multiply by `100` if you want in `%`
# An `NA` in a column still propagates to `NA` for that group.

输出：

> aux_out
# A tibble: 3 × 14
  AT07   DT01_1 DT01_2 DT01_3 DT01_4 DT01_5 DT01_6 DT01_7 DT01_8 DT01_9 DT01_10 DT01_11 DT01_12 DT01_12.1
  <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl>   <dbl>     <dbl>
1 AT07_1    0.2  0.2    0.2    0.2      0.4  0.2    0.6    0.4    0.2     0.8     0.4     0.2       0    
2 AT07_2    0.5  0.5    0.5    0.5      1    0.5    0.5    0.5    1       0.5     0.5     0         0.5  
3 AT07_3    1    0.667  0.333  0.667    0    0.333  0.667  0.333  0.333   0.667   0.333   0.333     0.667

玩具数据：

library(dplyr)

# Toy data ----------------------------------------------------------------
# Ten respondents in three AT07 groups (5, 2 and 3 rows), written column by
# column; every DT01 indicator is a double vector of 0/1 values.
aux_in <- tibble::tibble(
  ID = c(
    "vD0FPVt", "UwwHYWL", "Lw7HM6J", "Llpww1k", "OwXwA9j",
    "_D0FPVt", "_wwHYWL",
    "_w7HM6J", "_lpww1k", "_wXwA9j"
  ),
  AT07 = c(
    "AT07_1", "AT07_1", "AT07_1", "AT07_1", "AT07_1",
    "AT07_2", "AT07_2",
    "AT07_3", "AT07_3", "AT07_3"
  ),
  DT01_1    = c(1, 0, 0, 0, 0, 1, 0, 1, 1, 1),
  DT01_2    = c(1, 0, 0, 0, 0, 1, 0, 1, 1, 0),
  DT01_3    = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 0),
  DT01_4    = c(1, 0, 0, 0, 0, 1, 0, 1, 1, 0),
  DT01_5    = c(1, 1, 0, 0, 0, 1, 1, 0, 0, 0),
  DT01_6    = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 0),
  DT01_7    = c(1, 0, 1, 0, 1, 1, 0, 1, 0, 1),
  DT01_8    = c(1, 0, 1, 0, 0, 1, 0, 1, 0, 0),
  DT01_9    = c(1, 0, 0, 0, 0, 1, 1, 0, 1, 0),
  DT01_10   = c(1, 0, 1, 1, 1, 1, 0, 1, 0, 1),
  DT01_11   = c(1, 0, 0, 0, 1, 1, 0, 0, 0, 1),
  DT01_12   = c(0, 0, 0, 0, 1, 0, 0, 0, 1, 0),
  DT01_12.1 = c(0, 0, 0, 0, 0, 1, 0, 0, 1, 1)
)

创建于 2024-05-10，使用 reprex v2.1.0

计算特定变量上每列和每组值“1”的百分比

问题描述投票：0回答：1

1个回答

最新问题

计算特定变量上每列和每组值“1”的百分比

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1