R 中的比例表,带有分组因子和李克特量表答案列

问题描述 投票:0回答:1

我有一个包含李克特量表答案的数据集。我想创建一个新的数据框来计算每个级别的每个类别的百分比。为此,我编写了以下代码:


# Example data: ten respondents answering five Likert items (q1-q5), plus the
# country of each respondent.
# dplyr provides tibble(), mutate(), across(), relocate() and the %>% pipe.
library(dplyr)

# Answer options, listed from the most negative to the most positive.
likert_levels <- c(
  "Very Dissatisfied",
  "Dissatisfied",
  "Average",
  "Satisfied",
  "Very Satisfied"
)

# No seed is set, so the sampled answers differ from run to run.
df <-
  tibble(
    q1 = sample(likert_levels, 10, replace = TRUE),
    # prob = 5:1 weights the draw towards the first levels, 1:5 towards the last.
    q2 = sample(likert_levels, 10, replace = TRUE, prob = 5:1),
    q3 = sample(likert_levels, 10, replace = TRUE, prob = 1:5),
    q4 = sample(likert_levels, 10, replace = TRUE, prob = 1:5),
    # q5 is the only item that can contain missing answers.
    q5 = sample(c(likert_levels, NA), 10, replace = TRUE)
  ) %>%
  # Store every answer column as a factor carrying the full set of levels.
  mutate(across(everything(), ~ factor(.x, levels = likert_levels))) %>%
  mutate(
    Country = c(
      "USA", "BRAZIL", "BRAZIL", "BRAZIL", "USA",
      "GERMANY", "ITALY", "GERMANY", "BRAZIL", "USA"
    )
  ) %>%
  relocate(Country, .before = q1)


# Count, for every respondent (row), how many of the q1-q5 answers fall into
# each Likert level. O_k holds the count for the k-th entry of likert_levels.
# Only the question columns are compared, so Country and the O_* columns
# created along the way never take part in the comparison.
answers <- df[paste0("q", 1:5)]
for (k in seq_along(likert_levels)) {
  # Missing answers compare as NA and are left out of the count by na.rm.
  df[[paste0("O_", k)]] <-
    as.integer(rowSums(answers == likert_levels[k], na.rm = TRUE))
}

# Number of non-missing answers per respondent.
df$O_sum <- df$O_1 + df$O_2 + df$O_3 + df$O_4 + df$O_5

# Keep the country and the counts; the raw answers are no longer needed.
df <- df[c("Country", paste0("O_", 1:5), "O_sum")]



# Percentage of answers per Likert level for each country.
# Step 1: within every Country / O_sum group, divide the level count by the
#         number of answers given in that group (rows * answers per row).
# Step 2: average those percentages over the O_sum groups of each country.
#         This is an unweighted mean of the group percentages.
Likert_df <- df %>%
  group_by(Country, O_sum) %>%
  summarise(
    # O_sum is constant inside a group, so its first value is the group value.
    across(
      O_1:O_5,
      ~ sum(.x) / (n() * O_sum[1]) * 100,
      .names = "O{.col}"
    ),
    .groups = "drop"
  ) %>%
  group_by(Country) %>%
  summarise(across(OO_1:OO_5, mean)) %>%
  as.data.frame()

colnames(Likert_df) <- c(
  "Item",
  "Strongly Disagree",
  "Disagree",
  "So So",
  "Agree",
  "Strongly Agree"
)
DF <- Likert_df

得到的结果如下:

DF
     Item Strongly Disagree Disagree So So Agree Strongly Agree
1  BRAZIL                10       25  25.0  30.0             10
2 GERMANY                30       20  20.0  20.0             10
3   ITALY                20       40  20.0   0.0             20
4     USA                 5       10  42.5  17.5             25

我的问题是:是否有另一种方法更简单、更快地使用 dplyr 函数和管道来创建相同的结果?

r dataframe dplyr
1个回答
1
投票

一种方法是从原始的 `df`(包含 q1–q5 列)开始:

library(dplyr)
library(tidyr)

# Share of each Likert level among the answers given in every country.
df |>
  # One row per respondent and question; the answer ends up in `value`.
  pivot_longer(starts_with("q")) |>
  # Missing answers are left out of the denominator.
  drop_na(value) |>
  count(Country, value) |>
  # Proportions are taken within each country, so they sum to 1 per country.
  mutate(Perc = prop.table(n), .by = Country) |>
  pivot_wider(
    id_cols = Country,
    names_from = value,
    names_sort = TRUE, # keep the columns in factor-level order
    values_from = Perc,
    values_fill = 0 # a level nobody picked in a country becomes 0, not NA
  )
  Country `Very Dissatisfied` Dissatisfied Average Satisfied `Very Satisfied`
  <chr>                 <dbl>        <dbl>   <dbl>     <dbl>            <dbl>
1 BRAZIL                0.05         0.2     0.15      0.25            0.35  
2 GERMANY               0.111        0.222   0.444     0.222           0     
3 ITALY                 0.25         0       0.25      0               0.5   
4 USA                   0            0.267   0.2       0.467           0.0667 

(列名可以像之前一样调整;这里得到的是 0–1 之间的比例,如需百分比,请将 `Perc` 乘以 100)

© www.soinside.com 2019 - 2024. All rights reserved.