我有一个包含李克特量表(Likert scale)回答的数据集。我想创建一个新的数据框,按国家计算各量表等级所占的百分比。为此,我编写了以下代码:
# Response labels of the five-point Likert scale, listed from the most
# negative to the most positive answer.
likert_levels <- c(
  "Very Dissatisfied", "Dissatisfied",
  "Average",
  "Satisfied", "Very Satisfied"
)
# Simulated survey data: ten respondents, their country, and five Likert
# items. q2 is weighted toward the dissatisfied end of the scale, q3 and q4
# toward the satisfied end, and q5 may contain missing answers (NA).
df <- tibble(
  Country = c(
    "USA", "BRAZIL", "BRAZIL", "BRAZIL", "USA",
    "GERMANY", "ITALY", "GERMANY", "BRAZIL", "USA"
  ),
  q1 = sample(likert_levels, 10, replace = TRUE),
  q2 = sample(likert_levels, 10, replace = TRUE, prob = 5:1),
  q3 = sample(likert_levels, 10, replace = TRUE, prob = 1:5),
  q4 = sample(likert_levels, 10, replace = TRUE, prob = 1:5),
  q5 = sample(c(likert_levels, NA), 10, replace = TRUE)
) %>%
  # Store every answer column as a factor whose levels follow the scale order.
  mutate(across(q1:q5, ~ factor(.x, levels = likert_levels)))
# For every respondent (row), count how many question columns hold each
# Likert level: O_1 = "Very Dissatisfied" ... O_5 = "Very Satisfied".
# Only the q* columns are compared, so Country and the counters added along
# the way are never coerced to character and rescanned.
answers <- as.matrix(df[startsWith(names(df), "q")])
count_cols <- paste0("O_", seq_along(likert_levels))
for (i in seq_along(likert_levels)) {
  # NA answers match no level and are skipped by na.rm = TRUE.
  df[[count_cols[i]]] <- as.integer(
    rowSums(answers == likert_levels[i], na.rm = TRUE)
  )
}
# Number of answered (non-missing) questions per respondent.
df$O_sum <- as.integer(rowSums(df[count_cols]))
# Keep Country and the tallies; select by name rather than by position so the
# result does not depend on how many columns the data frame happens to have.
df <- df[c("Country", count_cols, "O_sum")]
# Percentage of each Likert level per country, computed in two stages:
#   1. within each (Country, O_sum) group, the share of every level among all
#      answers given by that group's respondents (n() rows * O_sum answers);
#   2. the unweighted mean of those shares across a country's O_sum groups.
Likert_df <- df %>%
  group_by(Country, O_sum) %>%
  summarise(
    across(
      O_1:O_5,
      ~ sum(.x) / (n() * O_sum[1]) * 100,
      .names = "O{.col}" # O_1 -> OO_1, ..., O_5 -> OO_5
    )
  ) %>%
  ungroup() %>%
  select(-O_sum) %>%
  group_by(Country) %>%
  summarise(across(OO_1:OO_5, mean)) %>%
  as.data.frame() %>%
  setNames(
    c("Item", "Strongly Disagree", "Disagree", "So So", "Agree", "Strongly Agree")
  )
DF <- Likert_df
结果如下:
DF
Item Strongly Disagree Disagree So So Agree Strongly Agree
1 BRAZIL 10 25 25.0 30.0 10
2 GERMANY 30 20 20.0 20.0 10
3 ITALY 20 40 20.0 0.0 20
4 USA 5 10 42.5 17.5 25
我的问题是:是否有更简单、更快的方法,使用 dplyr 函数和管道得到相同的结果?
一种方法是从最初的 `df`(即仍包含 q1–q5 列、尚未添加 O_* 计数列的数据框)开始:
library(dplyr)
library(tidyr)
# Share of each Likert level among all non-missing answers, per country.
df |>
  # Long format: one row per (respondent, question) answer.
  pivot_longer(starts_with("q")) |>
  # Drop rows with missing values so unanswered items are not counted.
  drop_na() |>
  count(Country, value) |>
  # Turn the counts into proportions within each country. Grouping is by
  # Country only; `value` must not be passed here as an extra argument.
  mutate(Perc = prop.table(n), .by = Country) |>
  pivot_wider(
    id_cols = Country,
    names_from = value,
    values_from = Perc,
    names_sort = TRUE, # columns in factor-level order, not first appearance
    values_fill = 0 ## replace NA with 0
  )
Country `Very Dissatisfied` Dissatisfied Average Satisfied `Very Satisfied`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 BRAZIL 0.05 0.2 0.15 0.25 0.35
2 GERMANY 0.111 0.222 0.444 0.222 0
3 ITALY 0.25 0 0.25 0 0.5
4 USA 0 0.267 0.2 0.467 0.0667
(如有需要,可像前面那样重命名各列;上面得到的是比例,若需要百分比,将其乘以 100 即可。)