我在 R 中有一个名为 df 的数据框,其中包含 3 个问题的 Likert 数据和一个名为 var 的分组变量:
var_levels <- c(LETTERS[1:5])
n = 500
likert_levels = c(
"Very \n Dissatisfied",
"Dissatisfied",
"Neutral",
"Satisfied",
"Very \n Satisfied"
)
df <- tibble(
var = sample(var_levels, n, replace = TRUE),
val1 = sample(likert_levels, n, replace = TRUE),
val2 = sample(likert_levels, n, replace = TRUE),
val3 = sample(likert_levels, n, replace = TRUE)
)
好的。所以我总共有 500 个回复。但是我想知道每个级别有多少个,我可以通过计数来获取此信息:
> df_n = df%>%
+ select(var)%>%
+ group_by(var)%>%
+ summarise(counts=n())
> df_n
# A tibble: 5 × 2
var counts
<chr> <int>
1 A 91
2 B 77
3 C 122
4 D 104
5 E 106
(因为它们是模拟数据,如果你运行它们,它会给你不同的数字)。
现在我将保留这些信息,因为我想根据这些数字计数制作条形图。
现在关于我附加的 Likert 数据,或者如果您愿意,我可以使用执行此操作的 tidyr 函数将它们旋转更长的时间:
df2 = df%>%
+ pivot_longer(!var, names_to = "Categories", values_to = "likert_values")%>%
+ select(-Categories)
> df2
# A tibble: 1,500 × 2
var likert_values
<chr> <chr>
1 A "Dissatisfied"
2 A "Dissatisfied"
3 A "Dissatisfied"
4 E "Dissatisfied"
5 E "Dissatisfied"
6 E "Dissatisfied"
7 A "Very \n Dissatisfied"
8 A "Very \n Dissatisfied"
9 A "Neutral"
10 D "Dissatisfied"
我在这里所做的是,我将 3 个问题一个接一个地追加到同一列中。我这样做是因为我想找到这 3 个问题保持 Likert 量表的平均百分比。
如果现在我根据最不满意的答案对它们进行排序:
dat <- df2 |>
mutate(
across(-var, ~ factor(.x, likert_levels))
) |>
pivot_longer(-var, names_to = "group") |>
count(var, value, group) |>
complete(var, value, group, fill = list(n = 0)) |>
mutate(
prop = n / sum(n),
prop_lower = sum(prop[value %in% likert_levels[1:2]]),
prop_higher = sum(prop[value %in% likert_levels[4:5]]),
.by = c(var, group)
) |>
arrange(group, prop_lower) |>
mutate(
y_sort = paste(var, group, sep = "."),
y_sort = fct_inorder(y_sort)
)%>%
select(-n)
并创建两个边距(左侧和右侧)的总计:
top10 <- dat |>
distinct(group, var, prop_lower) |>
slice_max(prop_lower, n = 10, by = group)
dat <- dat |>
semi_join(top10)
dat_tot <- dat |>
distinct(group, var, y_sort, prop_lower, prop_higher) |>
pivot_longer(-c(group, var, y_sort),
names_to = c(".value", "name"),
names_sep = "_"
) |>
mutate(
hjust_tot = ifelse(name == "lower", 1, 0),
x_tot = ifelse(name == "lower", -0.6, 0.6)
)
绑定级别计数的第一个聚合:
dat = dat%>%
left_join(.,df_n,by="var")
dat_bar = dat %>%
select(var,group,counts)%>%
distinct(var,group,counts)%>%
mutate(y_sort=paste(var, group, sep = ".") )%>%
select(-var)
现在如果我尝试绘制它们:
p1 <- ggplot(dat, aes(y = y_sort, x = prop, fill = value)) +
geom_col(position = position_likert(reverse = FALSE)) +
geom_text(
aes(
label = label_percent_abs(hide_below = .05, accuracy = 1)(prop),
color = after_scale(hex_bw(.data$fill))
),
position = position_likert(vjust = 0.5, reverse = FALSE),
size = 3.5
) +
geom_label(
aes(
x = x_tot,
label = label_percent_abs(accuracy = 1)(prop),
hjust = hjust_tot,
fill = NULL
),
data = dat_tot,
size = 3.5,
color = "black",
fontface = "bold",
label.size = 0,
show.legend = FALSE
) +
scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
scale_x_continuous(
labels = label_percent_abs(),
expand = c(0, .15)
) +
scale_fill_brewer(palette = "BrBG") +
facet_wrap(~group,
scales = "free_y", ncol = 1,
strip.position = "right"
) +
theme_light() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_blank(),
strip.text = element_blank()
) +
labs(x = NULL, y = NULL, fill = NULL)
p2 <- ggplot(dat_bar, aes(y = y_sort, x = counts)) +
geom_col() +
geom_label(
aes(
label = label_number_abs(hide_below = .05, accuracy = 1)(counts)
),
size = 3.5,
hjust = 1,
fill = NA,
label.size = 0,
color = "white"
) +
scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
scale_x_continuous(
labels = label_number_abs(),
expand = c(0, 0, 0, .05)
)+
# facet_wrap(~group,
# scales = "free_y", ncol = 1,
# strip.position = "right"
# ) +
theme_light() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_blank()
) +
labs(x = NULL, y = NULL, fill = NULL)
library(patchwork)
p1 + p2 +
plot_layout(
axes = "collect",
guides = "collect") &
theme(legend.position = "bottom")
水平方向不匹配。
显然我不能使用 df2 (即附加的数据框),因为我无法在那里计数。它会给出每个类别的错误计数。
我希望李克特图中的每个级别都与条形图中的水平水平匹配。
我怎样才能在 R 中做到这一点?
基本上,这与您之前的post中的答案相同,只是我以不同的方式计算
dat_bar
的计数:
注意:使用随机数时,您只需使用
set.seed()
即可实现再现性。
library(patchwork)
library(tidyverse)
library(ggstats)
set.seed(123)
var_levels <- c(LETTERS[1:5])
n <- 500
likert_levels <- c(
"Very \n Dissatisfied",
"Dissatisfied",
"Neutral",
"Satisfied",
"Very \n Satisfied"
)
df <- tibble(
var = sample(var_levels, n, replace = TRUE),
val1 = sample(likert_levels, n, replace = TRUE),
val2 = sample(likert_levels, n, replace = TRUE),
val3 = sample(likert_levels, n, replace = TRUE)
)
dat <- df |>
mutate(
across(-var, ~ factor(.x, likert_levels))
) |>
pivot_longer(-var, names_to = "group") |>
count(var, value, group) |>
complete(var, value, group, fill = list(n = 0)) |>
mutate(
prop = n / sum(n),
prop_lower = sum(prop[value %in% likert_levels[1:2]]),
prop_higher = sum(prop[value %in% likert_levels[4:5]]),
.by = c(var, group)
) |>
arrange(group, prop_lower) |>
mutate(
y_sort = paste(var, group, sep = "."),
y_sort = fct_inorder(y_sort)
)
top10 <- dat |>
distinct(group, var, prop_lower) |>
slice_max(prop_lower, n = 10, by = group)
dat <- dat |>
semi_join(top10)
#> Joining with `by = join_by(var, group, prop_lower)`
dat_tot <- dat |>
distinct(group, var, y_sort, prop_lower, prop_higher) |>
pivot_longer(-c(group, var, y_sort),
names_to = c(".value", "name"),
names_sep = "_"
) |>
mutate(
hjust_tot = ifelse(name == "lower", 1, 0),
x_tot = ifelse(name == "lower", -0.6, 0.6)
)
dat_bar <- dat %>%
mutate(counts = sum(n), .by = c(var, group)) |>
distinct(var, group, y_sort, counts)
p1 <- ggplot(dat, aes(y = y_sort, x = prop, fill = value)) +
geom_col(position = position_likert(reverse = FALSE)) +
geom_text(
aes(
label = label_percent_abs(hide_below = .05, accuracy = 1)(prop),
color = after_scale(hex_bw(.data$fill))
),
position = position_likert(vjust = 0.5, reverse = FALSE),
size = 3.5
) +
geom_label(
aes(
x = x_tot,
label = label_percent_abs(accuracy = 1)(prop),
hjust = hjust_tot,
fill = NULL
),
data = dat_tot,
size = 3.5,
color = "black",
fontface = "bold",
label.size = 0,
show.legend = FALSE
) +
scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
scale_x_continuous(
labels = label_percent_abs(),
expand = c(0, .15)
) +
scale_fill_brewer(palette = "BrBG") +
facet_wrap(~group,
scales = "free_y", ncol = 1,
strip.position = "right"
) +
theme_light() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_blank(),
strip.text = element_blank()
) +
labs(x = NULL, y = NULL, fill = NULL)
p2 <- ggplot(dat_bar, aes(y = y_sort, x = counts)) +
geom_col() +
geom_label(
aes(
label = label_number_abs(hide_below = .05, accuracy = 1)(counts)
),
size = 3.5,
hjust = 1,
fill = NA,
label.size = 0,
color = "white"
) +
scale_y_discrete(labels = \(x) gsub("\\..*$", "", x)) +
scale_x_continuous(
labels = label_number_abs(),
expand = c(0, 0, 0, .05)
) +
facet_wrap(~group,
scales = "free_y", ncol = 1
) +
theme_light() +
theme(
legend.position = "bottom",
panel.grid.major.y = element_blank(),
strip.text = element_blank()
) +
labs(x = NULL, y = NULL, fill = NULL)
p1 + p2 +
plot_layout(
guides = "collect"
) &
theme(legend.position = "bottom")