我在 R 中有一个数据框,其中包含 country(国家)和 smoke(吸烟状况)两个因子(factor)列。
我想按每个国家 “smoke” 与 “vaping” 两项占比之和的降序,更改左图中国家的排列顺序(参见下图)。目前它没有排序。例如,根据图中的模拟数据,法国必须位于最上方,其下依次是美国、英国,最后是德国。
在第二个图中还要对国家进行排序。即必须是法国、美国、英国、德国。
library(dplyr)
library(ggplot2)
library(forcats)
# Simulated survey data: 50 respondents, each with a randomly drawn country
# and a randomly drawn smoking status.
set.seed(123) # fixed seed so the simulated sample is reproducible
levels_country <- c("USA", "UK", "FRANCE", "GERMANY")
levels_smoke <- c("smoke", "not smoke", "vaping")
# `country` is drawn before `smoke`; swapping the two calls would change the
# random stream and therefore the data.
country <- sample(levels_country, size = 50, replace = TRUE)
smoke <- sample(levels_smoke, size = 50, replace = TRUE)
# Both columns are stored as factors with an explicit level order.
df <- tibble(
  country = factor(country, levels = levels_country),
  smoke = factor(smoke, levels = levels_smoke)
)
# Count respondents per country and smoking status, then add each country's
# total (`summed`) and the share of that total held by each status
# (`percentage`). The result stays grouped by `country`.
Grouped <- df |>
  group_by(country, smoke) |>
  summarise(n = n()) |>
  # regroup by country only so that sum(n) is the per-country total
  group_by(country) |>
  mutate(
    summed = sum(n),
    percentage = n / summed
  )
# Rank countries by the combined share of "smoke" and "vaping", largest first.
# The filter previously kept "not smoke" instead of "vaping", which ranked the
# countries by the wrong quantity.
ordered_countries <- Grouped %>%
  dplyr::filter(smoke %in% c("smoke", "vaping")) %>%
  dplyr::group_by(country) %>%
  dplyr::summarise(percentage = sum(percentage)) %>%
  dplyr::arrange(desc(percentage)) %>%
  dplyr::select(country)

# Country names in ranking order, as a plain character vector so it can be
# passed to factor(levels = ...).
# NOTE(review): nothing in the visible script applies `ranking` to the
# `country` factor, so on its own it does not change the order of the plots.
ranking <- as.character(ordered_countries$country)
# Fill colours for the bar segments. The vector is unnamed, so the colours are
# matched in order with the `limits` passed to scale_fill_manual() below.
smoking_col <- c("red1", "salmon", "green3")

# Left panel: share of each smoking status per country, as horizontal bars
# filled to 100 %.
g1 <- ggplot(Grouped, aes(x = country, y = percentage, fill = smoke)) +
  # geom_col() always uses the identity stat, so it takes no `stat` argument
  geom_col(position = position_fill(reverse = TRUE)) +
  scale_fill_manual(
    values = smoking_col,
    limits = c("smoke", "vaping", "not smoke"),
    breaks = c("smoke", "vaping", "not smoke"),
    labels = c("smoke", "vaping", "not smoke")
  ) +
  coord_flip() +
  theme_light() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.title.x = element_blank()
  ) +
  theme(
    axis.text.y = element_text(size = 13, angle = 0, hjust = 0, vjust = 0),
    axis.text.x = element_text(size = 13)
  ) +
  # scales is not attached by this script, so percent() must be qualified
  scale_y_continuous(labels = scales::percent)
g1
# Right panel: number of respondents per country as horizontal bars, with the
# count printed next to each bar.
g2 <- ggplot(df, aes(x = country)) +
  # geom_bar() counts rows per x value by default
  geom_bar() +
  # after_stat(count) replaces the deprecated ..count.. notation
  geom_text(
    aes(y = after_stat(count), label = after_stat(count)),
    stat = "count",
    size = 4.75,
    hjust = -0.15
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15),
    axis.text.x = element_text(size = 13)
  ) +
  # NOTE(review): the 1300 upper limit presumably comes from the real data set;
  # with only 50 simulated rows it makes the bars very short - confirm.
  expand_limits(y = c(0, 1300))

# Draw both panels side by side. gridExtra is not attached by this script, so
# grid.arrange() must be qualified.
gridExtra::grid.arrange(g1, g2, ncol = 2, widths = c(3, 1.2))
结果:
Grouped
# A tibble: 12 × 5
# Groups: country [4]
country smoke n summed percentage
<fct> <fct> <int> <int> <dbl>
1 USA smoke 4 13 0.308
2 USA not smoke 3 13 0.231
3 USA vaping 6 13 0.462
4 UK smoke 4 13 0.308
5 UK not smoke 6 13 0.462
6 UK vaping 3 13 0.231
7 FRANCE smoke 9 17 0.529
8 FRANCE not smoke 3 17 0.176
9 FRANCE vaping 5 17 0.294
10 GERMANY smoke 2 7 0.286
11 GERMANY not smoke 4 7 0.571
12 GERMANY vaping 1 7 0.143
下面是一种稍微简化代码的方法:使用 `reorder()`,按吸烟(smoke)与电子烟(vaping)占比之和对 `country` 进行排序:
library(dplyr, warn = FALSE)
library(ggplot2)
# Per-country shares of each smoking status, with `country` re-levelled by the
# combined share of "smoke" and "vaping".
Grouped <- df |>
  # put the statuses in the order used for stacking
  mutate(smoke = factor(smoke, levels = c("smoke", "vaping", "not smoke"))) |>
  count(country, smoke) |>
  mutate(percentage = n / sum(n), .by = country) |>
  mutate(
    # "not smoke" rows contribute zero, so each country's score is the sum of
    # its "smoke" and "vaping" shares; reorder() sorts levels by that score
    country = reorder(
      country,
      ifelse(smoke %in% c("smoke", "vaping"), percentage, 0),
      FUN = sum
    )
  )
# Fill colours, listed in the order smoke / vaping / not smoke.
smoking_col <- c("red1", "salmon", "green3")

# Left panel: stacked horizontal bars showing the share of each smoking status
# per country.
g1 <- ggplot(Grouped, aes(x = country, y = percentage, fill = smoke)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  # name the colours so each one is tied to its status explicitly
  scale_fill_manual(
    values = setNames(smoking_col, c("smoke", "vaping", "not smoke"))
  ) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_light() +
  # no legend, no axis titles, larger axis text
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 13, angle = 0, hjust = 0, vjust = 0)
  )
# Right panel: total respondents per country. It is built from `Grouped`, so it
# uses the same `country` factor levels as the left panel.
g2 <- Grouped |>
  # sum the per-status counts back up to one row per country
  count(country, wt = n) |>
  ggplot(aes(x = country, y = n)) +
  geom_col() +
  # borderless, unfilled label showing the count at the end of each bar
  geom_label(
    aes(label = n),
    size = 4.75,
    hjust = 0,
    fill = NA,
    label.size = 0
  ) +
  # extra space past the longest bar so its label has room
  scale_y_continuous(expand = expansion(add = c(0, 5))) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 13),
    legend.position = "none",
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15)
  )

# Draw the two panels side by side.
gridExtra::grid.arrange(g1, g2, ncol = 2, widths = c(3, 2))