使用 ggplot2 根据 R 中水平百分比的组合对两个图进行排序

Question

我在 R 中有一个数据框，其中包含国家（country）和吸烟状况（smoke）两列，两列都是因子。

我想按各国 “smoke” 与 “vaping” 两个水平所占比例之和的降序，调整左图中国家的排列顺序（参见图像图）。现在它没有排序。例如，根据图中的模拟数据，法国必须排在最上面，其下依次是美国、英国，最后是德国。

在第二个图中还要对国家进行排序。即必须是法国、美国、英国、德国。

library(dplyr)
library(ggplot2)
library(forcats)
# Simulate 50 respondents, each with a country and a smoking status.
# The seed is fixed so both draws are reproducible; `country` is drawn before
# `smoke`, which keeps the random stream in its original order.
set.seed(123)
levels_country <- c("USA", "UK", "FRANCE", "GERMANY")
levels_smoke <- c("smoke", "not smoke", "vaping")
country <- sample(levels_country, size = 50, replace = TRUE)
smoke <- sample(levels_smoke, size = 50, replace = TRUE)

# Store both columns as factors whose level order follows the vectors above.
df <- tibble(
  country = factor(country, levels = levels_country),
  smoke = factor(smoke, levels = levels_smoke)
)


# Count respondents per country x smoking status and express each count as a
# share of its country's total.
#   n          - number of rows in the country/smoke cell
#   summed     - number of rows in the country
#   percentage - n / summed (a proportion, not a 0-100 value)
# `.groups = "drop_last"` states explicitly that the summary stays grouped by
# `country`, so the per-country total needs no second group_by() and
# summarise() no longer prints its regrouping message.
Grouped <- df %>%
  dplyr::group_by(country, smoke) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop_last") %>%
  dplyr::mutate(
    summed = sum(n),
    percentage = n / summed
  )

# Rank countries by the combined share of "smoke" and "vaping", largest first.
# The share is summed inside summarise() rather than after a row filter, so a
# country with no smoke/vaping rows is still ranked (with a share of 0).
ordered_countries <- Grouped %>%
  dplyr::group_by(country) %>%
  dplyr::summarise(
    percentage = sum(percentage[smoke %in% c("smoke", "vaping")])
  ) %>%
  dplyr::arrange(dplyr::desc(percentage)) %>%
  dplyr::select(country)

# Country names in ranking order, as a plain character vector so it can be
# passed directly as `levels =` when re-levelling a factor.
ranking <- as.character(ordered_countries$country)


# Left panel: horizontal 100% stacked bars of smoking status per country.
# Colours are matched to the `limits` order below: smoke, vaping, not smoke.
smoking_col <- c("red1", "salmon", "green3")
g1 <- ggplot(Grouped, aes(x = country, y = percentage, fill = smoke)) +
  # geom_col() has no `stat` argument (it always uses the values as given),
  # so only the position adjustment is passed.
  geom_col(position = position_fill(reverse = TRUE)) +
  # With `breaks` and `labels` left at their defaults they follow `limits`.
  scale_fill_manual(
    values = smoking_col,
    limits = c("smoke", "vaping", "not smoke")
  ) +
  coord_flip() +
  theme_light() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.title.x = element_blank(),
    axis.text.y = element_text(size = 13, angle = 0, hjust = 0, vjust = 0),
    axis.text.x = element_text(size = 13)
  ) +
  # scales is not attached by the library() calls in this script, so the
  # percent formatter is referenced with its namespace.
  scale_y_continuous(labels = scales::percent)
g1

# Right panel: number of respondents per country, with the count printed at
# the end of each bar.
# after_stat(count) replaces the deprecated `..count..` notation for referring
# to the value computed by the count stat.
g2 <- ggplot(df, aes(x = country)) +
  geom_bar(aes(y = after_stat(count))) +
  geom_text(
    aes(y = after_stat(count), label = after_stat(count)),
    stat = "count",
    size = 4.75,
    hjust = -0.15
  ) +
  coord_flip() +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15),
    axis.text.x = element_text(size = 13)
  ) +
  # NOTE(review): the simulated data has only 50 rows, so an upper limit of
  # 1300 leaves the bars very short; presumably it was sized for the real
  # data - confirm before reusing.
  expand_limits(y = c(0, 1300))



# Draw the two panels side by side; the left panel gets the larger width share.
# gridExtra is not attached by the library() calls in this script, so the
# function is called with its namespace.
gridExtra::grid.arrange(g1, g2, ncol = 2, widths = c(3, 1.2))

结果：

Grouped
# A tibble: 12 × 5
# Groups:   country [4]
   country smoke         n summed percentage
   <fct>   <fct>     <int>  <int>      <dbl>
 1 USA     smoke         4     13      0.308
 2 USA     not smoke     3     13      0.231
 3 USA     vaping        6     13      0.462
 4 UK      smoke         4     13      0.308
 5 UK      not smoke     6     13      0.462
 6 UK      vaping        3     13      0.231
 7 FRANCE  smoke         9     17      0.529
 8 FRANCE  not smoke     3     17      0.176
 9 FRANCE  vaping        5     17      0.294
10 GERMANY smoke         2      7      0.286
11 GERMANY not smoke     4      7      0.571
12 GERMANY vaping        1      7      0.143

Answer 1

这是一种稍微简化代码的方法，并使用 `reorder` 按 “smoke” 和 “vaping” 的比例之和对 `country` 进行排序：

library(dplyr, warn = FALSE)
library(ggplot2)

# Per-country share of each smoking status, with `country` re-levelled by the
# combined share of "smoke" and "vaping".
Grouped <- df |>
  # Put "vaping" second so the levels read smoke, vaping, not smoke.
  mutate(smoke = factor(smoke, levels = c("smoke", "vaping", "not smoke"))) |>
  count(country, smoke) |>
  mutate(percentage = n / sum(n), .by = country) |>
  mutate(
    # Rows outside smoke/vaping contribute 0, so each country's score is its
    # smoke + vaping share; reorder() sorts the levels by increasing score.
    country = reorder(
      country,
      if_else(smoke %in% c("smoke", "vaping"), percentage, 0),
      FUN = sum
    )
  )

# Fill colours, in the order smoke, vaping, not smoke.
smoking_col <- c("red1", "salmon", "green3")

# Left panel: horizontal stacked bars of the per-country shares. Colours are
# bound to the levels by name rather than by position.
g1 <- Grouped |>
  ggplot(aes(country, percentage, fill = smoke)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  scale_fill_manual(
    values = setNames(smoking_col, c("smoke", "vaping", "not smoke"))
  ) +
  scale_y_continuous(labels = scales::percent) +
  coord_flip() +
  theme_light() +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 13),
    axis.text.y = element_text(size = 13, angle = 0, hjust = 0, vjust = 0)
  )

# Right panel: total respondents per country. Summing `n` over the rows of
# `Grouped` keeps the `country` factor exactly as it is levelled there, so
# both panels list the countries in the same order.
g2 <- ggplot(
  count(Grouped, country, wt = n),
  aes(country, n)
) +
  geom_col() +
  # Borderless, unfilled label placed just past the end of each bar.
  geom_label(
    aes(label = n),
    size = 4.75,
    hjust = 0,
    fill = NA,
    label.size = 0
  ) +
  # Extra room on the upper side of the count axis only, for the labels.
  scale_y_continuous(expand = expansion(add = c(0, 5))) +
  coord_flip() +
  theme_minimal() +
  theme(
    legend.position = "none",
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 15),
    axis.text.x = element_text(size = 13)
  )

# Draw both panels side by side with a 3:2 width ratio.
gridExtra::grid.arrange(grobs = list(g1, g2), ncol = 2, widths = c(3, 2))

使用 ggplot2 根据 R 中水平百分比的组合对两个图进行排序

问题描述投票：0回答：1

1个回答

最新问题

使用 ggplot2 根据 R 中水平百分比的组合对两个图进行排序

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1