如何在 R ggplot2 中将多个图与常见的连续刻度图例结合起来?

问题描述 投票:0回答:1

我的数据框如下:

dput(Book1)
structure(list(Phenotype = c("WHR", "WHR", "WHR", "WHR", "WHR", 
"WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "WHR", 
"WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "BMI", "BMI", "BMI", 
"BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", 
"BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BF", 
"BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", 
"BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "WC", "WC", "WC", 
"WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", 
"WC", "WC", "WC", "WC", "WC", "WC"), Feature = c("E1", "E2", 
"E3", "E4", "E5", "prs_add1", "prs_add2", "prs_add3", "prs_add4", 
"prs_add5", "prs_gxe1", "prs_gxe2", "prs_gxe3", "prs_gxe4", "prs_gxe5", 
"prs_gxe1.E1", "prs_gxe2.E2", "prs_gxe3.E3", "prs_gxe4.E4", "prs_gxe5.E5", 
"E1", "E2", "E3", "E4", "E5", "prs_add1", "prs_add2", "prs_add3", 
"prs_add4", "prs_add5", "prs_gxe1", "prs_gxe2", "prs_gxe3", "prs_gxe4", 
"prs_gxe5", "prs_gxe1.E1", "prs_gxe2.E2", "prs_gxe3.E3", "prs_gxe4.E4", 
"prs_gxe5.E5", "E1", "E2", "E3", "E4", "E5", "prs_add1", "prs_add2", 
"prs_add3", "prs_add4", "prs_add5", "prs_gxe1", "prs_gxe2", "prs_gxe3", 
"prs_gxe4", "prs_gxe5", "prs_gxe1.E1", "prs_gxe2.E2", "prs_gxe3.E3", 
"prs_gxe4.E4", "prs_gxe5.E5", "E1", "E2", "E3", "E4", "E5", "prs_add1", 
"prs_add2", "prs_add3", "prs_add4", "prs_add5", "prs_gxe1", "prs_gxe2", 
"prs_gxe3", "prs_gxe4", "prs_gxe5", "prs_gxe1.E1", "prs_gxe2.E2", 
"prs_gxe3.E3", "prs_gxe4.E4", "prs_gxe5.E5"), Estimate = c(-0.046927378, 
0.021501452, -0.060576561, 0.026664695, 0.104747758, 0.07074638, 
0.023649159, NA, 0.032806854, 0.035131611, NA, NA, NA, NA, NA, 
0.007299072, NA, NA, 0.011815316, 0.010481616, -0.05835143, NA, 
-0.08610439, NA, 0.06877879, 0.16459694, NA, 0.06961791, 0.03775922, 
NA, -0.01066025, NA, 0.01537474, NA, NA, NA, NA, 0.01666013, 
0.02455695, 0.02196081, -0.047891691, NA, -0.095327227, 0.012395405, 
0.058295327, 0.096737663, NA, 0.047297343, 0.027082146, 0.032815238, 
-0.007341486, NA, 0.010720534, NA, NA, NA, NA, 0.009458876, 0.014808779, 
0.011568591, -0.059678999, 0.013280999, -0.096726335, 0.021585317, 
0.10152862, 0.111473237, -0.003207717, 0.048416066, 0.026224904, 
0.038620466, -0.003613794, 0.001623824, 0.009766366, -0.002981712, 
0.002548997, NA, NA, 178.444778528, 390.731771843, 220.388712222
), pvalue = c(1.144351e-24, 4.80777e-07, 1.086296e-46, 7.993226e-07, 
2.285532e-85, 0.0003647517, 0.08261897, NA, 0.017708, 0.003835903, 
NA, NA, NA, NA, NA, 0.09759815, NA, NA, 0.02063316, 0.03890773, 
1.404994e-23, NA, 5.218511e-28, NA, 6.079532e-27, 1.217357e-08, 
NA, 0.001499233, 0.08749246, NA, 0.05960327, NA, 0.007881778, 
NA, NA, NA, NA, 0.0325944, 1.3024e-05, 0.0003457601, 5.504658e-28, 
NA, 2.312629e-48, 0.006481173, 1.458908e-38, 6.315281e-05, NA, 
0.001850213, 0.09934841, 0.03273855, 0.08196993, NA, 0.01139238, 
NA, NA, NA, NA, 0.1444197, 0.0004105521, 0.005805464, 1.302842e-31, 
0.007986413, 7.798806e-46, 4.979966e-05, 8.837303e-83, 6.544795e-05, 
0.8587647, 0.004068312, 0.1263797, 0.01614415, 0.4730805, 0.743117, 
0.0488486, 0.5601606, 0.630477, NA, NA, 0.03294762, 4.632479e-07, 
0.00237151)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-80L))

我的 R 代码用于绘制上述数据框中 4 个表型中的 1 个(即 WHR,通过

features_df <- features_df[features_df$Phenotype=="WHR",]
删除与 BMI、BF 和 WC 相关的所有条目)如下:

features_df <- Book1
features_df <- features_df[features_df$Phenotype=="WHR",]

# Create a new column with p-value intervals
features_df$p_interval <- cut(features_df$pvalue,
                         breaks = c(0, 0.0005, 0.005, 0.05, 1),
                         labels = c("< 0.0005", "0.005", "0.05", "1"),
                         include.lowest = TRUE)

# Define your custom legend values and sizes
legend_data <- data.frame(
  p_value_interval = c("< 0.0005", "0.005", "0.05", "1"),
  size = c(15,13,11,9)
)

library(dplyr)


# Adjust your dataframe by splitting the 'Feature' column more accurately
features_df <- features_df %>%
  mutate(
    Category = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), "prs_gxe.E",
                      gsub("\\d+$", "", Feature)),  # Remove trailing digits generally, special case for prs_gxe.E
    Index = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), sub("prs_gxe(\\d+)\\.E\\d+$", "\\1", Feature),
                   gsub("\\D+", "", Feature))  # Extract the number part, special handling for prs_gxe.E
  )


features_df <- features_df %>%
  mutate(Index = case_when(
    Index == 1 ~ "HD",
    Index == 2 ~ "NS",
    Index == 3 ~ "PA",
    Index == 4 ~ "PALC",
    TRUE ~ "SMK"  # Default case if none of the above conditions are met
  ))

features_df <- features_df %>%
  mutate(
    Category = case_when(
      Category == "prs_gxe.E" ~ "PRS[gxe]%*%E",   # Storing as character strings
      Category == "prs_gxe" ~ "'PRS'[gxe]",
      Category == "prs_add" ~ "'PRS'[add]",
      Category == "E" ~ "'E'",
      TRUE ~ Category  # Keeps other categories unchanged
    )
  )


library(ggplot2)
WHR <- ggplot(data = features_df, aes(x = Index, y = Category, fill = Estimate, size = p_interval)) +
  geom_tile(color = "white", fill = "white", size=1) +
  geom_point(shape = 22, color = "white", stroke = 0) +
  
  
  geom_hline(yintercept = seq_along(unique(features_df$Category)) + 0.5, color = "grey") +
  geom_vline(xintercept = seq_along(unique(features_df$Index)) + 0.5, color = "grey") +
  geom_vline(xintercept = 0.5, color = "grey")+
  geom_hline(yintercept = 0.5, color = "grey")+
  geom_text(data = subset(features_df,(pvalue <= 0.05)&(pvalue > 0.005)),
            aes(label = "\u273B"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE) +
  geom_text(data = subset(features_df, (pvalue <= 0.005)&(pvalue > 0.0005)),
            aes(label = "\u273B\u273B"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE) +
  geom_text(data = subset(features_df, (pvalue <= 0.0005)),
            aes(label = "\u273B\u273B\u273B"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE) +
  geom_text(data = subset(features_df, (is.na(Estimate))),
            aes(label = "NA"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE)+
  scale_fill_gradientn(colours = c("red4", "white", "steelblue"),
                       limit = c(-max(abs(features_df$Estimate))-0.000, max(abs(features_df$Estimate))+0.000),
                       guide = guide_colorbar(title = "Estimate",
                                              label.position = "right",
                                              barwidth = 1,
                                              #barheight = 15,
                                              barheight = 10,
                                              title.position = "top",
                                              order = 1)) +
  scale_size_manual(values = legend_data$size,
                    breaks = legend_data$p_value_interval,
                    labels = legend_data$p_value_interval,
                    guide = guide_legend(title = "p value",
                                         label.position = "right",
                                         title.position = "top")) +
  scale_y_discrete(labels = function(x) parse(text = x))+
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45,
                                   hjust = 1,
                                   vjust = 1,
                                   size = 12,
                                   colour = "black"),
        axis.text.y = element_text(size=12, colour = "black"),
        axis.title.y = element_text(size = 0),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        legend.key.width = unit(0.1, "cm"),
        legend.key.height = unit(0.1, "cm"),
        legend.box = "horizontal",
        legend.spacing.y = unit(0.2, "cm"),
        legend.position = "right",
        legend.box.margin = margin(b=-10, t=-10, l=-1, r=11),
        legend.box.just = ("center"),
        plot.margin = margin(b=20, t=20, l=20, r=20)) +
  labs(x = features_df$Phenotype, y = "", fill = "Estimate", size = "p value") +
  guides(size = guide_legend(reverse = F,
                             label.position = "right",
                             title.position = "top",
                             override.aes = list(colour="grey",
                                                 shape=15,
                                                 fill="white",
                                                 size=legend_data$size-1)))+
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
        strip.text = element_text(size = 12))

WHR

这个 R 代码将给出以下图: THIS IS MY PLOT

我的兴趣是一张 2x2 的全合图(针对所有 4 种表型),对所有四种表型使用通用的估计值和 p 值图例。我无法单独绘制这 4 个图,因为当我将它们组合起来时,我没有共同的图例。考虑到代码的复杂性,尝试像

grob
这样的选项非常具有挑战性。请帮助我创建全合一图(所有表型)

r ggplot2
1个回答
0
投票

代码:

features_df <- Book1
###########################################features_df <- features_df[features_df$Phenotype=="WHR",]
# Create a new column with p-value intervals
features_df$p_interval <- cut(features_df$pvalue,
                         breaks = c(0, 0.0005, 0.005, 0.05, 1),
                         labels = c("< 0.0005", "0.005", "0.05", "1"),
                         include.lowest = TRUE)

# Define your custom legend values and sizes
legend_data <- data.frame(
  p_value_interval = c("< 0.0005", "0.005", "0.05", "1"),
  size = c(15,13,11,9)
)

library(dplyr)


# Adjust your dataframe by splitting the 'Feature' column more accurately
features_df <- features_df %>%
  mutate(
    Category = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), "prs_gxe.E",
                      gsub("\\d+$", "", Feature)),  # Remove trailing digits generally, special case for prs_gxe.E
    Index = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), sub("prs_gxe(\\d+)\\.E\\d+$", "\\1", Feature),
                   gsub("\\D+", "", Feature))  # Extract the number part, special handling for prs_gxe.E
  )


features_df <- features_df %>%
  mutate(Index = case_when(
    Index == 1 ~ "HD",
    Index == 2 ~ "NS",
    Index == 3 ~ "PA",
    Index == 4 ~ "PALC",
    TRUE ~ "SMK"  # Default case if none of the above conditions are met
  ))

features_df <- features_df %>%
  mutate(
    Category = case_when(
      Category == "prs_gxe.E" ~ "PRS[gxe]%*%E",   # Storing as character strings
      Category == "prs_gxe" ~ "'PRS'[gxe]",
      Category == "prs_add" ~ "'PRS'[add]",
      Category == "E" ~ "'E'",
      TRUE ~ Category  # Keeps other categories unchanged
    )
  )


library(ggplot2)
ggplot(data = features_df, aes(x = Index, y = Category, fill = Estimate, size = p_interval)) +
  geom_tile(color = "white", fill = "white", size=1) +
  geom_point(shape = 22, color = "white", stroke = 0) +
  
  
  geom_hline(yintercept = seq_along(unique(features_df$Category)) + 0.5, color = "grey") +
  geom_vline(xintercept = seq_along(unique(features_df$Index)) + 0.5, color = "grey") +
  geom_vline(xintercept = 0.5, color = "grey")+
  geom_hline(yintercept = 0.5, color = "grey")+
  geom_text(data = subset(features_df,(pvalue <= 0.05)&(pvalue > 0.005)),
            aes(label = "\u273B"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE) +
  geom_text(data = subset(features_df, (pvalue <= 0.005)&(pvalue > 0.0005)),
            aes(label = "\u273B\u273B"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE) +
  geom_text(data = subset(features_df, (pvalue <= 0.0005)),
            aes(label = "\u273B\u273B\u273B"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE) +
  geom_text(data = subset(features_df, (is.na(Estimate))),
            aes(label = "NA"),
            size = 4, vjust = 0.5, hjust = 0.5,
            show.legend = FALSE)+
  scale_fill_gradientn(colours = c("red4", "white", "steelblue"),
                       limit = c(-max(abs(features_df$Estimate))-0.000, max(abs(features_df$Estimate))+0.000),
                       guide = guide_colorbar(title = "Estimate",
                                              label.position = "right",
                                              barwidth = 1,
                                              #barheight = 15,
                                              barheight = 10,
                                              title.position = "top",
                                              order = 1)) +
  scale_size_manual(values = legend_data$size,
                    breaks = legend_data$p_value_interval,
                    labels = legend_data$p_value_interval,
                    guide = guide_legend(title = "p value",
                                         label.position = "right",
                                         title.position = "top")) +
  scale_y_discrete(labels = function(x) parse(text = x))+
  facet_wrap(~Phenotype, ncol = 2) +###########################
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45,
                                   hjust = 1,
                                   vjust = 1,
                                   size = 12,
                                   colour = "black"),
        axis.text.y = element_text(size=12, colour = "black"),
        axis.title.y = element_text(size = 0),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        legend.key.width = unit(0.1, "cm"),
        legend.key.height = unit(0.1, "cm"),
        legend.box = "horizontal",
        legend.spacing.y = unit(0.2, "cm"),
        legend.position = "right",
        legend.box.margin = margin(b=-10, t=-10, l=-1, r=11),
        legend.box.just = ("center"),
        plot.margin = margin(b=20, t=20, l=20, r=20)) +
  labs(x = "", y = "", fill = "Estimate", size = "p value") +
  guides(size = guide_legend(reverse = F,
                             label.position = "right",
                             title.position = "top",
                             override.aes = list(colour="grey",
                                                 shape=15,
                                                 fill="white",
                                                 size=legend_data$size-1)))+
  theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
        strip.text = element_text(size = 12))

剧情: PLOT MODIFIED

这里我做了以下编辑:

  1. 已评论
    #features_df <- features_df[features_df$Phenotype=="WHR",]
  2. 添加了
     facet_wrap(~Phenotype, ncol = 2) +
    将绘图区域划分为 4 个 2 列
  3. 删除了 x 标签
    labs(x = "", y = "", fill = "Estimate", size = "p value") +
© www.soinside.com 2019 - 2024. All rights reserved.