我的数据框如下:
dput(Book1)
structure(list(Phenotype = c("WHR", "WHR", "WHR", "WHR", "WHR",
"WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "WHR",
"WHR", "WHR", "WHR", "WHR", "WHR", "WHR", "BMI", "BMI", "BMI",
"BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI",
"BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BMI", "BF",
"BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF",
"BF", "BF", "BF", "BF", "BF", "BF", "BF", "BF", "WC", "WC", "WC",
"WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC", "WC",
"WC", "WC", "WC", "WC", "WC", "WC"), Feature = c("E1", "E2",
"E3", "E4", "E5", "prs_add1", "prs_add2", "prs_add3", "prs_add4",
"prs_add5", "prs_gxe1", "prs_gxe2", "prs_gxe3", "prs_gxe4", "prs_gxe5",
"prs_gxe1.E1", "prs_gxe2.E2", "prs_gxe3.E3", "prs_gxe4.E4", "prs_gxe5.E5",
"E1", "E2", "E3", "E4", "E5", "prs_add1", "prs_add2", "prs_add3",
"prs_add4", "prs_add5", "prs_gxe1", "prs_gxe2", "prs_gxe3", "prs_gxe4",
"prs_gxe5", "prs_gxe1.E1", "prs_gxe2.E2", "prs_gxe3.E3", "prs_gxe4.E4",
"prs_gxe5.E5", "E1", "E2", "E3", "E4", "E5", "prs_add1", "prs_add2",
"prs_add3", "prs_add4", "prs_add5", "prs_gxe1", "prs_gxe2", "prs_gxe3",
"prs_gxe4", "prs_gxe5", "prs_gxe1.E1", "prs_gxe2.E2", "prs_gxe3.E3",
"prs_gxe4.E4", "prs_gxe5.E5", "E1", "E2", "E3", "E4", "E5", "prs_add1",
"prs_add2", "prs_add3", "prs_add4", "prs_add5", "prs_gxe1", "prs_gxe2",
"prs_gxe3", "prs_gxe4", "prs_gxe5", "prs_gxe1.E1", "prs_gxe2.E2",
"prs_gxe3.E3", "prs_gxe4.E4", "prs_gxe5.E5"), Estimate = c(-0.046927378,
0.021501452, -0.060576561, 0.026664695, 0.104747758, 0.07074638,
0.023649159, NA, 0.032806854, 0.035131611, NA, NA, NA, NA, NA,
0.007299072, NA, NA, 0.011815316, 0.010481616, -0.05835143, NA,
-0.08610439, NA, 0.06877879, 0.16459694, NA, 0.06961791, 0.03775922,
NA, -0.01066025, NA, 0.01537474, NA, NA, NA, NA, 0.01666013,
0.02455695, 0.02196081, -0.047891691, NA, -0.095327227, 0.012395405,
0.058295327, 0.096737663, NA, 0.047297343, 0.027082146, 0.032815238,
-0.007341486, NA, 0.010720534, NA, NA, NA, NA, 0.009458876, 0.014808779,
0.011568591, -0.059678999, 0.013280999, -0.096726335, 0.021585317,
0.10152862, 0.111473237, -0.003207717, 0.048416066, 0.026224904,
0.038620466, -0.003613794, 0.001623824, 0.009766366, -0.002981712,
0.002548997, NA, NA, 178.444778528, 390.731771843, 220.388712222
), pvalue = c(1.144351e-24, 4.80777e-07, 1.086296e-46, 7.993226e-07,
2.285532e-85, 0.0003647517, 0.08261897, NA, 0.017708, 0.003835903,
NA, NA, NA, NA, NA, 0.09759815, NA, NA, 0.02063316, 0.03890773,
1.404994e-23, NA, 5.218511e-28, NA, 6.079532e-27, 1.217357e-08,
NA, 0.001499233, 0.08749246, NA, 0.05960327, NA, 0.007881778,
NA, NA, NA, NA, 0.0325944, 1.3024e-05, 0.0003457601, 5.504658e-28,
NA, 2.312629e-48, 0.006481173, 1.458908e-38, 6.315281e-05, NA,
0.001850213, 0.09934841, 0.03273855, 0.08196993, NA, 0.01139238,
NA, NA, NA, NA, 0.1444197, 0.0004105521, 0.005805464, 1.302842e-31,
0.007986413, 7.798806e-46, 4.979966e-05, 8.837303e-83, 6.544795e-05,
0.8587647, 0.004068312, 0.1263797, 0.01614415, 0.4730805, 0.743117,
0.0488486, 0.5601606, 0.630477, NA, NA, 0.03294762, 4.632479e-07,
0.00237151)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-80L))
我的 R 代码用于绘制上述数据框中 4 个表型中的 1 个(即 WHR,通过
features_df <- features_df[features_df$Phenotype=="WHR",]
删除与 BMI、BF 和 WC 相关的所有条目)如下:
features_df <- Book1
features_df <- features_df[features_df$Phenotype=="WHR",]
# Create a new column with p-value intervals
features_df$p_interval <- cut(features_df$pvalue,
breaks = c(0, 0.0005, 0.005, 0.05, 1),
labels = c("< 0.0005", "0.005", "0.05", "1"),
include.lowest = TRUE)
# Define your custom legend values and sizes
legend_data <- data.frame(
p_value_interval = c("< 0.0005", "0.005", "0.05", "1"),
size = c(15,13,11,9)
)
library(dplyr)
# Adjust your dataframe by splitting the 'Feature' column more accurately
features_df <- features_df %>%
mutate(
Category = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), "prs_gxe.E",
gsub("\\d+$", "", Feature)), # Remove trailing digits generally, special case for prs_gxe.E
Index = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), sub("prs_gxe(\\d+)\\.E\\d+$", "\\1", Feature),
gsub("\\D+", "", Feature)) # Extract the number part, special handling for prs_gxe.E
)
features_df <- features_df %>%
mutate(Index = case_when(
Index == 1 ~ "HD",
Index == 2 ~ "NS",
Index == 3 ~ "PA",
Index == 4 ~ "PALC",
TRUE ~ "SMK" # Default case if none of the above conditions are met
))
features_df <- features_df %>%
mutate(
Category = case_when(
Category == "prs_gxe.E" ~ "PRS[gxe]%*%E", # Storing as character strings
Category == "prs_gxe" ~ "'PRS'[gxe]",
Category == "prs_add" ~ "'PRS'[add]",
Category == "E" ~ "'E'",
TRUE ~ Category # Keeps other categories unchanged
)
)
library(ggplot2)
WHR <- ggplot(data = features_df, aes(x = Index, y = Category, fill = Estimate, size = p_interval)) +
geom_tile(color = "white", fill = "white", size=1) +
geom_point(shape = 22, color = "white", stroke = 0) +
geom_hline(yintercept = seq_along(unique(features_df$Category)) + 0.5, color = "grey") +
geom_vline(xintercept = seq_along(unique(features_df$Index)) + 0.5, color = "grey") +
geom_vline(xintercept = 0.5, color = "grey")+
geom_hline(yintercept = 0.5, color = "grey")+
geom_text(data = subset(features_df,(pvalue <= 0.05)&(pvalue > 0.005)),
aes(label = "\u273B"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE) +
geom_text(data = subset(features_df, (pvalue <= 0.005)&(pvalue > 0.0005)),
aes(label = "\u273B\u273B"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE) +
geom_text(data = subset(features_df, (pvalue <= 0.0005)),
aes(label = "\u273B\u273B\u273B"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE) +
geom_text(data = subset(features_df, (is.na(Estimate))),
aes(label = "NA"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE)+
scale_fill_gradientn(colours = c("red4", "white", "steelblue"),
limit = c(-max(abs(features_df$Estimate))-0.000, max(abs(features_df$Estimate))+0.000),
guide = guide_colorbar(title = "Estimate",
label.position = "right",
barwidth = 1,
#barheight = 15,
barheight = 10,
title.position = "top",
order = 1)) +
scale_size_manual(values = legend_data$size,
breaks = legend_data$p_value_interval,
labels = legend_data$p_value_interval,
guide = guide_legend(title = "p value",
label.position = "right",
title.position = "top")) +
scale_y_discrete(labels = function(x) parse(text = x))+
theme_minimal() +
theme(axis.text.x = element_text(angle = 45,
hjust = 1,
vjust = 1,
size = 12,
colour = "black"),
axis.text.y = element_text(size=12, colour = "black"),
axis.title.y = element_text(size = 0),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12),
legend.key.width = unit(0.1, "cm"),
legend.key.height = unit(0.1, "cm"),
legend.box = "horizontal",
legend.spacing.y = unit(0.2, "cm"),
legend.position = "right",
legend.box.margin = margin(b=-10, t=-10, l=-1, r=11),
legend.box.just = ("center"),
plot.margin = margin(b=20, t=20, l=20, r=20)) +
labs(x = features_df$Phenotype, y = "", fill = "Estimate", size = "p value") +
guides(size = guide_legend(reverse = F,
label.position = "right",
title.position = "top",
override.aes = list(colour="grey",
shape=15,
fill="white",
size=legend_data$size-1)))+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
strip.text = element_text(size = 12))
WHR
我的兴趣是一张 2x2 的全合图(针对所有 4 种表型),对所有四种表型使用通用的估计值和 p 值图例。我无法单独绘制这 4 个图,因为当我将它们组合起来时,我没有共同的图例。考虑到代码的复杂性,尝试像
grob
这样的选项非常具有挑战性。请帮助我创建全合一图(所有表型)
代码:
features_df <- Book1
###########################################features_df <- features_df[features_df$Phenotype=="WHR",]
# Create a new column with p-value intervals
features_df$p_interval <- cut(features_df$pvalue,
breaks = c(0, 0.0005, 0.005, 0.05, 1),
labels = c("< 0.0005", "0.005", "0.05", "1"),
include.lowest = TRUE)
# Define your custom legend values and sizes
legend_data <- data.frame(
p_value_interval = c("< 0.0005", "0.005", "0.05", "1"),
size = c(15,13,11,9)
)
library(dplyr)
# Adjust your dataframe by splitting the 'Feature' column more accurately
features_df <- features_df %>%
mutate(
Category = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), "prs_gxe.E",
gsub("\\d+$", "", Feature)), # Remove trailing digits generally, special case for prs_gxe.E
Index = ifelse(grepl("prs_gxe\\d+\\.E\\d+$", Feature), sub("prs_gxe(\\d+)\\.E\\d+$", "\\1", Feature),
gsub("\\D+", "", Feature)) # Extract the number part, special handling for prs_gxe.E
)
features_df <- features_df %>%
mutate(Index = case_when(
Index == 1 ~ "HD",
Index == 2 ~ "NS",
Index == 3 ~ "PA",
Index == 4 ~ "PALC",
TRUE ~ "SMK" # Default case if none of the above conditions are met
))
features_df <- features_df %>%
mutate(
Category = case_when(
Category == "prs_gxe.E" ~ "PRS[gxe]%*%E", # Storing as character strings
Category == "prs_gxe" ~ "'PRS'[gxe]",
Category == "prs_add" ~ "'PRS'[add]",
Category == "E" ~ "'E'",
TRUE ~ Category # Keeps other categories unchanged
)
)
library(ggplot2)
ggplot(data = features_df, aes(x = Index, y = Category, fill = Estimate, size = p_interval)) +
geom_tile(color = "white", fill = "white", size=1) +
geom_point(shape = 22, color = "white", stroke = 0) +
geom_hline(yintercept = seq_along(unique(features_df$Category)) + 0.5, color = "grey") +
geom_vline(xintercept = seq_along(unique(features_df$Index)) + 0.5, color = "grey") +
geom_vline(xintercept = 0.5, color = "grey")+
geom_hline(yintercept = 0.5, color = "grey")+
geom_text(data = subset(features_df,(pvalue <= 0.05)&(pvalue > 0.005)),
aes(label = "\u273B"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE) +
geom_text(data = subset(features_df, (pvalue <= 0.005)&(pvalue > 0.0005)),
aes(label = "\u273B\u273B"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE) +
geom_text(data = subset(features_df, (pvalue <= 0.0005)),
aes(label = "\u273B\u273B\u273B"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE) +
geom_text(data = subset(features_df, (is.na(Estimate))),
aes(label = "NA"),
size = 4, vjust = 0.5, hjust = 0.5,
show.legend = FALSE)+
scale_fill_gradientn(colours = c("red4", "white", "steelblue"),
limit = c(-max(abs(features_df$Estimate))-0.000, max(abs(features_df$Estimate))+0.000),
guide = guide_colorbar(title = "Estimate",
label.position = "right",
barwidth = 1,
#barheight = 15,
barheight = 10,
title.position = "top",
order = 1)) +
scale_size_manual(values = legend_data$size,
breaks = legend_data$p_value_interval,
labels = legend_data$p_value_interval,
guide = guide_legend(title = "p value",
label.position = "right",
title.position = "top")) +
scale_y_discrete(labels = function(x) parse(text = x))+
facet_wrap(~Phenotype, ncol = 2) +###########################
theme_minimal() +
theme(axis.text.x = element_text(angle = 45,
hjust = 1,
vjust = 1,
size = 12,
colour = "black"),
axis.text.y = element_text(size=12, colour = "black"),
axis.title.y = element_text(size = 0),
legend.text = element_text(size = 12),
legend.title = element_text(size = 12),
legend.key.width = unit(0.1, "cm"),
legend.key.height = unit(0.1, "cm"),
legend.box = "horizontal",
legend.spacing.y = unit(0.2, "cm"),
legend.position = "right",
legend.box.margin = margin(b=-10, t=-10, l=-1, r=11),
legend.box.just = ("center"),
plot.margin = margin(b=20, t=20, l=20, r=20)) +
labs(x = "", y = "", fill = "Estimate", size = "p value") +
guides(size = guide_legend(reverse = F,
label.position = "right",
title.position = "top",
override.aes = list(colour="grey",
shape=15,
fill="white",
size=legend_data$size-1)))+
theme(panel.border = element_rect(colour = "black", fill=NA, size=1),
strip.text = element_text(size = 12))
这里我做了以下编辑:
#features_df <- features_df[features_df$Phenotype=="WHR",]
facet_wrap(~Phenotype, ncol = 2) +
将绘图区域划分为 4 个 2 列labs(x = "", y = "", fill = "Estimate", size = "p value") +