我有一个由数千个点组成的散点图,并使用 `facet_wrap` 按某个因子进行分面。我已经使用 `ggbreak` 包中的 `scale_y_break` 成功截断了 y 轴,现在我想使用 `ggrepel` 包中的 `geom_text_repel` 标记我选择的数据点(这些点的基因名保存在向量 `top_g` 中)。
问题是 `ggrepel` 把我的所有标签在 y 轴断点下方和上方的坐标空间里各绘制了一遍。由于所有要标记的点都位于断点上方,图的下半部分出现了许多带着向上长引线的标签。`scale_y_break` 或 `geom_text_repel` 中有哪些设置可以让标签只在图的上半部分绘制一次?
我有一个如下所示的数据框:
Gene Patient Estimate Pr(>|t|) Color
A2M A2M 13 -0.09364448 0.82082825 NS or FC < 0.5
A2M1 A2M 19 -0.60473507 0.06386751 FC > 0.5
A2M2 A2M 24 0.19832106 0.49231696 NS or FC < 0.5
A2M3 A2M 30 0.38843438 0.24560332 NS or FC < 0.5
A4GALT A4GALT 13 0.45057750 0.07253149 NS or FC < 0.5
A4GALT1 A4GALT 19 0.22712101 0.17208382 NS or FC < 0.5
A4GALT2 A4GALT 24 -0.40367138 0.03971972 p < 0.05
A4GALT3 A4GALT 30 0.45064877 0.04509195 p < 0.05
AAAS AAAS 13 -0.40079848 0.09217090 NS or FC < 0.5
AAAS1 AAAS 19 0.26819152 0.02358786 p < 0.05
AAAS2 AAAS 24 0.07455519 0.69811176 NS or FC < 0.5
AAAS3 AAAS 30 0.03123206 0.91063219 NS or FC < 0.5
我用来创建绘图的代码如下:
# Plot settings ----
# Threshold on |Estimate| used for the dashed vertical guides and for
# deciding which selected genes get a label
fold_change_cutoff <- 0.5
# One colour per level of the Color factor, keyed by the level name
volcano_colours <- setNames(
  c("grey", "lightblue", "#f0a3db", "#8F54A0", "#411945"),
  levels(results_by_patient$Color)
)
# Genes to annotate on the plot
top_g <- c(
  "IGFBP3", "MT1X", "MT2A", "CTHRC1", "SLC25A37", "SFTPA1",
  "MOCOS", "IGHG2", "IGHG4", "IGHG3", "IDH3A", "IGHG1"
)
# Build the volcano plot, one panel per patient ----
# Layers are added in this order: cutoff guide lines, points, axis/legend
# labels, colour and y scales, repelled gene labels, theme, facets, and
# finally the y-axis break from ggbreak.
volcano_plot_overall <-
  ggplot(
    results_by_patient,
    aes(
      x = Estimate,
      y = -log10(`Pr(>|t|)`),
      color = Color,
      label = Gene
    )
  ) +
  # Dashed guides at +/- the fold-change cutoff and at the p-value cutoff
  geom_vline(
    xintercept = c(fold_change_cutoff, -fold_change_cutoff),
    linetype = "dashed"
  ) +
  geom_hline(
    yintercept = -log10(p_value_cutoff),
    linetype = "dashed"
  ) +
  geom_point() +
  labs(
    x = "Enriched in TI <- log2(FC) -> Enriched in TC",
    y = bquote(-log[10]("p-value")),
    color = "Significance"
  ) +
  scale_color_manual(
    values = volcano_colours,
    # Enlarge the legend keys without changing the plotted point size
    guide = guide_legend(override.aes = list(size = 4))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  # Label only the selected genes that pass the fold-change cutoff.
  # Optional extra filter, currently disabled: & Qvalue_Global < q_value_cutoff
  geom_text_repel(
    data = subset(
      results_by_patient,
      Gene %in% top_g & abs(Estimate) > fold_change_cutoff
    ),
    size = 4,
    point.padding = 0.15,
    color = "black",
    min.segment.length = 0.1,
    box.padding = 0.2,
    max.overlaps = 50
  ) +
  theme_classic(base_size = 12) +
  theme(legend.position = "bottom") +
  facet_wrap(~Patient, nrow = 1) +
  # Both break limits sit at the p-value cutoff, so the axis is split there
  # without dropping any range
  ggbreak::scale_y_break(
    breaks = rep(-log10(p_value_cutoff), times = 2),
    scales = 4,
    expand = FALSE
  )
# Display the plot ----
volcano_plot_overall
向量 top_g 包含我想要绘制的基因的名称。
> top_g
[1] "IGFBP3" "MT1X" "MT2A" "CTHRC1" "SLC25A37" "SFTPA1" "MOCOS" "IGHG2" "IGHG4" "IGHG3" "IDH3A" "IGHG1"
我尝试了另一种绘制标签的方法:创建一个名为 `Label` 的列,对于我想标注的数据点,该列存放标签字符串;对于我不想标注的基因,该列为 `NA`。我也尝试过去掉分面(`facet_wrap`),但这没有帮助。
# Flag the rows to label: genes listed in top_g whose absolute Estimate
# exceeds the fold-change cutoff. Rows with a missing Estimate are treated as
# "do not label" so the mask never contains NA (R rejects NA subscripts when
# assigning more than one value).
top_g_filter <- results_by_patient$Gene %in% top_g &
  !is.na(results_by_patient$Estimate) &
  abs(results_by_patient$Estimate) > fold_change_cutoff
# Reset the column first so labels left over from an earlier run (e.g. with a
# different top_g) cannot survive; rows that are not flagged stay NA.
results_by_patient$Label <- NA_character_
# Use $Gene[...] rather than [rows, "Gene"]: the latter yields a one-column
# tibble instead of a plain vector when results_by_patient is a tibble.
results_by_patient$Label[top_g_filter] <-
  as.character(results_by_patient$Gene[top_g_filter])
# Un-faceted variant of the volcano plot ----
# Labels come from the Label column; rows whose Label is NA get no text.
volcano_plot_overall_2 <-
  ggplot(
    results_by_patient,
    aes(
      x = Estimate,
      y = -log10(`Pr(>|t|)`),
      color = Color,
      label = Label
    )
  ) +
  # Dashed guides at +/- the fold-change cutoff and at the p-value cutoff
  geom_vline(
    xintercept = c(fold_change_cutoff, -fold_change_cutoff),
    linetype = "dashed"
  ) +
  geom_hline(
    yintercept = -log10(p_value_cutoff),
    linetype = "dashed"
  ) +
  geom_point() +
  labs(
    x = "Enriched in TI <- log2(FC) -> Enriched in TC",
    y = bquote(-log[10]("p-value")),
    color = "Significance"
  ) +
  scale_color_manual(
    values = volcano_colours,
    # Enlarge the legend keys without changing the plotted point size
    guide = guide_legend(override.aes = list(size = 4))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  geom_text_repel(
    size = 4,
    point.padding = 0.15,
    color = "black",
    min.segment.length = 0.1,
    box.padding = 0.2,
    max.overlaps = 50
  ) +
  theme_classic(base_size = 12) +
  theme(legend.position = "bottom") +
  # Both break limits sit at the p-value cutoff, so the axis is split there
  # without dropping any range
  ggbreak::scale_y_break(
    breaks = rep(-log10(p_value_cutoff), times = 2),
    scales = 4,
    expand = FALSE
  )
# Display the plot ----
volcano_plot_overall_2
如果有人想要导入数据样本(共 37560 行中的 96 行),下面是该数据框的 dump(不包括 `Label` 列):
results_by_patient_for_export <-
structure(list(Gene = c("IGFBP3", "SFTPA1", "MT2A", "SLC25A37",
"MOCOS", "CTHRC1", "IDH3A", "MT1X", "IGHG1", "IGHG2", "IGHG3",
"IGHG4", "IGFBP3", "SFTPA1", "MT2A", "SLC25A37", "MOCOS", "CTHRC1",
"IDH3A", "MT1X", "IGHG1", "IGHG2", "IGHG3", "IGHG4", "IGFBP3",
"SFTPA1", "MT2A", "SLC25A37", "MOCOS", "CTHRC1", "IDH3A", "MT1X",
"IGHG1", "IGHG2", "IGHG3", "IGHG4", "IGFBP3", "SFTPA1", "MT2A",
"SLC25A37", "MOCOS", "CTHRC1", "IDH3A", "MT1X", "IGHG1", "IGHG2",
"IGHG3", "IGHG4", "TPD52L2", "SPOP", "YAE1", "KIAA0319L", "HECTD4",
"TRAPPC12", "GSTM2", "PRSS16", "VAMP3", "CXCL1", "RAB38", "MRPS17",
"MROH6", "LETMD1", "PHC3", "CMTR1", "KHDRBS1", "GNB2", "RFNG",
"BTRC", "ARHGAP25", "DPYD", "CNIH1", "SPIB", "PRKCZ", "POLK",
"PIGQ", "PDIA4", "SLC6A8", "FOLR1", "MRPL53", "CUL9", "WDR25",
"CCDC82", "DHX38", "TNFRSF21", "PPM1B", "RAVER1", "MTR", "ZC3H15",
"MYH9", "ANKH", "SPARC", "CCND1", "TOLLIP", "TIA1", "PDCD10",
"PPP2R5B"), Patient = c("13", "13", "13", "13", "13", "13", "13",
"13", "13", "13", "13", "13", "19", "19", "19", "19", "19", "19",
"19", "19", "19", "19", "19", "19", "24", "24", "24", "24", "24",
"24", "24", "24", "24", "24", "24", "24", "30", "30", "30", "30",
"30", "30", "30", "30", "30", "30", "30", "30", "19", "24", "13",
"13", "19", "13", "19", "13", "13", "30", "30", "24", "24", "19",
"24", "24", "19", "19", "24", "13", "24", "30", "13", "30", "19",
"24", "24", "24", "19", "24", "19", "19", "24", "24", "24", "19",
"19", "13", "24", "13", "30", "30", "30", "30", "30", "13", "19",
"30"), Estimate = c(1.4342758701328928, -0.95936816185395002,
1.3765890535912415, 0.60615370942926672, -1.010270997593212,
0.56353243422236887, -0.90255562228792563, 1.5576954958648126,
-1.3607499236262317, -1.4364118143364513, -1.474110922304231,
-1.1250304191855622, 3.1276702547909232, -3.3143554695322655,
0.9624251913317039, 0.5084592549167477, -0.54588374778088244,
-0.13824110655637623, -0.6052417201126471, 3.7977091871394095,
-0.71741871486351427, -0.68241455453487398, -0.68519031505256389,
-0.67509190711461287, 0.7279818740896894, -0.8371731449517027,
0.6621834292330987, 0.68601080309446616, -0.54741249344639331,
1.0233680799126907, -0.76690550086857856, 0.53216320363185055,
-0.84083755741626598, -0.91939098435321354, -0.97376403120535826,
-0.88391863742066801, -0.51564041463475541, -0.74453358848207518,
-0.28471451077109111, -0.22909521284674911, -0.044770214916977628,
0.68239635866331572, 0.15577084253033693, -0.49540662383622563,
0.098549027264916766, -0.35613186894233162, -0.38333940223368979,
-0.43088746468894373, 0.5351162057034351, 0.027112990337612539,
0.15195259496679489, -0.3962375835799139, 0.25002798235856544,
-0.07057695299871225, 0.79863171699545554, 0.29513008288664477,
0.57201674855370344, -0.50089052176041227, -0.26468319819506236,
-0.0081148038621813127, -0.2336531701309047, 0.17000723874768747,
0.14010918425432589, 0.18737454938419987, 0.44672531215461098,
-0.033362956135194674, 0.12096884904813542, 0.081772635248707412,
-0.45350911420250539, 0.22080135450764451, 0.34743885824956194,
-0.2032755327809011, 0.92009563303147257, 0.022706998886684836,
-0.25977610831654502, 0.015140050702365676, 1.1464018091062771,
-0.038828036472541648, -0.15644114183766625, -0.074307333870867423,
-0.099945432970827974, -0.20100761722591759, 0.15797441169132551,
0.086752990476589312, -0.11579339896376119, 0.14896415466954385,
0.50707961300974613, -0.034268211871531895, -0.03941067650003869,
-0.30431222659251722, 0.64079275967329175, 0.078296987301706519,
-0.043445685667455242, 0.32461627444485458, 0.095282428367853819,
-0.24711741016959532), `Pr(>|t|)` = c(0.00054521644956236591,
0.20844924924849634, 0.0063344881708467032, 0.04690839856354196,
0.010426543004723813, 0.015327690300849395, 0.00012672535286031001,
0.0014902029039675733, 0.00032902060622238736, 0.0095163818112773899,
0.00041057255262061366, 0.0025832008904889104, 3.9052657243800136e-08,
0.00031557989822396073, 0.0016258358307589705, 0.033893440018941505,
0.048477691960592727, 0.74798603744403425, 0.0027477084430068809,
1.3029037134854807e-10, 0.0076116608175266203, 0.0048210474122679084,
0.0015824143193650824, 0.00068984318394415341, 0.031253983612796923,
0.045503059810359778, 0.023560209387624528, 0.0021506430320710149,
0.039887537832338241, 0.012514059956998468, 0.0079174092591247601,
0.044102721831279078, 0.034534589154438501, 0.017662871943542321,
0.014178542803463335, 0.018739918520330426, 0.21531375078386988,
0.007547345201378057, 0.34664827014402366, 0.23592124928581937,
0.86675590750088505, 0.010740914956562011, 0.37538269351680009,
0.07326875438459321, 0.7710526024847264, 0.19876854208969691,
0.2700180593238321, 0.088821455238771108, 5.4060422418354054e-05,
0.85902012019183616, 0.51278621062991048, 0.068238170943737353,
0.1067114019857604, 0.54185622331471206, 0.00045391342317257377,
0.42458757033517591, 0.0031351261383892148, 0.066183649693906679,
0.26831300173960709, 0.97624479310207934, 0.32802963857025325,
0.075725490459696815, 0.39453879747887632, 0.25816494412893093,
1.0744175303137345e-05, 0.69930544875517775, 0.38565565156662995,
0.56501752365592162, 0.13302284729310845, 0.30997459777531461,
0.016620932938517161, 0.60853787428757955, 8.6202581176759562e-05,
0.90431267771378143, 0.35611195951734775, 0.8957129076558038,
1.901545313467059e-07, 0.80795484621207692, 0.31534016133786252,
0.66480293941954949, 0.69897965708479459, 0.2190571720859576,
0.24508617068981289, 0.46460451373712663, 0.48100310611514163,
0.51297903648061927, 0.061352074933778943, 0.79036877023581065,
0.69208351526084511, 0.30645914773081245, 0.10527876566915252,
0.66233329772369676, 0.7606641149420309, 0.080496301712866342,
0.57695731467415023, 0.21638928987345277), Color = structure(c(5L,
2L, 5L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L,
1L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 4L, 5L, 4L, 4L, 5L, 4L, 4L,
4L, 4L, 4L, 2L, 5L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 5L,
1L, 1L, 1L, 1L, 1L, 5L, 1L, 5L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 3L,
1L, 1L, 1L, 1L, 1L, 3L, 1L, 5L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L), levels = c("NS or FC < 0.5",
"FC > 0.5", "p < 0.05", "FC > 0.5 & p < 0.05", "FC > 0.5 & FDR < 0.1"
), class = "factor")), row.names = c("IGFBP3", "SFTPA1", "MT2A",
"SLC25A37", "MOCOS", "CTHRC1", "IDH3A", "MT1X", "IGHG1", "IGHG2",
"IGHG3", "IGHG4", "IGFBP31", "SFTPA11", "MT2A1", "SLC25A371",
"MOCOS1", "CTHRC11", "IDH3A1", "MT1X1", "IGHG11", "IGHG21", "IGHG31",
"IGHG41", "IGFBP32", "SFTPA12", "MT2A2", "SLC25A372", "MOCOS2",
"CTHRC12", "IDH3A2", "MT1X2", "IGHG12", "IGHG22", "IGHG32", "IGHG42",
"IGFBP33", "SFTPA13", "MT2A3", "SLC25A373", "MOCOS3", "CTHRC13",
"IDH3A3", "MT1X3", "IGHG13", "IGHG23", "IGHG33", "IGHG43", "TPD52L21",
"SPOP2", "YAE1", "KIAA0319L", "HECTD41", "TRAPPC12", "GSTM21",
"PRSS16", "VAMP3", "CXCL15", "RAB383", "MRPS172", "MROH62", "LETMD11",
"PHC32", "CMTR12", "KHDRBS11", "GNB21", "RFNG2", "BTRC", "ARHGAP252",
"DPYD3", "CNIH1", "SPIB3", "PRKCZ1", "POLK2", "PIGQ2", "PDIA42",
"SLC6A81", "FOLR12", "MRPL531", "CUL91", "WDR252", "CCDC822",
"DHX382", "TNFRSF211", "PPM1B1", "RAVER1", "MTR2", "ZC3H15",
"MYH93", "ANKH3", "SPARC3", "CCND13", "TOLLIP3", "TIA1", "PDCD101",
"PPP2R5B3"), class = "data.frame")
预先感谢您的帮助,如果我可以提供任何进一步的信息,请告诉我!
此问题出现在 ggrepel R 包的最新版本 0.9.6 中(CRAN 发布于 2024-09-07,GitHub 发布于 2024-09-08),改用旧版本 0.9.5(2024-01-11 发布于 GitHub:https://github.com/slowkow/ggrepel/releases/tag/0.9.5)即可解决。您可以使用下面的代码卸载当前版本并安装旧版本。
# Remove the currently installed ggrepel
remove.packages("ggrepel")
# Download the ggrepel 0.9.5 source tarball (ggrepel-0.9.5.tar.gz) from the
# GitHub release page first; the path below assumes it was saved in the
# Downloads folder of your home directory ("~/" expands to the home directory).
# Install the older version from that local file: repos = NULL tells
# install.packages() to treat the argument as a file path, not a package name.
install.packages("~/Downloads/ggrepel-0.9.5.tar.gz", repos = NULL, type = "source")
# Load the downgraded package
library(ggrepel)
你的问题将会得到解决。