我正在为 2 个样本创建 DGE 图。我希望结果图中的线条不要堆叠在彼此之上,而是彼此相邻(以便两者都可见)。
## Load packages
if (! require(ggplot2)) {
install.packages("ggplot2")
}
if (! require(dplyr)) {
install.packages("dplyr")
}
if (! require(tidyr)) {
install.packages("tidyr")
}
library(ggplot2)
library(dplyr)
library(tidyr)
# Gene symbols listed one family per row in reading order. The vector is
# reversed on construction, so the stored order runs from "ADGRV1" to
# "ADGRA1".
gene_order <- rev(c(
  "ADGRA1", "ADGRA2", "ADGRA3",
  "ADGRB1", "ADGRB2", "ADGRB3",
  "CELSR1", "CELSR2", "CELSR3",
  "ADGRD1", "ADGRD2",
  "ADGRE1", "ADGRE2", "ADGRE3", "ADGRE5",
  "ADGRF1", "ADGRF2", "ADGRF3", "ADGRF4", "ADGRF5",
  "ADGRG1", "ADGRG2", "ADGRG3", "ADGRG4", "ADGRG5", "ADGRG6", "ADGRG7",
  "ADGRL1", "ADGRL2", "ADGRL3", "ADGRL4",
  "ADGRV1"
))
## Change variables
# Directory that holds the input table; the plot is written there as well.
file_path <- "/home/pospim/Desktop/Work/bioinformatics/datasets/GSE189727_HIV_done/GSE189727_HIV_dataset1_SZ_DC_Axl_together - analyzed/DGE_radar"
# file.path() joins the components with "/", same result as paste(sep = "/").
file_name <- file.path(file_path, "GPCRS-DE_analysis_DC_CDC2.tsv")
plot_name <- file.path(file_path, "DGE_plot.png")
# Display names for the two samples.
sample_1 <- "DC"
sample_2 <- "CDC2"
# Tab-separated table with a header row. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
data <- read.table(file_name, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
head(data)
# Swap decimal commas for decimal points in every column. gsub() returns
# character vectors, so every column is character after this step; the
# pattern is a literal comma, hence fixed = TRUE.
data[] <- lapply(data, function(x) gsub(",", ".", x, fixed = TRUE))
head(data)
## Clean data
# Columns to coerce to numeric, each paired with the value that replaces NA:
# 0 for a fold change, 1 for a p-value.
na_fill <- c(
  log2FoldChange_1 = 0,
  pvalue_1 = 1,
  log2FoldChange_2 = 0,
  pvalue_2 = 1
)
for (col in names(na_fill)) {
  values <- as.numeric(as.character(data[[col]]))
  ## Handle NAs
  values[is.na(values)] <- na_fill[[col]]
  data[[col]] <- values
}
## Add log10 pvalue
# -log10(p) for each sample, appended as log10_pvalue_1 then log10_pvalue_2.
for (idx in c("1", "2")) {
  data[[paste0("log10_pvalue_", idx)]] <- -log10(data[[paste0("pvalue_", idx)]])
}
# Fix the gene order through the factor levels.
data$gene_symbol <- factor(data$gene_symbol, levels = gene_order)
head(data)
## Convert data to long format for plotting
# Produces one row per gene and sample with the columns gene_symbol, sample,
# log2FoldChange and log10_pvalue.
#
# The sample index is taken from the anchored "_1" / "_2" column suffix.
# Searching the whole name for "1" or "2" is fragile because
# "log2FoldChange_1" itself contains a "2". The ".value" sentinel pivots the
# fold-change and p-value columns together, so no cross product of the two
# has to be built and filtered afterwards.
data_lng <- data %>%
  pivot_longer(
    cols = matches("^(log2FoldChange|log10_pvalue)_[12]$"),
    names_to = c(".value", "sample_id"),
    names_pattern = "^(.+)_([12])$"
  ) %>%
  mutate(sample = if_else(sample_id == "1", sample_1, sample_2)) %>%
  select(gene_symbol, sample, log2FoldChange, log10_pvalue)
head(data_lng)
# Re-apply the gene order so the factor levels are guaranteed on the long table.
data_lng$gene_symbol <- factor(data_lng$gene_symbol, levels = gene_order)
## Plot DGE
# Both samples share one row per gene, so segments drawn at the same y
# position cover each other. A horizontal line range running between 0 and
# the fold change can be dodged along the discrete gene axis, which places
# the two samples side by side inside each row.
sample_dodge <- position_dodge(width = 0.7)
p <- ggplot(data_lng, aes(y = gene_symbol, color = sample)) +
  geom_linerange(
    # pmin/pmax keep xmin <= xmax for negative and positive fold changes.
    aes(xmin = pmin(log2FoldChange, 0), xmax = pmax(log2FoldChange, 0)),
    position = sample_dodge,
    linewidth = 2,
    show.legend = TRUE
  ) +
  scale_color_manual(
    name = "Sample",
    values = setNames(c("blue", "cadetblue1"), c(sample_1, sample_2))
  ) +
  labs(
    title = "Differential Gene Expression",
    x = "relative expression (log2FoldChange)", y = ""
  ) +
  theme_minimal()
# Explicit size (inches) so that two lines per gene row fit without touching.
ggsave(plot_name, plot = p, width = 7, height = 9)
print(p)
structure(list(gene_symbol = structure(32:1, levels = c("ADGRV1", "ADGRL4", "ADGRL3", "ADGRL2", "ADGRL1", "ADGRG7", "ADGRG6", "ADGRG5", "ADGRG4", "ADGRG3", "ADGRG2", "ADGRG1", "ADGRF5", "ADGRF4", "ADGRF3", "ADGRF2", "ADGRF1", "ADGRE5", "ADGRE3", "ADGRE2", "ADGRE1", "ADGRD2", "ADGRD1", "CELSR3", "CELSR2", "CELSR1", "ADGRB3", "ADGRB2", "ADGRB1", "ADGRA3", "ADGRA2", "ADGRA1"), class = "factor"),
baseMean = c("0", "0.660578564", "0", "0", "0", "0", "0.178397561", "1.522702077", "1.340522526", "0.840975621", "0.301928182", "0.150964091", "10.13217716", "1.313089055", "40.93692353", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0.963088173", "0.644225417", "0", "0.838976126", "0", "1.410153", "0", "0"),
log2FoldChange_1 = c(0, 0.050537239, 0, 0, 0, 0, -0.971180299, 0.315840199, -0.758324082, 0.551442227, 1.736471275, 0.952373053, -0.492148172, -0.026726608, -0.21961465, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.143565116, -0.081269533, 0, -0.575736451, 0, 3.93190614, 0, 0),
lfcSE = c(NA, "2.658534114", NA, NA, NA, NA, "4.080472857", "1.61107253", "1.693790467", "2.186266996", "4.048576265", "4.080472857", "0.845207802", "1.689808395", "0.404274885", NA, NA, NA, NA, NA, NA, NA, NA, NA, "2.887456086", "2.666317145", NA, "2.186422311", NA, "2.479435904", NA, NA),
stat = c(NA, "0.019009438", NA, NA, NA, NA, "-0.238006803", "0.196043439", "-0.447708319", "0.252230047", "0.428909118", "0.233397718", "-0.582280678", "-0.015816354", "-0.543231", NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.742371504", "-0.03048007", NA, "-0.263323535", NA, "1.585806729", NA, NA),
pvalue_1 = c(1, 0.984833577, 1, 1, 1, 1, 0.811875818, 0.844576166, 0.654363717, 0.800863255, 0.667989364, 0.815452585, 0.560377638, 0.987380901, 0.586970765, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.45786228, 0.975684188, 1, 0.792301222, 1, 0.112783159, 1, 1),
padj = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "0.999957765", NA, "0.999957765", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
log2FoldChange_2 = c(0, 0.742371504, 0, 0, 0, 0, -0.971180299, -0.575736451, -0.758324082, 0.551442227, 1.736471275, 0.952373053, -0.492148172, -0.026726608, -0.21961465, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.050537239, -0.081269533, 0, -0.575736451, 0, 2.887456086, 0, 0),
pvalue_2 = c(1, 0.984833577, 1, 1, 1, 1, 0.811875818, 0.844576166, 0.654363717, 0.800863255, 0.667989364, 0.815452585, 0.560377638, 0.987380901, 0.586970765, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0.45786228, 0.975684188, 1, 0.792301222, 1, 0.112783159, 1, 1),
log10_pvalue_1 = c(0, 0.00663715295120108, 0, 0, 0, 0, 0.0905103940122029, 0.0733611785596228, 0.184180789323803, 0.0964416320665321, 0.175230452483861, 0.0886012862135488, 0.251519203662807, 0.00551527763353332, 0.231383528931875, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.33926513341622, 0.0106907331570114, 0, 0.101109674241976, 0, 0.947755745205597, 0, 0),
log10_pvalue_2 = c(0, 0.00663715295120108, 0, 0, 0, 0, 0.0905103940122029, 0.0733611785596228, 0.184180789323803, 0.0964416320665321, 0.175230452483861, 0.0886012862135488, 0.251519203662807, 0.00551527763353332, 0.231383528931875, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.33926513341622, 0.0106907331570114, 0, 0.101109674241976, 0, 0.947755745205597, 0, 0)),
row.names = c(NA, -32L), class = "data.frame")
显示 32 条条目中的第 1 至 19 条,总共 11 列
我按照GPT的建议尝试了抖动和偏移,但没有成功。