我正在使用 R 编程语言。
我有两个数据集:
这是一些 R 代码,它模拟我正在使用的数据集的结构:
library(lubridate)
library(dplyr)
library(ggplot2)
set.seed(42)
#' Simulate ten years of monthly historical observations
#'
#' Produces one row per month from 2000-01-01 through 2009-12-01 with a
#' value drawn uniformly from [100, 300] and rounded to two decimals.
#'
#' @return A data.frame with columns `date` (character, "%Y-%m-%d"),
#'   `value` (numeric) and `class` (always "Historical").
generate_historical_data <- function() {
  month_starts <- seq(
    from = as.Date("2000-01-01"),
    to = as.Date("2009-12-01"),
    by = "month"
  )
  n_months <- length(month_starts)
  data.frame(
    date = format(month_starts, "%Y-%m-%d"),
    value = round(runif(n_months, min = 100, max = 300), 2),
    class = "Historical"
  )
}
#' Simulate monthly model predictions for one class
#'
#' Builds a monthly series made of `base_mean`, a 12-month sinusoidal
#' seasonal component (amplitude 20) and Gaussian noise (sd 10), together
#' with a symmetric interval around it. The interval half-width is drawn
#' independently for every month from a uniform distribution on [15, 25].
#'
#' @param class Label stored in the `class` column of every row.
#' @param start_date First month of the series; a `Date` or any value that
#'   `as.Date()` can parse (for example "2010-01-01").
#' @param base_mean Level around which the simulated mean fluctuates.
#' @param n_months Number of monthly rows to generate (default 120).
#' @return A data.frame with columns `date` (character, "%Y-%m-%d"),
#'   `mean_value`, `low_value`, `high_value` (numeric, rounded to two
#'   decimals) and `class`.
generate_classified_data <- function(class, start_date, base_mean,
                                     n_months = 120) {
  stopifnot(is.numeric(n_months), length(n_months) == 1, n_months >= 1)

  # as.Date() is a no-op for Date input and lets callers pass ISO strings
  dates <- seq(as.Date(start_date), by = "month", length.out = n_months)
  month_index <- seq_along(dates)
  seasonal <- 20 * sin(2 * pi * month_index / 12)

  # Draw order (rnorm first, then runif) is kept so seeded runs reproduce
  mean_values <- base_mean + seasonal + rnorm(length(dates), 0, 10)
  # Random half-width of the interval, between 15 and 25 for each month
  spread <- runif(length(dates), 15, 25)

  data.frame(
    date = format(dates, "%Y-%m-%d"),
    mean_value = round(mean_values, 2),
    low_value = round(mean_values - spread, 2),
    high_value = round(mean_values + spread, 2),
    class = class
  )
}
# Build the historical series, then start the forecasts one month after it.
historical_data <- generate_historical_data()

# ISO "YYYY-MM-DD" strings sort chronologically, so max() finds the latest
last_historical_date <- as.Date(max(historical_data$date))
prediction_start_date <- last_historical_date %m+% months(1)

# One forecast per class, generated in A, B, C order and stacked row-wise.
# USE.NAMES = FALSE keeps the list unnamed so rbind() numbers rows 1..n.
model_predictions <- do.call(
  rbind,
  mapply(
    generate_classified_data,
    class = c("A", "B", "C"),
    base_mean = c(150, 300, 450),
    MoreArgs = list(start_date = prediction_start_date),
    SIMPLIFY = FALSE,
    USE.NAMES = FALSE
  )
)
这两个数据集如下所示:
> head(historical_data)
date value class
1 2000-01-01 282.96 Historical
2 2000-02-01 287.42 Historical
3 2000-03-01 157.23 Historical
4 2000-04-01 266.09 Historical
5 2000-05-01 228.35 Historical
6 2000-06-01 203.82 Historical
> head(model_predictions)
date mean_value low_value high_value class
1 2010-01-01 156.33 135.73 176.93 A
2 2010-02-01 169.17 147.65 190.70 A
3 2010-03-01 175.82 158.02 193.61 A
4 2010-04-01 181.32 156.52 206.12 A
5 2010-05-01 152.73 131.29 174.17 A
6 2010-06-01 163.03 142.20 183.85 A
我的问题:使用 ggplot,我想先绘制历史数据,然后从历史数据的最后一个点开始,绘制 3 个类别(即 A、B、C)各自的 mean_value,并用 low_value 和 high_value 作为置信区间。
我尝试这样做:
# Plot the historical series in black, followed by one coloured forecast
# line per class with a shaded low/high band around it.
#
# The `date` columns are wrapped in as.Date() because scale_x_date() only
# accepts Date input; as.Date() leaves columns that are already Date
# unchanged, so this works for both character and Date columns.
ggplot() +
  geom_line(
    data = historical_data,
    aes(x = as.Date(date), y = value, color = class),
    linewidth = 1 # `size` for lines is deprecated since ggplot2 3.4.0
  ) +
  geom_line(
    data = model_predictions,
    aes(x = as.Date(date), y = mean_value, color = class),
    linewidth = 1
  ) +
  geom_ribbon(
    data = model_predictions,
    aes(
      x = as.Date(date),
      ymin = low_value,
      ymax = high_value,
      fill = class
    ),
    alpha = 0.2
  ) +
  scale_color_manual(values = c(
    "Historical" = "black",
    "A" = "red",
    "B" = "blue",
    "C" = "green"
  )) +
  scale_fill_manual(values = c(
    "A" = "red",
    "B" = "blue",
    "C" = "green"
  )) +
  labs(
    title = "Historical Data and Model Predictions",
    x = "Date",
    y = "Value",
    color = "Data Type",
    fill = "Prediction Class"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  ) +
  scale_x_date(
    date_breaks = "1 year",
    date_labels = "%Y"
  ) +
  scale_y_continuous(limits = c(0, 550))
是否可以使3条彩色线正好从黑线结束的地方开始?
谢谢!
您可以使用历史数据中的最后一个日期和最后一个值创建一个数据框,并将该数据框包含在您的 rbind 中。
# Make every forecast line start exactly where the historical line ends by
# prepending, for each class, one row that repeats the last historical point.

# scale_x_date() needs Date columns, so convert the character dates up front
historical_data$date <- as.Date(historical_data$date)

# Take date and value from the same (latest) row instead of matching a
# character column against a Date with `==`
last_row <- historical_data[which.max(historical_data$date), ]
last_historical_date <- last_row$date
last_value <- last_row$value

# One stitch row per class; the band is left NA so the ribbon only covers
# the forecast months. Scalars are recycled to the length of `class`.
StitchDF <- data.frame(
  date = last_historical_date,
  mean_value = last_value,
  low_value = NA_real_,
  high_value = NA_real_,
  class = c("A", "B", "C")
)

prediction_start_date <- last_historical_date %m+% months(1)
model_predictions <- rbind(
  generate_classified_data("A", prediction_start_date, base_mean = 150),
  generate_classified_data("B", prediction_start_date, base_mean = 300),
  generate_classified_data("C", prediction_start_date, base_mean = 450)
)
# Convert explicitly so both data frames share the Date type before stacking
model_predictions$date <- as.Date(model_predictions$date)
model_predictions <- rbind(StitchDF, model_predictions)
另外,scale_x_date() 要求 x 轴变量为 Date 类型,因此请使用 lubridate 中的 ymd() 函数(或 base R 的 as.Date())将所有日期列从字符串转换为 Date 类型;这样绘图才能正常生成,坐标轴也会更加清晰。