library(ggplot2)
library(ggbrace)
# Seed the RNG so the simulated population is the same on every run.
set.seed(127)
# Simulated population of 100 observations: x is uniform on [0, 10] and
# y lies around the line 3 + 1.5 * x with normal noise (sd = 4).
x <- runif(n = 100, min = 0, max = 10)
y <- 3 + 1.5 * x + rnorm(n = 100, mean = 0, sd = 4)
data <- data.frame(x, y)
# First plot: the whole population as gray points with its linear-model
# (lm) regression line in black.
# The title is set once, through labs(); an extra ggtitle() before it would
# only be overwritten and never shown.
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "gray") +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  labs(
    title = "Population and sample regression Lines",
    x = "Years of education",
    y = "Log of earnings"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# First sample: 50 of the 100 population observations, drawn without
# replacement. The index vector and the per-axis vectors stay available
# as separate objects next to the data frame.
sample_indices1 <- sample(seq_len(100), size = 50)
data_sample1 <- data.frame(
  x = x[sample_indices1],
  y = y[sample_indices1]
)
x_sample1 <- data_sample1$x
y_sample1 <- data_sample1$y
# Second plot: population in gray, first sample highlighted in red, and only
# the population regression line (no line fitted to the sample yet).
ggplot(data = data, mapping = aes(x = x, y = y)) +
  geom_point(colour = "gray") +
  # The sample layer reuses the plot-level x/y mapping on its own data.
  geom_point(data = data_sample1, colour = "#F8766D") +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  labs(
    x = "Years of education",
    y = "Log of earnings",
    title = "Population and sample regression Lines"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Third plot: as the second plot, plus the regression line fitted to the
# first sample (red) next to the population line (black).
ggplot(data = data, mapping = aes(x = x, y = y)) +
  geom_point(colour = "gray") +
  # The sample layers reuse the plot-level x/y mapping on their own data.
  geom_point(data = data_sample1, colour = "#F8766D") +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  geom_smooth(
    data = data_sample1, method = "lm", se = FALSE, colour = "#F8766D"
  ) +
  labs(
    x = "Years of education",
    y = "Log of earnings",
    title = "Population and sample regression Lines"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Second sample: another 50 of the 100 population observations, drawn
# without replacement. The index vector and the per-axis vectors stay
# available as separate objects next to the data frame.
sample_indices2 <- sample(seq_len(100), size = 50)
data_sample2 <- data.frame(
  x = x[sample_indices2],
  y = y[sample_indices2]
)
x_sample2 <- data_sample2$x
y_sample2 <- data_sample2$y
# Fourth plot: population in gray, second sample and its fitted regression
# line in teal, population regression line in black.
ggplot(data = data, mapping = aes(x = x, y = y)) +
  geom_point(colour = "gray") +
  # The sample layers reuse the plot-level x/y mapping on their own data.
  geom_point(data = data_sample2, colour = "#00BFC4") +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  geom_smooth(
    data = data_sample2, method = "lm", se = FALSE, colour = "#00BFC4"
  ) +
  labs(
    x = "Years of education",
    y = "Log of earnings",
    title = "Population and sample regression Lines"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Fifth plot: everything together - population (gray points, black line),
# first sample (red points and line) and second sample (teal points and line).
ggplot(data = data, mapping = aes(x = x, y = y)) +
  geom_point(colour = "gray") +
  # The sample layers reuse the plot-level x/y mapping on their own data.
  geom_point(data = data_sample1, colour = "#F8766D") +
  geom_point(data = data_sample2, colour = "#00BFC4") +
  geom_smooth(method = "lm", se = FALSE, colour = "black") +
  geom_smooth(
    data = data_sample1, method = "lm", se = FALSE, colour = "#F8766D"
  ) +
  geom_smooth(
    data = data_sample2, method = "lm", se = FALSE, colour = "#00BFC4"
  ) +
  labs(
    x = "Years of education",
    y = "Log of earnings",
    title = "Population and sample regression Lines"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Sixth plot: first sample with its regression line, annotated with the
# formulas for the population error u_i and the sample residual u-hat_i,
# each accompanied by a curly brace.
#
# Each brace gets its own two-row data frame through `data =`. Writing the
# coordinates as vectors directly inside aes() makes ggplot2 check them
# against the 100-row plot data and fail with "Aesthetics must be either
# length 1 or the same as the data".
# NOTE(review): stat_brace() is left at its default placement settings;
# confirm the braces land where intended with the installed ggbrace version.
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "gray") +
  geom_point(data = data_sample1, aes(x = x, y = y), color = "#F8766D") +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_smooth(data = data_sample1, method = "lm", color = "#F8766D", se = FALSE) +
  labs(
    title = "Population and sample regression Lines",
    x = "Years of education",
    y = "Log of earnings"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  # With parse = TRUE the label is a plotmath expression written as a string.
  annotate(
    "text",
    x = 1, y = 4.5,
    label = "u[i] == y[i] - beta[0] - beta[1] * x[i]",
    parse = TRUE, size = 3.5, color = "#F8766D"
  ) +
  stat_brace(
    data = data.frame(x = c(2.8, 3.3), y = c(0.9, 7.5)),
    mapping = aes(x = x, y = y),
    rotate = 270, size = 0.5, color = "#F8766D"
  ) +
  annotate(
    "text",
    x = 4.5, y = 3.5,
    label = "hat(u)[i] == y[i] - hat(beta)[0] - hat(beta)[1] * x[i]",
    parse = TRUE, size = 3.5, color = "#F8766D"
  ) +
  stat_brace(
    data = data.frame(x = c(2.2, 2.7), y = c(0.9, 5.3)),
    mapping = aes(x = x, y = y),
    rotate = 90, size = 0.5, color = "#F8766D"
  )
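# Note on stat_brace(): brace coordinates belong in a data.frame passed via
# `data =`. Supplying x and y as vectors directly inside aes() causes an
# error, because the length of those vectors differs from the number of rows
# of the global plot data.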