我正在尝试添加一条仅在密度图上绘制区域的边界内延伸的线。这与这里的问题类似:将摘要信息添加到使用 ggplot 创建的密度图中,但它们不会在绘制区域内添加一条垂直线(它扩展了整个绘图)。我希望每条垂直平均线都停在密度几何的顶部,但我无法找到一种方法来做到这一点。
简而言之:我想捕获密度图顶部的值,其中每个组的平均值,并在该点处绘制线条末端。
这里有一些代码来演示:
iris <- as.data.table(iris)
iris_summary <- iris[, .(sepal_mean = mean(Sepal.Length),
sepal_se_low = mean(Sepal.Length) - sd(Sepal.Length) / sqrt(length(Sepal.Length)),
sepal_se_high = mean(Sepal.Length) + sd(Sepal.Length) / sqrt(length(Sepal.Length))),
Species]
unique(iris$Species)
x.dens.s <- density(iris[Species == "setosa", Sepal.Length])
x.dens.ve <- density(iris[Species == "versicolor", Sepal.Length])
x.dens.vi <- density(iris[Species == "virginica", Sepal.Length])
df.dens <- data.table(x = c(x.dens.s$x, x.dens.ve$x, x.dens.vi$x), y = c(x.dens.s$y, x.dens.ve$y, x.dens.vi$y))
df.dens$Species <- c(rep("setosa", length(x.dens.s$y)), rep("versicolor", length(x.dens.ve$y)),
rep("virginica", length(x.dens.vi$y)))
iris_density <-
ggplot() +
geom_density(data=iris, aes(x=Sepal.Length,fill=Species),alpha=0.5) +
geom_area(data = df.dens[Species == "setosa" &
x %between% c(iris_summary[Species == "setosa", sepal_se_low],
iris_summary[Species == "setosa", sepal_se_high]),],
aes(x=x,y=y), fill = "white", alpha = 0.5) +
geom_area(data = df.dens[Species == "versicolor" &
x %between% c(iris_summary[Species == "versicolor", sepal_se_low],
iris_summary[Species == "versicolor", sepal_se_high]),],
aes(x=x,y=y), fill = "white", alpha = 0.5) +
geom_area(data = df.dens[Species == "virginica" &
x %between% c(iris_summary[Species == "virginica", sepal_se_low],
iris_summary[Species == "virginica", sepal_se_high]),],
aes(x=x,y=y), fill = "white", alpha = 0.5) +
geom_vline(data = iris_summary,
aes(xintercept = sepal_mean, color = Species), linetype = 2,
linewidth = 0.7, color = "black")
iris_density
您需要找到与平均值相对应的 y 值,这需要进行一些舍入。检查一下是否是您需要的。
library(data.table)
library(ggplot2)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:data.table':
#>
#> between, first, last
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
iris <- as.data.table(iris)
iris_summary <- iris[, .(sepal_mean = mean(Sepal.Length),
sepal_se_low = mean(Sepal.Length) - sd(Sepal.Length) / sqrt(length(Sepal.Length)),
sepal_se_high = mean(Sepal.Length) + sd(Sepal.Length) / sqrt(length(Sepal.Length))),
Species]
unique(iris$Species)
#> [1] setosa versicolor virginica
#> Levels: setosa versicolor virginica
x.dens.s <- density(iris[Species == "setosa", Sepal.Length])
x.dens.ve <- density(iris[Species == "versicolor", Sepal.Length])
x.dens.vi <- density(iris[Species == "virginica", Sepal.Length])
df.dens <- data.table(x = c(x.dens.s$x, x.dens.ve$x, x.dens.vi$x), y = c(x.dens.s$y, x.dens.ve$y, x.dens.vi$y))
df.dens$Species <- c(rep("setosa", length(x.dens.s$y)), rep("versicolor", length(x.dens.ve$y)),
rep("virginica", length(x.dens.vi$y)))
## Add mean value to density dataset
df.dens <- left_join(df.dens, iris_summary)
#> Joining with `by = join_by(Species)`
## Find y-value at mean value
yend <- df.dens |>
group_by(Species) |>
filter(round(x, 2) == round(sepal_mean, 2)) |>
summarise(yend = mean(y))
## Plot
iris_density <-
ggplot() +
geom_density(data=iris, aes(x=Sepal.Length,fill=Species),alpha=0.5) +
geom_area(data = df.dens[Species == "setosa" &
x %between% c(iris_summary[Species == "setosa", sepal_se_low],
iris_summary[Species == "setosa", sepal_se_high]),],
aes(x=x,y=y), fill = "white", alpha = 0.5) +
geom_area(data = df.dens[Species == "versicolor" &
x %between% c(iris_summary[Species == "versicolor", sepal_se_low],
iris_summary[Species == "versicolor", sepal_se_high]),],
aes(x=x,y=y), fill = "white", alpha = 0.5) +
geom_area(data = df.dens[Species == "virginica" &
x %between% c(iris_summary[Species == "virginica", sepal_se_low],
iris_summary[Species == "virginica", sepal_se_high]),],
aes(x=x,y=y), fill = "white", alpha = 0.5) +
geom_segment(aes(x = iris_summary$sepal_mean, y = 0, yend = yend$yend), linetype = 2)
geom_vline(data = iris_summary,
aes(xintercept = sepal_mean, color = Species), linetype = 2,
linewidth = 0.7, color = "black")
#> mapping: xintercept = ~sepal_mean, colour = ~Species
#> geom_vline: na.rm = FALSE
#> stat_identity: na.rm = FALSE
#> position_identity
iris_density
创建于 2024-08-29,使用 reprex v2.1.0