向密度图添加一条中线,该中线在图形区域内结束

问题描述 投票:0回答:1

我正在尝试添加一条仅在密度图上绘制区域的边界内延伸的线。这与这里的问题类似:将摘要信息添加到使用 ggplot 创建的密度图中,但它们不会在绘制区域内添加一条垂直线(它扩展了整个绘图)。我希望每条垂直平均线都停在密度几何的顶部,但我无法找到一种方法来做到这一点。

简而言之:我想捕获密度图顶部的值,其中每个组的平均值,并在该点处绘制线条末端。

这里有一些代码来演示:

iris <- as.data.table(iris)

iris_summary <- iris[, .(sepal_mean = mean(Sepal.Length),
                                   sepal_se_low = mean(Sepal.Length) - sd(Sepal.Length) / sqrt(length(Sepal.Length)),
                                   sepal_se_high = mean(Sepal.Length) + sd(Sepal.Length) / sqrt(length(Sepal.Length))),
                                    Species]
unique(iris$Species)
x.dens.s <- density(iris[Species == "setosa", Sepal.Length])
x.dens.ve <- density(iris[Species == "versicolor", Sepal.Length])
x.dens.vi <- density(iris[Species == "virginica", Sepal.Length])
df.dens <- data.table(x = c(x.dens.s$x, x.dens.ve$x, x.dens.vi$x), y = c(x.dens.s$y, x.dens.ve$y, x.dens.vi$y))
df.dens$Species <- c(rep("setosa", length(x.dens.s$y)), rep("versicolor", length(x.dens.ve$y)),
                 rep("virginica", length(x.dens.vi$y)))

iris_density <- 
  ggplot() +
  geom_density(data=iris, aes(x=Sepal.Length,fill=Species),alpha=0.5) +
  geom_area(data = df.dens[Species == "setosa" & 
                             x %between% c(iris_summary[Species == "setosa", sepal_se_low], 
                                           iris_summary[Species == "setosa", sepal_se_high]),],  
            aes(x=x,y=y), fill = "white", alpha = 0.5) +
  geom_area(data = df.dens[Species == "versicolor" & 
                             x %between% c(iris_summary[Species == "versicolor", sepal_se_low], 
                                           iris_summary[Species == "versicolor", sepal_se_high]),],  
            aes(x=x,y=y), fill = "white", alpha = 0.5) +
  geom_area(data = df.dens[Species == "virginica" & 
                             x %between% c(iris_summary[Species == "virginica", sepal_se_low], 
                                           iris_summary[Species == "virginica", sepal_se_high]),],  
            aes(x=x,y=y), fill = "white", alpha = 0.5)  + 
  geom_vline(data = iris_summary, 
             aes(xintercept = sepal_mean, color = Species), linetype = 2, 
             linewidth = 0.7, color = "black")
  
iris_density

enter image description here

r ggplot2 plot
1个回答
0
投票

您需要找到与平均值相对应的 y 值,这需要进行一些舍入。检查一下是否是您需要的。

library(data.table)
library(ggplot2)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:data.table':
#> 
#>     between, first, last
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

iris <- as.data.table(iris)

iris_summary <- iris[, .(sepal_mean = mean(Sepal.Length),
                                   sepal_se_low = mean(Sepal.Length) - sd(Sepal.Length) / sqrt(length(Sepal.Length)),
                                   sepal_se_high = mean(Sepal.Length) + sd(Sepal.Length) / sqrt(length(Sepal.Length))),
                                    Species]

unique(iris$Species)
#> [1] setosa     versicolor virginica 
#> Levels: setosa versicolor virginica
x.dens.s <- density(iris[Species == "setosa", Sepal.Length])
x.dens.ve <- density(iris[Species == "versicolor", Sepal.Length])
x.dens.vi <- density(iris[Species == "virginica", Sepal.Length])
df.dens <- data.table(x = c(x.dens.s$x, x.dens.ve$x, x.dens.vi$x), y = c(x.dens.s$y, x.dens.ve$y, x.dens.vi$y))
df.dens$Species <- c(rep("setosa", length(x.dens.s$y)), rep("versicolor", length(x.dens.ve$y)),
                 rep("virginica", length(x.dens.vi$y)))

## Add mean value to density dataset
df.dens <- left_join(df.dens, iris_summary)
#> Joining with `by = join_by(Species)`

## Find y-value at mean value
yend <- df.dens |> 
  group_by(Species) |> 
  filter(round(x, 2) == round(sepal_mean, 2)) |> 
  summarise(yend = mean(y))

## Plot
iris_density <- 
  ggplot() +
  geom_density(data=iris, aes(x=Sepal.Length,fill=Species),alpha=0.5) +
  geom_area(data = df.dens[Species == "setosa" & 
                             x %between% c(iris_summary[Species == "setosa", sepal_se_low], 
                                           iris_summary[Species == "setosa", sepal_se_high]),],  
            aes(x=x,y=y), fill = "white", alpha = 0.5) +
  geom_area(data = df.dens[Species == "versicolor" & 
                             x %between% c(iris_summary[Species == "versicolor", sepal_se_low], 
                                           iris_summary[Species == "versicolor", sepal_se_high]),],  
            aes(x=x,y=y), fill = "white", alpha = 0.5) +
  geom_area(data = df.dens[Species == "virginica" & 
                             x %between% c(iris_summary[Species == "virginica", sepal_se_low], 
                                           iris_summary[Species == "virginica", sepal_se_high]),],  
            aes(x=x,y=y), fill = "white", alpha = 0.5)  + 
  geom_segment(aes(x = iris_summary$sepal_mean, y = 0, yend = yend$yend), linetype = 2)

  geom_vline(data = iris_summary, 
             aes(xintercept = sepal_mean, color = Species), linetype = 2, 
             linewidth = 0.7, color = "black")
#> mapping: xintercept = ~sepal_mean, colour = ~Species 
#> geom_vline: na.rm = FALSE
#> stat_identity: na.rm = FALSE
#> position_identity
  
iris_density

创建于 2024-08-29,使用 reprex v2.1.0

© www.soinside.com 2019 - 2024. All rights reserved.