我已经针对因子的不同水平分别计算了加权核密度估计,但我认为这些估计无法并入 geom_violin 内置的密度估计中。我想知道 geom_violin 或其他 ggplot2 函数是否可以直接使用我提供的原始数据(即已经算好的密度值)来绘制小提琴图,而不是使用其内置的密度计算?非常感谢任何帮助。下面是一些示例代码,我想根据 y 值随 x 值分布的变化来绘制小提琴图……
# Create data ----
# Two event types (50 rows each); every row is assigned to one of ten sites.
Surge_vs_Plummet_Stats_df <- data.frame(
  Type = rep(c("Surge", "Plummet"), each = 50),
  site_no = sample(1:10, 100, replace = TRUE),
  Mean = c(rnorm(50, mean = 5, sd = 2), rnorm(50, mean = 3, sd = 1))
)

# Calculate weights ----
# Each row is weighted by 1 / (number of rows from its site), so every site
# carries the same total weight no matter how many rows it contributes.
station_counts <- table(Surge_vs_Plummet_Stats_df$site_no)
# Look the counts up by site *name*, not by position: positional indexing is
# only correct when the site ids are exactly 1..k with none missing.
site_keys <- as.character(Surge_vs_Plummet_Stats_df$site_no)
# as.numeric() drops the table class/names so Weights is a plain numeric column.
Surge_vs_Plummet_Stats_df$Weights <- 1 / as.numeric(station_counts[site_keys])
# Normalize (sum to 1)
Surge_vs_Plummet_Stats_df$Weights <- Surge_vs_Plummet_Stats_df$Weights /
  sum(Surge_vs_Plummet_Stats_df$Weights)

# Identify bandwidth ----
# A single bandwidth from the pooled data, shared by both KDEs below.
# Note that bw.nrd() takes no weights argument, so it ignores Weights.
bw <- bw.nrd(Surge_vs_Plummet_Stats_df$Mean)

# Separate the data frames by type and run the KDEs ----
# Deliberately the range of the full data frame (not of each subset), so both
# densities are evaluated over the same from/to interval.
mean_range <- range(Surge_vs_Plummet_Stats_df$Mean)

# Base subsetting is used here so the script does not depend on dplyr being
# attached. Weights are re-normalized within each subset to sum to 1.
Surge_vs_Plummet_Stats_Surge <-
  Surge_vs_Plummet_Stats_df[Surge_vs_Plummet_Stats_df$Type == "Surge", ]
rownames(Surge_vs_Plummet_Stats_Surge) <- NULL
Surge_vs_Plummet_Stats_Surge$Weights <-
  Surge_vs_Plummet_Stats_Surge$Weights /
  sum(Surge_vs_Plummet_Stats_Surge$Weights)
Surge_kde <- density(
  Surge_vs_Plummet_Stats_Surge$Mean,
  weights = Surge_vs_Plummet_Stats_Surge$Weights,
  bw = bw,
  from = mean_range[1],
  to = mean_range[2]
)

Surge_vs_Plummet_Stats_Plummet <-
  Surge_vs_Plummet_Stats_df[Surge_vs_Plummet_Stats_df$Type == "Plummet", ]
rownames(Surge_vs_Plummet_Stats_Plummet) <- NULL
Surge_vs_Plummet_Stats_Plummet$Weights <-
  Surge_vs_Plummet_Stats_Plummet$Weights /
  sum(Surge_vs_Plummet_Stats_Plummet$Weights)
Plummet_kde <- density(
  Plummet_vals <- Surge_vs_Plummet_Stats_Plummet$Mean,
  weights = Surge_vs_Plummet_Stats_Plummet$Weights,
  bw = bw,
  from = mean_range[1],
  to = mean_range[2]
)

# Combine both KDEs into one long data frame ----
# x = evaluation points, y = estimated density, Type = which KDE the row
# came from. Each label is repeated by the length of its *own* KDE grid.
Mean_Kernel_df <- data.frame(
  x = c(Surge_kde$x, Plummet_kde$x),
  y = c(Surge_kde$y, Plummet_kde$y),
  Type = rep(
    c("Surge", "Plummet"),
    times = c(length(Surge_kde$x), length(Plummet_kde$x))
  )
)