我正在尝试在我感兴趣的时间段内计算quartile
中两个variables
的上限和下限data.frame
。下面的代码给了我上位数和下位数的一位数字。
FakeData <- data.frame(seq(as.Date("2001-01-01"), to= as.Date("2003-12-31"), by="day"),
A = runif(1095, 0,10),
D = runif(1095,5,15))
colnames(FakeData) <- c("Date", "A","D")
statistics <- FakeData %>%
gather(-Date, key = "Variable", value = "Value") %>%
mutate(Year = year(Date), Month = month(Date)) %>%
filter(between(Month,3,5)) %>%
mutate(NewDate = ymd(paste("2020", Month,day(Date), sep = "-"))) %>%
group_by(Variable, NewDate) %>%
summarise(Upper = quantile(Value,0.75, na.rm = T),
Lower = quantile(Value, 0.25, na.rm = T))
我想要下面的输出(Final_output
是我感兴趣的输出]
Output1 <- data.frame(seq(as.Date("2000-03-01"), to= as.Date("2000-05-31"), by="day"),
Upper = runif(92, 0,10), lower = runif(92,5,15), Variable = rep("A",92))
colnames(Output1)[1] <- "Date"
Output2 <- data.frame(seq(as.Date("2000-03-01"), to= as.Date("2000-05-31"), by="day"),
Upper = runif(92, 2,10), lower = runif(92,5,15), Variable = rep("D",92))
colnames(Output2)[1] <- "Date"
Final_Output<- bind_rows(Output1,Output2)
我可以向您提出data.table
解决方案。最后的步骤(在Value
变量上按组应用四分位数)可以转换为:
library(data.table)
setDT(statistics)
statistics[,.(lapply(get('Value'), quantile, probs = .25,.75)) ,
by = c("Variable", "NewDate")]