我有一个包含每月数据的 tsibble,我需要以三个月为间隔进行汇总。这些不是“典型”季度,而是由以下期间组成的定制三个月组:
Dec-Jan-Feb
Mar-Apr-May
Jun-Jul-Aug
Sept-Oct-Nov
当然,在
Dec-Jan-Feb
的情况下,这将跨越两年(即Dec2023-Jan2024-Feb2024
)。我不确定这对于 tsibble 的索引是否可能。
index_by
函数按时间索引进行分组。但是,我不确定应该如何将它与自定义函数一起使用。链接的帮助页面末尾有一个示例,但是 1) 它与我需要的不同,2) 它似乎给出了错误。
示例数据集以及我用来按“正常”季度对其进行分组的方式。
data <- structure(list(date = structure(c(17897, 17928, 17956, 17987,
18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231, 18262,
18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 18536,
18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 18809,
18840, 18871, 18901, 18932, 18962, 18993, 19024, 19052, 19083,
19113, 19144, 19174, 19205, 19236, 19266, 19297, 19327, 19358,
19389, 19417, 19448, 19478, 19509, 19539, 19570, 19601, 19631,
19662, 19692, 19723, 17897, 17928, 17956, 17987, 18017, 18048,
18078, 18109, 18140, 18170, 18201, 18231, 18262, 18293, 18322,
18353, 18383, 18414, 18444, 18475, 18506, 18536, 18567, 18597,
18628, 18659, 18687, 18718, 18748, 18779, 18809, 18840, 18871,
18901, 18932, 18962, 18993, 19024, 19052, 19083, 19113, 19144,
19174, 19205, 19236, 19266, 19297, 19327, 19358, 19389, 19417,
19448, 19478, 19509, 19539, 19570, 19601, 19631, 19662, 19692,
19723), class = c("yearmonth", "vctrs_vctr")), geo = c("AT",
"AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT",
"AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT",
"AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT",
"AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT",
"AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT", "AT",
"AT", "AT", "AT", "AT", "AT", "IT", "IT", "IT", "IT", "IT", "IT",
"IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT",
"IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT",
"IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT",
"IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT",
"IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT", "IT"
), value = c(870L, 770L, 870L, 820L, 880L, 850L, 1015L, 985L,
1000L, 1010L, 870L, 1050L, 1395L, 960L, 755L, 320L, 600L, 970L,
1045L, 1360L, 1375L, 1665L, 1395L, 1560L, 1455L, 1775L, 1505L,
1440L, 1810L, 2070L, 3165L, 4700L, 4660L, 5030L, 5725L, 4470L,
3275L, 3205L, 4380L, 4970L, 5855L, 9540L, 11300L, 14495L, 15930L,
18210L, 11745L, 6875L, 4095L, 2480L, 3045L, 3295L, 4140L, 4765L,
5280L, 6680L, 7995L, 9710L, 2310L, 2345L, 1980L, 3175L, 2680L,
2595L, 2565L, 3115L, 2820L, 3075L, 2265L, 3005L, 3890L, 3340L,
2485L, 3060L, 3060L, 850L, 120L, 890L, 1500L, 1810L, 930L, 1055L,
2915L, 2580L, 2560L, 2425L, 2310L, 2660L, 2390L, 2715L, 2960L,
3280L, 3890L, 6610L, 5875L, 5505L, 4575L, 4460L, 5230L, 6130L,
4700L, 6450L, 5690L, 5105L, 5985L, 7920L, 8385L, 9145L, 7995L,
8415L, 9840L, 11335L, 8175L, 11045L, 10730L, 9825L, 10005L, 11930L,
14950L, 14160L, 10160L, 12920L)), class = c("tbl_ts", "tbl_df",
"tbl", "data.frame"), row.names = c(NA, -122L), key = structure(list(
geo = c("AT", "IT"), .rows = structure(list(1:61, 62:122), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE), index = structure("date", ordered = TRUE), index2 = "date", interval = structure(list(
year = 0, quarter = 0, month = 1, week = 0, day = 0, hour = 0,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 0), .regular = TRUE, class = c("interval",
"vctrs_rcrd", "vctrs_vctr")))
data.q <- data |>
index_by(quarter = ~yearquarter(.)) |>
group_by_key() |>
summarise(geo = first(geo),
value = sum(value))
在索引之前添加四分之一的日期。那么 Q1 指 12-1-2 月,Q2 指 3-4-5 月,依此类推。插入
mutate(date = date + month(1)) |>
:
data.q <- data |>
mutate(date = date + month(1)) |>
index_by(quarter = ~yearquarter(.)) |>
group_by_key() |>
summarise(geo = first(geo),
value = sum(value))