我的数据是这样的
structure(list(year = c(2017, 2017, 2017, 2017, 2018, 2018, 2018,
2018, 2019, 2019, 2019, 2019, 2017, 2017, 2018, 2018, 2019, 2019,
2017, 2017, 2017, 2017, 2018, 2018, 2018, 2018, 2019, 2019, 2019,
2019), College = c("College1", "College1", "College1", "College1",
"College1", "College1", "College1", "College1", "College1", "College1",
"College1", "College1", "College2", "College2", "College2", "College2",
"College2", "College2", "College3", "College3", "College3", "College3",
"College3", "College3", "College3", "College3", "College3", "College3",
"College3", "College3"), Department = c("Department 1", "Department 2",
"Department 3", "Department 4", "Department 1", "Department 2",
"Department 3", "Department 4", "Department 1", "Department 2",
"Department 3", "Department 4", "Department 1", "Department 2",
"Department 1", "Department 2", "Department 1", "Department 2",
"Department 1", "Department 2", "Department 3", "Department 4",
"Department 1", "Department 2", "Department 3", "Department 4",
"Department 1", "Department 2", "Department 3", "Department 4"
), Enrollment = c(51L, 322L, 251L, 106L, 468L, 205L, 718L, 200L,
344L, 256L, 434L, 38L, 487L, 503L, 14L, 448L, 489L, 437L, 695L,
833L, 941L, 299L, 864L, 888L, 531L, 335L, 47L, 753L, 319L, 986L
)), row.names = c(NA, -30L), class = c("tbl_df", "tbl", "data.frame"
))
我在计算各学院和各系多年来的入学人数变化时遇到了困难。因此,我想要一个新的专栏,告诉我在学院级别的招生变化是什么,以及一个新的专栏,告诉我在部门级别。
我尝试过很多不同的事情,这是我得到的最接近的
growth_rate = df %>%
arrange(College, Department)%>%
group_by(College) %>%
mutate(Diff_growth = Enrollment - lag(Enrollment),
Rate_percent = Diff_growth/Enrollment)
但是这段代码并没有在每个周期开始时停止,即 2017 年,它只是继续向下移动。 任何帮助将不胜感激。
所需的输出将如下所示
structure(list(X = 1:30, year = c(2017L, 2017L, 2017L, 2017L,
2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L, 2017L,
2017L, 2018L, 2018L, 2019L, 2019L, 2017L, 2017L, 2017L, 2017L,
2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L), College =
c("College1",
"College1", "College1", "College1", "College1", "College1",
"College1",
"College1", "College1", "College1", "College1", "College1",
"College2",
"College2", "College2", "College2", "College2", "College2",
"College3",
"College3", "College3", "College3", "College3", "College3",
"College3",
"College3", "College3", "College3", "College3", "College3"),
Department = c("Department 1", "Department 2", "Department 3",
"Department 4", "Department 1", "Department 2", "Department 3",
"Department 4", "Department 1", "Department 2", "Department 3",
"Department 4", "Department 1", "Department 2", "Department 1",
"Department 2", "Department 1", "Department 2", "Department 1",
"Department 2", "Department 3", "Department 4", "Department 1",
"Department 2", "Department 3", "Department 4", "Department 1",
"Department 2", "Department 3", "Department 4"), Enrollment = c(51L,
322L, 251L, 106L, 468L, 205L, 718L, 200L, 344L, 256L, 434L,
38L, 487L, 503L, 14L, 448L, 489L, 437L, 695L, 833L, 941L,
299L, 864L, 888L, 531L, 335L, 47L, 753L, 319L, 986L), RateChange_College = c(NA,
NA, NA, NA, 1.179452055, 1.179452055, 1.179452055, 1.179452055,
-0.326209931, -0.326209931, -0.326209931, -0.326209931, NA,
NA, -0.533333333, -0.533333333, 1.004329004, 1.004329004,
NA, NA, NA, NA, -0.054190751, -0.054190751, -0.054190751,
-0.054190751, -0.195951108, -0.195951108, -0.195951108, -0.195951108
), RateChange_Department = c(NA, NA, NA, NA, 8.176470588,
-0.363354037, 1.860557769, 0.886792453, -0.264957265, 0.248780488,
-0.395543175, -0.81, NA, NA, -0.971252567, -0.109343936,
33.92857143, -0.024553571, NA, NA, NA, NA, 0.243165468, 0.066026411,
-0.435706695, 0.120401338, -0.945601852, -0.152027027, -0.399246704,
1.943283582)), class = "data.frame", row.names = c(NA, -30L
))
您可以分别计算学院和院系组合的费率,并
join
计算结果。
library(tidyverse)
college_rates <- df |>
arrange(year, College, Department) |>
summarize(college_yearly_enrollment = sum(Enrollment),
.by = c(year, College)) |>
mutate(RateChange_College = (college_yearly_enrollment - lag(college_yearly_enrollment))/
lag(college_yearly_enrollment),
.by = College) |>
select(-college_yearly_enrollment)
glimpse(college_rates)
#> Rows: 9
#> Columns: 3
#> $ year <dbl> 2017, 2017, 2017, 2018, 2018, 2018, 2019, 2019, 2019
#> $ College <chr> "College1", "College2", "College3", "College1", "Co…
#> $ RateChange_College <dbl> NA, NA, NA, 1.17945205, -0.53333333, -0.05419075, -…
df |>
arrange(year, College, Department) |>
left_join(college_rates, by = join_by(year, College)) |>
mutate(RateChange_Department = (Enrollment - lag(Enrollment))/lag(Enrollment),
.by = c(College, Department)) |>
# filter for reprex only
filter(year > 2017)
#> # A tibble: 20 × 6
#> year College Department Enrollment RateChange_College RateChange_Department
#> <dbl> <chr> <chr> <int> <dbl> <dbl>
#> 1 2018 College1 Departmen… 468 1.18 8.18
#> 2 2018 College1 Departmen… 205 1.18 -0.363
#> 3 2018 College1 Departmen… 718 1.18 1.86
#> 4 2018 College1 Departmen… 200 1.18 0.887
#> 5 2018 College2 Departmen… 14 -0.533 -0.971
#> 6 2018 College2 Departmen… 448 -0.533 -0.109
#> 7 2018 College3 Departmen… 864 -0.0542 0.243
#> 8 2018 College3 Departmen… 888 -0.0542 0.0660
#> 9 2018 College3 Departmen… 531 -0.0542 -0.436
#> 10 2018 College3 Departmen… 335 -0.0542 0.120
#> 11 2019 College1 Departmen… 344 -0.326 -0.265
#> 12 2019 College1 Departmen… 256 -0.326 0.249
#> 13 2019 College1 Departmen… 434 -0.326 -0.396
#> 14 2019 College1 Departmen… 38 -0.326 -0.81
#> 15 2019 College2 Departmen… 489 1.00 33.9
#> 16 2019 College2 Departmen… 437 1.00 -0.0246
#> 17 2019 College3 Departmen… 47 -0.196 -0.946
#> 18 2019 College3 Departmen… 753 -0.196 -0.152
#> 19 2019 College3 Departmen… 319 -0.196 -0.399
#> 20 2019 College3 Departmen… 986 -0.196 1.94