我有一份重复测量数据,参与者(以“site_specific”标识)每周接受一次疾病检测。数据中有累计天数(“follow_days”)和自上次检测以来的天数(“rolling.days”)。另外,还有一个二元变量(“disease_detection”),其中 1 = 检测到疾病。最后,还有一个“date”变量,记录每次疾病检测的日期。我希望每当 disease_detection = 1 时,将其后 60 天内的“rolling.days”归零。解决方法可以是 (1) 自动将这些天数清零,或者 (2) 创建一个是/否变量,用来标记阳性检测当天及其后的 60 天。
我尝试了多种方法,下面是最新的一次尝试……结果已经很接近,但还不完全正确。
如果有人能帮我让它正常工作,我将不胜感激!
谢谢你
# Flag every test that falls on, or within 60 days after, a positive detection
# at the same site. Adds `zero_out` ("Yes" inside the window, NA otherwise);
# all original columns are returned unchanged.
data_processed <- data %>%
  # `date` holds "m/d/Y" text; parse it so that sorting and subtraction work
  # on calendar dates instead of on character strings.
  mutate(test_date = as.Date(date, format = "%m/%d/%Y")) %>%
  arrange(site_specific, test_date) %>% # Chronological order within each site
  group_by(site_specific) %>%
  mutate(
    # if_else() keeps the Date class; base ifelse() would strip it.
    detection_date = if_else(disease_detection == 1, test_date, as.Date(NA)),
    # Carry the most recent detection date forward; rows before the first
    # detection stay NA.
    detection_date = zoo::na.locf(detection_date, na.rm = FALSE)
  ) %>%
  ungroup() %>%
  mutate(
    days_since_detection = as.numeric(test_date - detection_date),
    # case_when() treats an NA condition as "no match", so rows with no prior
    # detection (or more than 60 days after one) fall through to NA.
    zero_out = case_when(
      disease_detection == 1 | days_since_detection <= 60 ~ "Yes"
    )
  ) %>%
  select(-test_date, -detection_date, -days_since_detection) # Drop temporaries
> dput(data)
structure(list(site_specific = c("HPB0002", "HPB0002", "HPB0002",
"HPB0002", "HPB0002", "HPB0002", "HPB0002", "HPB0002", "HPB0002",
"HPB0002", "HPB0002", "HPB0002", "HPB0002", "HPB0002", "HPB0002",
"HPB0002", "HPB0005", "HPB0005", "HPB0005", "HPB0005", "HPB0005",
"HPB0005", "HPB0005", "HPB0005", "HPB0005", "HPB0005", "HPB0005",
"HPB0005", "HPB0005", "HPB0005", "HPB0005", "HPB0005", "HPB0008",
"HPB0008", "HPB0008", "HPB0008", "HPB0008", "HPB0008", "HPB0008",
"HPB0008", "HPB0008", "HPB0008", "HPB0008", "HPB0008", "HPB0008",
"HPB0008", "HPB0008", "HPB0008", "HPB0009", "HPB0009", "HPB0009",
"HPB0009", "HPB0009", "HPB0009", "HPB0009", "HPB0009", "HPB0009",
"HPB0009", "HPB0009", "HPB0009", "HPB0009", "HPB0009", "HPB0009",
"HPB0009", "HPB0010", "HPB0010", "HPB0010", "HPB0010", "HPB0010",
"HPB0010", "HPB0010", "HPB0010", "HPB0010", "HPB0010", "HPB0010",
"HPB0010", "HPB0013", "HPB0013", "HPB0013", "HPB0013", "HPB0013",
"HPB0013", "HPB0013", "HPB0013", "HPB0013", "HPB0013", "HPB0013",
"HPB0013", "HPB0013", "HPB0017", "HPB0017", "HPB0017", "HPB0017",
"HPB0017", "HPB0017", "HPB0017", "HPB0017", "HPB0017", "HPB0017",
"HPB0017", "HPB0017", "HPB0017", "HPB0017", "HPB0017", "HPB0017",
"HPB0024", "HPB0024", "HPB0024", "HPB0024", "HPB0024", "HPB0024",
"HPB0024", "HPB0024", "HPB0024", "HPB0024", "HPB0024", "HPB0024",
"HPB0024", "HPB0024", "HPB0024", "HPB0024", "HPB0026", "HPB0026",
"HPB0026", "HPB0026", "HPB0026", "HPB0026", "HPB0026", "HPB0026",
"HPB0026", "HPB0026", "HPB0026", "HPB0026", "HPB0026", "HPB0026",
"HPB0026", "HPB0026", "HPB0035", "HPB0035", "HPB0035", "HPB0035",
"HPB0035", "HPB0035", "HPB0035", "HPB0035", "HPB0035", "HPB0035",
"HPB0035", "HPB0035", "HPB0035", "HPB0035", "HPB0035", "HPB0035",
"HPB0047", "HPB0047", "HPB0047", "HPB0047", "HPB0047", "HPB0047",
"HPB0047", "HPB0047", "HPB0047", "HPB0047", "HPB0047", "HPB0047",
"HPB0047", "HPB0047", "HPB0047", "HPB0047", "HPB0058", "HPB0058",
"HPB0058", "HPB0058", "HPB0058", "HPB0058", "HPB0058", "HPB0058",
"HPB0058", "HPB0058", "HPB0058", "HPB0058", "HPB0058", "HPB0058",
"HPB0058", "HPB0058", "HPB0059", "HPB0059", "HPB0059", "HPB0059",
"HPB0059", "HPB0059", "HPB0059", "HPB0059", "HPB0059", "HPB0059",
"HPB0059", "HPB0059", "HPB0059", "HPB0059", "HPB0059", "HPB0059",
"HPB0061", "HPB0061", "HPB0061", "HPB0061", "HPB0061", "HPB0061",
"HPB0061", "HPB0061", "HPB0061", "HPB0061", "HPB0061", "HPB0061",
"HPB0061", "HPB0061", "HPB0061", "HPB0061", "HPB0070", "HPB0070",
"HPB0070"), date = c("10/14/2020", "10/22/2020", "10/27/2020",
"11/3/2020", "11/11/2020", "11/18/2020", "11/26/2020", "12/2/2020",
"12/9/2020", "12/17/2020", "12/22/2020", "12/29/2020", "1/6/2021",
"1/11/2021", "1/18/2021", "1/26/2021", "8/15/2020", "8/21/2020",
"8/28/2020", "9/3/2020", "9/11/2020", "9/18/2020", "9/25/2020",
"10/2/2020", "10/9/2020", "10/16/2020", "10/23/2020", "10/30/2020",
"11/6/2020", "11/13/2020", "11/20/2020", "11/27/2020", "8/6/2020",
"8/12/2020", "8/20/2020", "8/26/2020", "9/4/2020", "9/12/2020",
"9/18/2020", "9/26/2020", "10/2/2020", "10/10/2020", "10/16/2020",
"10/22/2020", "10/28/2020", "11/3/2020", "11/11/2020", "11/19/2020",
"8/12/2020", "8/19/2020", "8/26/2020", "9/1/2020", "9/8/2020",
"9/16/2020", "9/23/2020", "9/30/2020", "10/7/2020", "10/14/2020",
"10/20/2020", "10/28/2020", "11/4/2020", "11/11/2020", "11/18/2020",
"11/25/2020", "8/6/2020", "8/15/2020", "8/20/2020", "8/26/2020",
"9/3/2020", "9/9/2020", "9/17/2020", "9/23/2020", "9/30/2020",
"10/7/2020", "10/14/2020", "10/20/2020", "8/28/2020", "9/1/2020",
"9/8/2020", "9/15/2020", "9/22/2020", "9/29/2020", "10/6/2020",
"10/13/2020", "10/20/2020", "11/3/2020", "11/17/2020", "12/1/2020",
"12/10/2020", "8/20/2020", "8/27/2020", "9/2/2020", "9/10/2020",
"9/16/2020", "9/24/2020", "10/2/2020", "10/9/2020", "10/14/2020",
"10/22/2020", "10/31/2020", "11/6/2020", "11/11/2020", "11/18/2020",
"11/26/2020", "12/3/2020", "8/18/2020", "8/25/2020", "9/2/2020",
"9/10/2020", "9/16/2020", "9/23/2020", "9/30/2020", "10/6/2020",
"10/13/2020", "10/20/2020", "10/27/2020", "11/2/2020", "11/11/2020",
"11/16/2020", "11/23/2020", "11/30/2020", "8/13/2020", "8/20/2020",
"8/27/2020", "9/4/2020", "9/9/2020", "9/16/2020", "9/24/2020",
"10/2/2020", "10/8/2020", "10/16/2020", "10/23/2020", "10/28/2020",
"11/3/2020", "11/11/2020", "11/18/2020", "11/26/2020", "8/14/2020",
"8/21/2020", "8/28/2020", "9/4/2020", "9/10/2020", "9/18/2020",
"9/24/2020", "10/1/2020", "10/9/2020", "10/16/2020", "10/23/2020",
"10/29/2020", "11/6/2020", "11/12/2020", "11/20/2020", "11/27/2020",
"10/1/2020", "10/8/2020", "10/16/2020", "10/21/2020", "10/28/2020",
"11/6/2020", "11/11/2020", "11/18/2020", "11/26/2020", "12/3/2020",
"12/11/2020", "12/16/2020", "1/2/2021", "1/7/2021", "1/12/2021",
"1/24/2021", "8/12/2020", "8/22/2020", "8/27/2020", "9/5/2020",
"9/11/2020", "9/18/2020", "9/26/2020", "10/1/2020", "10/10/2020",
"10/16/2020", "10/21/2020", "10/28/2020", "11/4/2020", "11/11/2020",
"11/19/2020", "11/26/2020", "9/21/2020", "9/29/2020", "10/8/2020",
"10/16/2020", "10/23/2020", "10/28/2020", "11/5/2020", "11/12/2020",
"11/17/2020", "11/25/2020", "11/30/2020", "12/7/2020", "12/15/2020",
"12/21/2020", "12/28/2020", "1/6/2021", "8/18/2020", "8/25/2020",
"8/31/2020", "9/7/2020", "9/16/2020", "9/22/2020", "9/29/2020",
"10/5/2020", "10/13/2020", "10/20/2020", "10/28/2020", "11/2/2020",
"11/9/2020", "11/17/2020", "11/24/2020", "12/16/2020", "8/22/2020",
"8/29/2020", "9/3/2020"), enroll_date = c("10/7/2020", "10/7/2020",
"10/7/2020", "10/7/2020", "10/7/2020", "10/7/2020", "10/7/2020",
"10/7/2020", "10/7/2020", "10/7/2020", "10/7/2020", "10/7/2020",
"10/7/2020", "10/7/2020", "10/7/2020", "10/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "7/30/2020", "7/30/2020",
"7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020",
"7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020",
"7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020", "8/4/2020",
"8/4/2020", "8/4/2020", "8/4/2020", "8/4/2020", "8/4/2020", "8/4/2020",
"8/4/2020", "8/4/2020", "8/4/2020", "8/4/2020", "8/4/2020", "8/4/2020",
"8/4/2020", "8/4/2020", "8/4/2020", "7/30/2020", "7/30/2020",
"7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020",
"7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020", "7/30/2020",
"8/22/2020", "8/22/2020", "8/22/2020", "8/22/2020", "8/22/2020",
"8/22/2020", "8/22/2020", "8/22/2020", "8/22/2020", "8/22/2020",
"8/22/2020", "8/22/2020", "8/22/2020", "8/13/2020", "8/13/2020",
"8/13/2020", "8/13/2020", "8/13/2020", "8/13/2020", "8/13/2020",
"8/13/2020", "8/13/2020", "8/13/2020", "8/13/2020", "8/13/2020",
"8/13/2020", "8/13/2020", "8/13/2020", "8/13/2020", "8/11/2020",
"8/11/2020", "8/11/2020", "8/11/2020", "8/11/2020", "8/11/2020",
"8/11/2020", "8/11/2020", "8/11/2020", "8/11/2020", "8/11/2020",
"8/11/2020", "8/11/2020", "8/11/2020", "8/11/2020", "8/11/2020",
"8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020",
"8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020",
"8/5/2020", "8/5/2020", "8/5/2020", "8/5/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "9/24/2020", "9/24/2020", "9/24/2020",
"9/24/2020", "9/24/2020", "9/24/2020", "9/24/2020", "9/24/2020",
"9/24/2020", "9/24/2020", "9/24/2020", "9/24/2020", "9/24/2020",
"9/24/2020", "9/24/2020", "9/24/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020", "8/7/2020",
"8/7/2020", "8/7/2020", "9/16/2020", "9/16/2020", "9/16/2020",
"9/16/2020", "9/16/2020", "9/16/2020", "9/16/2020", "9/16/2020",
"9/16/2020", "9/16/2020", "9/16/2020", "9/16/2020", "9/16/2020",
"9/16/2020", "9/16/2020", "9/16/2020", "8/10/2020", "8/10/2020",
"8/10/2020", "8/10/2020", "8/10/2020", "8/10/2020", "8/10/2020",
"8/10/2020", "8/10/2020", "8/10/2020", "8/10/2020", "8/10/2020",
"8/10/2020", "8/10/2020", "8/10/2020", "8/10/2020", "8/14/2020",
"8/14/2020", "8/14/2020"), follow_days = c(7L, 15L, 20L, 27L,
35L, 42L, 50L, 56L, 63L, 71L, 76L, 83L, 91L, 96L, 103L, 111L,
8L, 14L, 21L, 27L, 35L, 42L, 49L, 56L, 63L, 70L, 77L, 84L, 91L,
98L, 105L, 112L, 7L, 13L, 21L, 27L, 36L, 44L, 50L, 58L, 64L,
72L, 78L, 84L, 90L, 96L, 104L, 112L, 8L, 15L, 22L, 28L, 35L,
43L, 50L, 57L, 64L, 71L, 77L, 85L, 92L, 99L, 106L, 113L, 7L,
16L, 21L, 27L, 35L, 41L, 49L, 55L, 62L, 69L, 76L, 82L, 6L, 10L,
17L, 24L, 31L, 38L, 45L, 52L, 59L, 73L, 87L, 101L, 110L, 7L,
14L, 20L, 28L, 34L, 42L, 50L, 57L, 62L, 70L, 79L, 85L, 90L, 97L,
105L, 112L, 7L, 14L, 22L, 30L, 36L, 43L, 50L, 56L, 63L, 70L,
77L, 83L, 92L, 97L, 104L, 111L, 8L, 15L, 22L, 30L, 35L, 42L,
50L, 58L, 64L, 72L, 79L, 84L, 90L, 98L, 105L, 113L, 7L, 14L,
21L, 28L, 34L, 42L, 48L, 55L, 63L, 70L, 77L, 83L, 91L, 97L, 105L,
112L, 7L, 14L, 22L, 27L, 34L, 43L, 48L, 55L, 63L, 70L, 78L, 83L,
100L, 105L, 110L, 122L, 5L, 15L, 20L, 29L, 35L, 42L, 50L, 55L,
64L, 70L, 75L, 82L, 89L, 96L, 104L, 111L, 5L, 13L, 22L, 30L,
37L, 42L, 50L, 57L, 62L, 70L, 75L, 82L, 90L, 96L, 103L, 112L,
8L, 15L, 21L, 28L, 37L, 43L, 50L, 56L, 64L, 71L, 79L, 84L, 91L,
99L, 106L, 128L, 8L, 15L, 20L), rolling.days = c(7L, 8L, 5L,
7L, 8L, 7L, 8L, 6L, 7L, 8L, 5L, 7L, 8L, 5L, 7L, 8L, 8L, 6L, 7L,
6L, 8L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 6L, 8L,
6L, 9L, 8L, 6L, 8L, 6L, 8L, 6L, 6L, 6L, 6L, 8L, 8L, 8L, 7L, 7L,
6L, 7L, 8L, 7L, 7L, 7L, 7L, 6L, 8L, 7L, 7L, 7L, 7L, 7L, 9L, 5L,
6L, 8L, 6L, 8L, 6L, 7L, 7L, 7L, 6L, 6L, 4L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 14L, 14L, 14L, 9L, 7L, 7L, 6L, 8L, 6L, 8L, 8L, 7L, 5L,
8L, 9L, 6L, 5L, 7L, 8L, 7L, 7L, 7L, 8L, 8L, 6L, 7L, 7L, 6L, 7L,
7L, 7L, 6L, 9L, 5L, 7L, 7L, 8L, 7L, 7L, 8L, 5L, 7L, 8L, 8L, 6L,
8L, 7L, 5L, 6L, 8L, 7L, 8L, 7L, 7L, 7L, 7L, 6L, 8L, 6L, 7L, 8L,
7L, 7L, 6L, 8L, 6L, 8L, 7L, 7L, 7L, 8L, 5L, 7L, 9L, 5L, 7L, 8L,
7L, 8L, 5L, 17L, 5L, 5L, 12L, 5L, 10L, 5L, 9L, 6L, 7L, 8L, 5L,
9L, 6L, 5L, 7L, 7L, 7L, 8L, 7L, 5L, 8L, 9L, 8L, 7L, 5L, 8L, 7L,
5L, 8L, 5L, 7L, 8L, 6L, 7L, 9L, 8L, 7L, 6L, 7L, 9L, 6L, 7L, 6L,
8L, 7L, 8L, 5L, 7L, 8L, 7L, 22L, 8L, 7L, 5L), disease_detection = c(0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, NA, 0L, 0L,
NA, NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L, NA, 0L, 0L, 0L, 0L,
NA, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, NA, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-220L))
此示例假设您的选项 1(将 rolling.days 归零)是首选方案,不过同样的逻辑也可以用来实现选项 2。
做法是先创建一个临时列,在 disease_detection == 1 的行中记录 follow_days 的值,然后使用
tidyr::fill()
将该值向下填充。有了这个临时列之后,再用 case_when()
把相应的 rolling.days 值归零。
请注意,如果您有日期值,最好将它们存储为 Date 类。我为此使用了
lubridate
包。如果您想保留原始日期字符串,只需另外创建两个 Date 类的新列,并用它们进行排序、分析等。
library(dplyr)
library(lubridate)
library(tidyr)
# Zero out rolling.days for every test taken within 60 follow-up days of the
# most recent positive detection at the same site.
data_processed <- data |>
  # Parse every column whose name contains "date" from m/d/Y text to Date.
  mutate(across(contains("date"), mdy)) |>
  arrange(site_specific, date) |>
  group_by(site_specific) |>
  # Keep follow_days only on rows where disease was detected; every other row
  # (including those with a missing test result) becomes NA.
  mutate(
    last_hit = replace(follow_days, !(disease_detection %in% 1), NA)
  ) |>
  # Carry the follow-up day of the latest detection forward within the site.
  fill(last_hit, .direction = "down") |>
  mutate(
    # An NA condition (no detection so far) counts as "no match" and falls
    # through to .default, leaving rolling.days as it was.
    rolling.days = case_when(
      !is.na(last_hit) & follow_days - last_hit <= 60 ~ 0,
      .default = rolling.days
    )
  ) |>
  ungroup() |>
  select(-last_hit)

data_processed
# # A tibble: 220 × 6
# site_specific date enroll_date follow_days rolling.days disease_detection
# <chr> <date> <date> <int> <dbl> <int>
# 1 HPB0002 2020-10-14 2020-10-07 7 7 0
# 2 HPB0002 2020-10-22 2020-10-07 15 8 0
# 3 HPB0002 2020-10-27 2020-10-07 20 5 0
# 4 HPB0002 2020-11-03 2020-10-07 27 7 0
# 5 HPB0002 2020-11-11 2020-10-07 35 8 0
# 6 HPB0002 2020-11-18 2020-10-07 42 7 0
# 7 HPB0002 2020-11-26 2020-10-07 50 0 1
# 8 HPB0002 2020-12-02 2020-10-07 56 0 0
# 9 HPB0002 2020-12-09 2020-10-07 63 0 0
# 10 HPB0002 2020-12-17 2020-10-07 71 0 0
# ℹ 210 more rows
# ℹ Use `print(n = ...)` to see more rows
filter(data_processed, site_specific == "HPB0026")
# # A tibble: 16 × 6
# site_specific date enroll_date follow_days rolling.days disease_detection
# <chr> <date> <date> <int> <dbl> <int>
# 1 HPB0026 2020-08-13 2020-08-05 8 8 0
# 2 HPB0026 2020-08-20 2020-08-05 15 7 0
# 3 HPB0026 2020-08-27 2020-08-05 22 7 0
# 4 HPB0026 2020-09-04 2020-08-05 30 8 0
# 5 HPB0026 2020-09-09 2020-08-05 35 5 0
# 6 HPB0026 2020-09-16 2020-08-05 42 0 1
# 7 HPB0026 2020-09-24 2020-08-05 50 0 1
# 8 HPB0026 2020-10-02 2020-08-05 58 0 1
# 9 HPB0026 2020-10-08 2020-08-05 64 0 1
# 10 HPB0026 2020-10-16 2020-08-05 72 0 1
# 11 HPB0026 2020-10-23 2020-08-05 79 0 0
# 12 HPB0026 2020-10-28 2020-08-05 84 0 0
# 13 HPB0026 2020-11-03 2020-08-05 90 0 1
# 14 HPB0026 2020-11-11 2020-08-05 98 0 0
# 15 HPB0026 2020-11-18 2020-08-05 105 0 0
# 16 HPB0026 2020-11-26 2020-08-05 113 0 0