我在编写一个函数时遇到了麻烦:该函数需要按标识符(identifier)分组,把阳性结果之后 21 天内的 `have` 列(即相邻两次采集之间的天数)清零。目前我并不关心是否把 Positivo 行本身清零,只关心 Positivo 之后 21 天内的那些采集记录。我添加了 `want` 列来展示期望的结果,不过只要结果接近就很好了!下面是数据,以及我借助 ChatGPT 尝试编写的代码:
感谢您的帮助!
# Sample data, shown as the output of dput(df): a 31-row tibble with columns
#   result          - test result, "Negativo" or "Positivo"
#   identifier      - subject id ("a" or "b")
#   date_collection - collection timestamp (POSIXct, tzone "UTC")
#   have            - days between collections, as described in the question
#   want            - the asker's desired version of `have`
dput(df)
structure(list(result = c("Negativo", "Negativo", "Negativo",
"Negativo", "Negativo", "Positivo", "Positivo", "Negativo", "Negativo",
"Negativo", "Negativo", "Negativo", "Negativo", "Negativo", "Negativo",
"Negativo", "Negativo", "Negativo", "Negativo", "Negativo", "Negativo",
"Negativo", "Negativo", "Positivo", "Negativo", "Negativo", "Negativo",
"Negativo", "Negativo", "Negativo", "Negativo"), identifier = c("a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "b", "b"), date_collection = structure(c(1631059200,
1631664000, 1632355200, 1632787200, 1634601600, 1635292800, 1635811200,
1636416000, 1637107200, 1637712000, 1638144000, 1638748800, 1639440000,
1640044800, 1640736000, 1640044800, 1640649600, 1641254400, 1641859200,
1643241600, 1645142400, 1645660800, 1646352000, 1646784000, 1647388800,
1648080000, 1648512000, 1649203200, 1649721600, 1650326400, 1650931200
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), have = c(0,
7, 8, 5, 21, 8, 6, 7, 8, 7, 5, 7, 8, 7, 8, 0, 7, 7, 7, 16, 22,
6, 8, 5, 7, 8, 5, 8, 6, 7, 7), want = c(0, 7, 8, 5, 21, 8, 6,
0, 0, 1, 5, 7, 8, 7, 8, 0, 7, 7, 7, 16, 22, 6, 8, 5, 0, 0, 0,
8, 6, 7, 7)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-31L))
我尝试了下面的代码,但它没有正确运行,既没有找到正确的变量名称,也没有将许多观察值归零。
#' Zero the days-at-risk column for collections shortly after a positive result
#'
#' For every row whose result equals `positive_value`, all rows of the same
#' identifier collected strictly after that row and no more than `window_days`
#' days later (inclusive) get their `days_col` value set to 0. A positive row
#' is itself zeroed only when it falls inside the window of an earlier
#' positive. Rows do not need to be sorted.
#'
#' @param df A data frame or tibble.
#' @param id_col Name of the column identifying the subject.
#' @param result_col Name of the column holding the test result.
#' @param date_col Name of the collection date column (`Date` or `POSIXct`).
#' @param days_col Name of the numeric column to zero inside the window.
#' @param positive_value Value of `result_col` that marks a positive result.
#' @param window_days Window length in 24-hour days after a positive result.
#' @return `df` with `days_col` set to 0 for the affected rows.
set_days_at_risk <- function(df,
                             id_col = "identifier",
                             result_col = "result",
                             date_col = "date_collection",
                             days_col = "have",
                             positive_value = "Positivo",
                             window_days = 21) {
  # Fail loudly on a misspelled column instead of silently doing nothing.
  missing_cols <- setdiff(c(id_col, result_col, date_col, days_col), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `df`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  ids <- df[[id_col]]
  dates <- df[[date_col]]
  # `%in%` never yields NA, so rows with a missing result are simply skipped.
  positive_rows <- which(df[[result_col]] %in% positive_value)

  in_window <- rep(FALSE, nrow(df))
  for (i in positive_rows) {
    # Elapsed days from this positive to every row; works for Date and POSIXct.
    elapsed <- as.numeric(difftime(dates, dates[i], units = "days"))
    in_window <- in_window |
      (ids == ids[i] & elapsed > 0 & elapsed <= window_days)
  }
  # Rows with a missing id or date cannot be placed in a window; leave them.
  in_window[is.na(in_window)] <- FALSE

  df[[days_col]][in_window] <- 0
  df
}
我认为您的 `want` 列中有一处错误:df[7,5] 应该为 0。另外,此示例假设“21 天内”指的是“< 21 天”;如果不是这样,请留言,我会更新此答案。
逐步的方法:
library(dplyr)

# Build `want`: the gap in days since the previous collection, set to 0 when
# the row falls less than 21 days after the most recent earlier Positivo of
# the same identifier. Every Positivo opens its own window, including one in
# an identifier's first row. Rows are assumed to be ordered by date_collection
# within each identifier, and date_collection is assumed to be POSIXct (as in
# the sample data), so as.numeric() yields seconds.
df1 <- df |>
  group_by(identifier) |>
  mutate(
    # Days since the previous collection; 0 for an identifier's first row.
    gap = as.integer(difftime(date_collection,
                              lag(date_collection),
                              units = "days")),
    gap = coalesce(gap, 0L),
    # Timestamp of each Positivo row, -Inf elsewhere, so the running maximum
    # is the latest positive seen so far. `%in%` keeps NA results out.
    pos_time = if_else(result %in% "Positivo",
                       as.numeric(date_collection),
                       -Inf),
    # Shift by one row so a positive does not count as "after itself".
    last_pos = lag(cummax(pos_time), default = -Inf),
    # Inf when there is no earlier positive, so the test below is FALSE.
    days_since_pos = (as.numeric(date_collection) - last_pos) / 86400,
    want = if_else(days_since_pos < 21, 0L, gap)
  ) |>
  ungroup() |>
  select(result, identifier, date_collection, want)
data.frame(df1)
# result identifier date_collection want
# 1 Negativo a 2021-09-08 0
# 2 Negativo a 2021-09-15 7
# 3 Negativo a 2021-09-23 8
# 4 Negativo a 2021-09-28 5
# 5 Negativo a 2021-10-19 21
# 6 Positivo a 2021-10-27 8
# 7 Positivo a 2021-11-02 0
# 8 Negativo a 2021-11-09 0
# 9 Negativo a 2021-11-17 0
# 10 Negativo a 2021-11-24 7
# 11 Negativo a 2021-11-29 5
# 12 Negativo a 2021-12-06 7
# 13 Negativo a 2021-12-14 8
# 14 Negativo a 2021-12-21 7
# 15 Negativo a 2021-12-29 8
# 16 Negativo b 2021-12-21 0
# 17 Negativo b 2021-12-28 7
# 18 Negativo b 2022-01-04 7
# 19 Negativo b 2022-01-11 7
# 20 Negativo b 2022-01-27 16
# 21 Negativo b 2022-02-18 22
# 22 Negativo b 2022-02-24 6
# 23 Negativo b 2022-03-04 8
# 24 Positivo b 2022-03-09 5
# 25 Negativo b 2022-03-16 0
# 26 Negativo b 2022-03-24 0
# 27 Negativo b 2022-03-29 0
# 28 Negativo b 2022-04-06 8
# 29 Negativo b 2022-04-12 6
# 30 Negativo b 2022-04-19 7
# 31 Negativo b 2022-04-26 7