早上好,
附数据:
# Sample data: one subject (id = 2) with visits at months 13-17 and 19-23.
# `vm` (visit month) and `GE` are stored as character strings.
id <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
vm <- c("13", "14", "15", "16", "17", "19", "20", "21", "22", "23")
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0")
# Combine the three vectors into the data frame used by the code below.
fichier <- data.frame(id, vm, GE)
附R代码:
library(tidyverse)
# Derive `statut`: "infection" when GE is 1 at one of the observed visit
# months (13-17 and 19-23), otherwise "noinfection".
# Columns are referenced by bare name rather than `fichier$vm`: inside
# mutate() bare names refer to the data currently flowing through the pipe,
# so the result stays correct if the data is grouped or filtered upstream.
fichier <- fichier %>%
  mutate(
    statut = case_when(
      # vm and GE are character columns, so compare against strings.
      vm %in% as.character(c(13:17, 19:23)) & GE == "1" ~ "infection",
      TRUE ~ "noinfection"
    )
  )
附结果:
# Result of the code above: `statut` is "infection" only at the visits
# where GE is "1" (months 20 and 22) and "noinfection" everywhere else.
id <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
vm <- c("13", "14", "15", "16", "17", "19", "20", "21", "22", "23")
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0")
statut <- c(
  "noinfection", "noinfection", "noinfection", "noinfection", "noinfection",
  "noinfection", "infection", "noinfection", "infection", "noinfection"
)
# The fourth column is the `statut` vector defined just above.
fichier <- data.frame(id, vm, GE, statut)
我希望能在 vm = 18(月)和 vm = 24(月)处补全“statut”(状态)变量。
a) 我想在 vm = 18 时给出这个变量
- 如果在此前 13 至 17 个月的访问中 GE = 0,则取值为 0。
- 如果在此前 13 至 17 个月的访问中 GE = 1,则取值为 1。
b)我想在 vm = 24 处给出这个变量
- 如果在此前 19 至 22 个月的访问中 GE = 0,则取值为 0。
- 如果在此前 19 至 22 个月的访问中 GE = 1,则取值为 1。
提前感谢您的帮助!
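我在您的描述中看到两种可能的情况:
- 场景一:如果此前所有访问的 GE 均为 0,则补 0;如果此前任一访问的 GE 为 1,则补 1。
- 场景二:如果此前所有访问的 GE 均为 0,则补 0;如果此前所有访问的 GE 均为 1,则补 1。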
我已经为这两种情况分别给出了解决方案。此外,您提到“在 19 至 22 个月的此前访问中”。您的意思是否为“在 19 至 23 个月的此前访问中”?本回答按后者处理。如果不对,请在下方评论,我会更新解决方案。
加载所需的包和您的数据:
library(dplyr)
library(tidyr)
# Rebuild the sample data from the question (one subject, ten visits)
id <- rep(2, times = 10)
vm <- as.character(c(13:17, 19:23))
GE <- as.character(c(0, 0, 0, 0, 0, 0, 1, 0, 1, 0))
fichier <- data.frame(id = id, vm = vm, GE = GE)
场景一:
# Scenario 1: insert the missing visit months and fill their GE with 0 when
# every observed visit in the same window is 0, or with 1 when any is 1.
fichier <- fichier |>
  mutate(vm = as.integer(vm), GE = as.integer(GE)) |>
  group_by(id) |> # the data may contain several id values
  complete(vm = 13:24) |>
  fill(id, .direction = "down") |>
  # Window 1 covers vm 13-18, window 2 covers vm 19-24.
  mutate(period = 1 + (vm > 18)) |>
  group_by(id, period) |>
  mutate(
    GE = case_when(
      # Observed visits keep their own value.
      !is.na(GE) ~ GE,
      # Missing visits take 0 or 1 depending on the window's observed total.
      sum(GE, na.rm = TRUE) == 0 ~ 0,
      sum(GE, na.rm = TRUE) > 0 ~ 1
    ),
    status = if_else(GE == 1, true = "infection", false = "noinfection")
  ) |>
  ungroup() |>
  select(-period)
fichier
# # A tibble: 12 × 4
# id vm GE status
# <dbl> <int> <dbl> <chr>
# 1 2 13 0 noinfection
# 2 2 14 0 noinfection
# 3 2 15 0 noinfection
# 4 2 16 0 noinfection
# 5 2 17 0 noinfection
# 6 2 18 0 noinfection
# 7 2 19 0 noinfection
# 8 2 20 1 infection
# 9 2 21 0 noinfection
# 10 2 22 1 infection
# 11 2 23 0 noinfection
# 12 2 24 1 infection
场景2:
# Add missing data if all previous visit == 0 or all previous visits == 1
# A missing visit is filled only when every observed GE in its window agrees;
# otherwise GE stays NA and the row is labelled "mixed".
# NOTE: the printed output below assumes `fichier` is the original sample
# data. If `fichier` was already overwritten by an earlier pipeline,
# re-create it before running this one.
fichier <- fichier |>
  mutate(across(c(vm, GE), as.integer)) |>
  group_by(id) |> # assuming your data may have multiple id values
  complete(vm = 13:24) |>
  fill(id, .direction = "down") |>
  # Window 1 covers vm 13-18, window 2 covers vm 19-24.
  mutate(tmp = if_else(vm <= 18, 1, 2)) |>
  group_by(id, tmp) |>
  mutate(
    # tmp1 is 1 when the window's non-missing GE values are all identical.
    tmp1 = +(n_distinct(GE, na.rm = TRUE) == 1),
    # Take the first *observed* GE of the window: a plain first(GE) is NA
    # when the window starts with a missing visit, which would block the fill.
    GE = case_when(
      is.na(GE) & tmp1 == 1 & first(GE, na_rm = TRUE) == 0 ~ 0,
      is.na(GE) & tmp1 == 1 & first(GE, na_rm = TRUE) == 1 ~ 1,
      .default = GE
    ),
    status = case_when(
      GE == 0 ~ "noinfection",
      GE == 1 ~ "infection",
      .default = "mixed"
    )
  ) |>
  ungroup() |>
  select(-starts_with("tmp"))
fichier
# # A tibble: 12 × 4
# id vm GE status
# <dbl> <int> <dbl> <chr>
# 1 2 13 0 noinfection
# 2 2 14 0 noinfection
# 3 2 15 0 noinfection
# 4 2 16 0 noinfection
# 5 2 17 0 noinfection
# 6 2 18 0 noinfection
# 7 2 19 0 noinfection
# 8 2 20 1 infection
# 9 2 21 0 noinfection
# 10 2 22 1 infection
# 11 2 23 0 noinfection
# 12 2 24 NA mixed