创建条件变量

Question

早上好，

附数据：

# Sample data: a single id (2) with the visit month (vm) and the GE flag
# recorded at each visit. Months 18 and 24 are not present.
id <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
vm <- c("13", "14", "15", "16", "17", "19", "20", "21", "22", "23")
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0")
fichier <- data.frame(id, vm, GE)

附R代码：

library(tidyverse)

# Label each visit: "infection" when GE is 1 at one of the recorded visit
# months (13-17, 19-23), "noinfection" otherwise.
# Columns are referenced by name rather than through `fichier$`, so the
# expression is evaluated on the data flowing through the pipe and keeps
# working if that data is grouped or filtered beforehand.
fichier <- fichier %>%
  mutate(statut = case_when(
    vm %in% c(13:17, 19:23) & GE == 1 ~ "infection",
    TRUE ~ "noinfection"
  ))

附结果：

# Result obtained so far: `statut` is filled in for the recorded visits only
# (months 18 and 24 are still absent).
id <- c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2)
vm <- c("13", "14", "15", "16", "17", "19", "20", "21", "22", "23")
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0")
statut <- c("noinfection", "noinfection", "noinfection", "noinfection", "noinfection", "noinfection", "infection", "noinfection", "infection", "noinfection")
fichier <- data.frame(id, vm, GE, statut)

我希望能够为 vm = 18（月）和 vm = 24（月）这两次访视补全 “statut”（状态）变量。

a) 我想在 vm = 18 时给出这个变量

- 如果在此前 13 至 17 个月的访视中 GE = 0，则取值为 0。

- 如果在此前 13 至 17 个月的访视中 GE = 1，则取值为 1。

b）我想在 vm = 24 处给出这个变量

- 如果在此前 19 至 22 个月的访视中 GE = 0，则取值为 0。

- 如果在此前 19 至 22 个月的访视中 GE = 1，则取值为 1。

提前感谢您的帮助！

Answer 1

我在您的描述中看到两种潜在的情况：

场景一：vm = 18 的“状态”为 0，vm = 24 的“状态”为 1，因为在 24 对应的时间范围内，至少有一个先前的 GE 值等于 1。
场景二：vm = 18 的“状态”为 0，vm = 24 的“状态”为 NA，因为在 24 对应的时间范围内，先前的 GE 值并非全部相同。

我已经为这两种情况分别给出了解决方案。此外，您提到的是“此前 19 至 22 个月的访视”，您的意思是否其实是“此前 19 至 23 个月的访视”？本回答假设是后者。如果这个假设不正确，请在下面评论，我会更新解决方案。

加载所需的包和您的数据：

library(dplyr)
library(tidyr)

# Sample data from the question: ten visits for id 2 (months 18 and 24 absent)
id <- rep(2, times = 10)
vm <- as.character(c(13:17, 19:23))
GE <- c(rep("0", 6), "1", "0", "1", "0")

fichier <- data.frame(id, vm, GE)

场景一：

# Scenario 1: within each window (months 13-18 and 19-24), a missing visit
# gets GE = 1 when any recorded visit in that window has GE == 1, and GE = 0
# when none has.
fichier <- fichier |>
  mutate(across(c(vm, GE), as.integer)) |>
  group_by(id) |> # complete each id separately in case there are several
  complete(vm = 13:24) |> # adds the absent months with GE = NA
  fill(id, .direction = "down") |>
  ungroup() |>
  mutate(tmp = if_else(vm <= 18, 1, 2)) |> # 1 = months 13-18, 2 = months 19-24
  mutate(
    n_pos = sum(GE, na.rm = TRUE), # recorded GE total for this id/window
    GE = case_when(
      !is.na(GE) ~ as.numeric(GE), # recorded visits keep their value
      n_pos == 0 ~ 0,
      n_pos > 0 ~ 1
    ),
    status = if_else(GE == 1, "infection", "noinfection"),
    .by = c(id, tmp)
  ) |>
  select(-c(tmp, n_pos))

fichier
# # A tibble: 12 × 4
#       id    vm    GE status     
#    <dbl> <int> <dbl> <chr>      
#  1     2    13     0 noinfection
#  2     2    14     0 noinfection
#  3     2    15     0 noinfection
#  4     2    16     0 noinfection
#  5     2    17     0 noinfection
#  6     2    18     0 noinfection
#  7     2    19     0 noinfection
#  8     2    20     1 infection  
#  9     2    21     0 noinfection
# 10     2    22     1 infection  
# 11     2    23     0 noinfection
# 12     2    24     1 infection

场景2（场景一的代码会覆盖 fichier，运行本段之前请先重新创建上面的示例数据）：

# Scenario 2: fill a missing visit only when every recorded visit in its
# window (months 13-18 or 19-24) has the same GE value; otherwise GE stays NA
# and the visit is labelled "mixed".
fichier <- fichier |>
  mutate(across(c(vm, GE), as.integer)) |>
  group_by(id) |> # assuming your data may have multiple id values
  complete(vm = 13:24) |> # adds the absent months with GE = NA
  fill(id, .direction = "down") |>
  mutate(tmp = if_else(vm <= 18, 1, 2)) |> # 1 = months 13-18, 2 = months 19-24
  group_by(id, tmp) |>
  mutate(tmp1 = +(n_distinct(GE, na.rm = TRUE) == 1), # 1 if recorded GE values all agree
         # na_rm = TRUE takes the first *recorded* GE, so the fill still works
         # when the first visit of the window is itself missing.
         GE = case_when(is.na(GE) & tmp1 == 1 & first(GE, na_rm = TRUE) == 0 ~ 0,
                        is.na(GE) & tmp1 == 1 & first(GE, na_rm = TRUE) == 1 ~ 1,
                        .default = GE),
         status = case_when(GE == 0 ~ "noinfection",
                            GE == 1 ~ "infection",
                            .default = "mixed")) |>
  ungroup() |>
  select(-starts_with("tmp"))

fichier
# # A tibble: 12 × 4
#       id    vm    GE status     
#    <dbl> <int> <dbl> <chr>      
#  1     2    13     0 noinfection
#  2     2    14     0 noinfection
#  3     2    15     0 noinfection
#  4     2    16     0 noinfection
#  5     2    17     0 noinfection
#  6     2    18     0 noinfection
#  7     2    19     0 noinfection
#  8     2    20     1 infection  
#  9     2    21     0 noinfection
# 10     2    22     1 infection  
# 11     2    23     0 noinfection
# 12     2    24    NA mixed

创建条件变量

问题描述投票：0回答：1

1个回答

最新问题

创建条件变量

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1