在纵向数据集中,如何根据另一个随时间变化的诊断变量创建 0/1 状态变量

问题描述 投票:0回答:1

我正在研究性传播疾病(例如淋病)及其可能的后果(例如女性宫外孕)。我的数据集 (df_gono) 包含按年份类别、年龄类别、性别和艾滋病毒状况分类的纵向流行病学数据。它还有一个变量 n_gono,记录每位患者在每个 age_cat、year_cat 和 HIV 状态组合下的淋病诊断次数(从 0 到 n 的整数)。我想创建一个表示患者“淋病状态”的变量 status_gono:患者在某个 age_cat 和 year_cat 中至少被诊断一次后,该变量即取值 1。换句话说,对于每位患者,一旦变量 n_gono 大于 0,该行及其后所有行的 status_gono 都取 1。期望的结果是 df_gono2。

# Example input: one row per patient and age / calendar-year / HIV stratum.
# `n_gono` is the number of gonorrhoea diagnoses recorded in that row.
df_gono <- data.frame(
  patient = rep(c("A", "B", "C", "D", "E"), times = c(6, 3, 5, 4, 3)),
  sex = rep("Female", 21),
  age_current_cat = c(
    5, 5, 6, 6, 6, 7, 7, 8, 8, 5, 6, 6, 7, 7, 3, 3, 4, 4, 6, 7, 7
  ),
  calyear_current_cat = c(
    2, 2, 2, 3, 4, 4, 2, 2, 3, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 4, 4
  ),
  age_cat = c(
    "30-34", "30-34", "35-39", "35-39", "35-39", "40-44", "40-44",
    "45-49", "45-49", "30-34", "35-39", "35-39", "40-44", "40-44",
    "20-24", "20-24", "25-29", "25-29", "35-39", "40-44", "40-44"
  ),
  year_cat = c(
    "2011-2013", "2011-2013", "2011-2013", "2014-2017", "2018-2020",
    "2018-2020", "2011-2013", "2011-2013", "2014-2017", "2011-2013",
    "2011-2013", "2014-2017", "2014-2017", "2018-2020", "2011-2013",
    "2014-2017", "2014-2017", "2018-2020", "2018-2020", "2018-2020",
    "2018-2020"
  ),
  hiv = c(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
  pat_year = c(
    0.05475702, 2.52635181, 0.41957563, 3.00068446, 1.57973990,
    1.91649555, 0.08555784, 2.91512663, 2.08076660, 0.08418891,
    2.91649555, 2.08350445, 0.91718001, 3.49623546, 3.00068446,
    0.83299110, 2.16769336, 0.99657769, 0.25188227, 0.18343600,
    2.22861054
  ),
  n_gono = c(0, 1, 0, 2, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 7, 1, 1, 0, 0, 2, 7)
)

# Expected result: the same rows as the input data plus `status_gono`, which
# is 1 from a patient's first row with n_gono > 0 onwards and 0 before that.
df_gono2 <- data.frame(
  patient = rep(c("A", "B", "C", "D", "E"), times = c(6, 3, 5, 4, 3)),
  sex = rep("Female", 21),
  age_current_cat = c(
    5, 5, 6, 6, 6, 7, 7, 8, 8, 5, 6, 6, 7, 7, 3, 3, 4, 4, 6, 7, 7
  ),
  calyear_current_cat = c(
    2, 2, 2, 3, 4, 4, 2, 2, 3, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 4, 4
  ),
  age_cat = c(
    "30-34", "30-34", "35-39", "35-39", "35-39", "40-44", "40-44",
    "45-49", "45-49", "30-34", "35-39", "35-39", "40-44", "40-44",
    "20-24", "20-24", "25-29", "25-29", "35-39", "40-44", "40-44"
  ),
  year_cat = c(
    "2011-2013", "2011-2013", "2011-2013", "2014-2017", "2018-2020",
    "2018-2020", "2011-2013", "2011-2013", "2014-2017", "2011-2013",
    "2011-2013", "2014-2017", "2014-2017", "2018-2020", "2011-2013",
    "2014-2017", "2014-2017", "2018-2020", "2018-2020", "2018-2020",
    "2018-2020"
  ),
  hiv = c(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
  pat_year = c(
    0.05475702, 2.52635181, 0.41957563, 3.00068446, 1.57973990,
    1.91649555, 0.08555784, 2.91512663, 2.08076660, 0.08418891,
    2.91649555, 2.08350445, 0.91718001, 3.49623546, 3.00068446,
    0.83299110, 2.16769336, 0.99657769, 0.25188227, 0.18343600,
    2.22861054
  ),
  n_gono = c(0, 1, 0, 2, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 7, 1, 1, 0, 0, 2, 7),
  status_gono = c(
    0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1
  )
)
r time status longitudinal
1个回答
0
投票

尝试这个解决方案,您需要先安装 dplyr 和 tidyr 包:

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tidyr)

# Example data including the expected `status_gono` column, so the computed
# column can be compared against it further down.
df_gono2 <- data.frame(
  patient = rep(c("A", "B", "C", "D", "E"), times = c(6, 3, 5, 4, 3)),
  sex = rep("Female", 21),
  age_current_cat = c(
    5, 5, 6, 6, 6, 7, 7, 8, 8, 5, 6, 6, 7, 7, 3, 3, 4, 4, 6, 7, 7
  ),
  calyear_current_cat = c(
    2, 2, 2, 3, 4, 4, 2, 2, 3, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 4, 4
  ),
  age_cat = c(
    "30-34", "30-34", "35-39", "35-39", "35-39", "40-44", "40-44",
    "45-49", "45-49", "30-34", "35-39", "35-39", "40-44", "40-44",
    "20-24", "20-24", "25-29", "25-29", "35-39", "40-44", "40-44"
  ),
  year_cat = c(
    "2011-2013", "2011-2013", "2011-2013", "2014-2017", "2018-2020",
    "2018-2020", "2011-2013", "2011-2013", "2014-2017", "2011-2013",
    "2011-2013", "2014-2017", "2014-2017", "2018-2020", "2011-2013",
    "2014-2017", "2014-2017", "2018-2020", "2018-2020", "2018-2020",
    "2018-2020"
  ),
  hiv = c(0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
  pat_year = c(
    0.05475702, 2.52635181, 0.41957563, 3.00068446, 1.57973990,
    1.91649555, 0.08555784, 2.91512663, 2.08076660, 0.08418891,
    2.91649555, 2.08350445, 0.91718001, 3.49623546, 3.00068446,
    0.83299110, 2.16769336, 0.99657769, 0.25188227, 0.18343600,
    2.22861054
  ),
  n_gono = c(0, 1, 0, 2, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 7, 1, 1, 0, 0, 2, 7),
  status_gono = c(
    0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1
  )
)

# Flag every row from a patient's first gonorrhoea diagnosis onwards.
# fill() carries values down in row order, so this relies on the rows of each
# patient already being sorted chronologically.
df_gono2 <- df_gono2 |>
  group_by(patient) |>
  # Mark rows with at least one diagnosis; leave the others NA so that fill()
  # can carry the 1 forward within each patient. NA_real_ keeps the column
  # double even for patients who never have a diagnosis.
  mutate(status_gono2 = ifelse(n_gono >= 1, 1, NA_real_)) |>
  fill(status_gono2, .direction = "down") |>
  # Rows that are still NA precede the first diagnosis: status 0.
  replace_na(list(status_gono2 = 0)) |>
  # Drop the grouping so later verbs do not silently operate per patient.
  ungroup()

# Compare the expected column with the computed one.
select(df_gono2, patient, status_gono, status_gono2) |>
  print(n = 30)
#> # A tibble: 21 × 3
#>    patient status_gono status_gono2
#>    <chr>         <dbl>        <dbl>
#>  1 A                 0            0
#>  2 A                 1            1
#>  3 A                 1            1
#>  4 A                 1            1
#>  5 A                 1            1
#>  6 A                 1            1
#>  7 B                 0            0
#>  8 B                 1            1
#>  9 B                 1            1
#> 10 C                 0            0
#> 11 C                 0            0
#> 12 C                 0            0
#> 13 C                 0            0
#> 14 C                 0            0
#> 15 D                 1            1
#> 16 D                 1            1
#> 17 D                 1            1
#> 18 D                 1            1
#> 19 E                 0            0
#> 20 E                 1            1
#> 21 E                 1            1

创建于 2024-08-27,使用 reprex v2.1.0

© www.soinside.com 2019 - 2024. All rights reserved.