我有数据框 df_have。我想创建数据框 df_want:对每个患者 ID(USUBJID),用 FLAG 的上一个非缺失值向下填充缺失值,直到 FLAG 出现下一个非缺失值为止。我尝试了 tidyr 的 fill(FLAG, .direction = "down"),但它会把先前的值一直延续到之后的所有观察中。一旦 FLAG == "Y",我希望停止填充 FLAG 列。
# Example input: three subjects with six rows each. Within every subject,
# FLAG starts at "N", switches to "Y" once, and is NA everywhere else.
USUBJID <- rep(c(1, 2, 3), each = 6)
FLAG <- c(
  "N", NA, NA, "Y", NA, NA,
  "N", NA, NA, "Y", NA, NA,
  "N", NA, "Y", NA, NA, NA
)
df_have <- data.frame(USUBJID, FLAG)
df_have
# Expected result: within each subject the leading "N" is carried forward up
# to the row holding "Y"; the NAs that follow "Y" stay missing.
USUBJID <- rep(c(1, 2, 3), each = 6)
FLAG <- c(
  "N", "N", "N", "Y", NA, NA,
  "N", "N", "N", "Y", NA, NA,
  "N", "N", "Y", NA, NA, NA
)
df_want <- data.frame(USUBJID, FLAG)
df_want
# Fill the missing values that sit between the first and the second
# non-missing element of `x` with the first non-missing value.
#
# Elements before the first non-missing value are left as NA, and nothing at
# or after the second non-missing value is modified, so filling stops as soon
# as a new value is observed. If `x` holds only one non-missing value, every
# NA after it is filled.
#
# x: an atomic vector (e.g. the FLAG column of one subject).
# Returns a vector of the same length and type as `x`.
foo <- function(x) {
  observed <- which(!is.na(x))

  # Empty or all-NA input: there is no value to carry forward.
  if (length(observed) == 0L) {
    return(x)
  }

  first_pos <- observed[1L]

  # Stop right before the second observed value; with a single observed
  # value the fill runs to the end of the vector.
  last_pos <- if (length(observed) >= 2L) observed[2L] - 1L else length(x)

  # Anchoring on the first observed position (instead of position 1) means
  # leading NAs no longer prevent the fill from happening.
  x[first_pos:last_pos] <- x[first_pos]
  x
}
# Run foo() on FLAG separately within each USUBJID group and store the
# result in a new column, FLAG2, alongside the original FLAG.
mutate(
  group_by(df_have, USUBJID),
  FLAG2 = foo(FLAG)
)
#    USUBJID FLAG  FLAG2
# <dbl> <chr> <chr>
# 1 1 N N
# 2 1 NA N
# 3 1 NA N
# 4 1 Y Y
# 5 1 NA NA
# 6 1 NA NA
# 7 2 N N
# 8 2 NA N
# 9 2 NA N
# 10 2 Y Y
# 11 2 NA NA
# 12 2 NA NA
# 13 3 N N
# 14 3 NA N
# 15 3 Y Y
# 16 3 NA NA
# 17 3 NA NA
# 18 3 NA NA