我有一个数据框,其中有一列全为 NA,我需要用另一个表中的值来替换这些 NA。这个问题本身相对简单,并且已有解答,参见:「将一个 DF 中的 NA 替换为另一个 DF 中的值」。
我把问题简化成了一个非常简单的可复现示例(reprex),但仍然得到如下错误:
Error in `[<-`:
! Can't use NA as column index in a tibble for assignment.
Backtrace:
1. base::`[<-`(...)
2. tibble:::`[<-.tbl_df`(...)
可复现示例(reprex):
# Target table: every value in `df` is NA and must be filled in from
# the replacement table, matched on `id`.
to_be_replaced <- structure(
  list(
    id = as.character(20:23),
    df = rep(NA_real_, 4),
    factor = rep("a", 4)
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)
# Replacement table: `id` is a factor with levels "1".."24"; `df` holds
# the values to copy over (`df_min` has no counterpart in the target).
to_insert <- structure(
  list(
    id = factor(as.character(20:23), levels = as.character(1:24)),
    df_min = c(1000, 1450, NA, NA),
    df = c(60000, 90000, NA, NA)
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)
这是我使用 `match` 的尝试:
# Fails: the column index match(names(to_insert), names(to_be_replaced))
# evaluates to c(1, NA, 2) because `df_min` does not exist in
# to_be_replaced, and tibble refuses NA as a column index in assignment.
to_be_replaced[match(to_insert$id, to_be_replaced$id), match(names(to_insert), names(to_be_replaced))] <- to_insert
Error in `[<-`(`*tmp*`, match(to_insert$id, to_be_replaced$id), match(names(to_insert), :
9.
stop(fallback)
8.
signal_abort(cnd, .file)
7.
abort(x, class, ..., call = call, parent = parent, use_cli_format = TRUE)
6.
tibble_abort(call = call, "Can't use NA as column index in a tibble for assignment.")
5.
abort_assign_columns_non_na_only(call)
4.
vectbl_as_new_col_index(j, xo, j_arg, names2(value), value_arg,
call = call)
3.
tbl_subassign(x, i, j, value, i_arg, j_arg, substitute(value))
2.
`[<-.tbl_df`(`*tmp*`, match(to_insert$id, to_be_replaced$id),
match(names(to_insert), names(to_be_replaced)), value = structure(list(
id = c("20", "21", "22", "23"), df_min = c(1000, 1450,
NA, NA), df = c(60000, 90000, NA, NA)), row.names = c(NA, ...
1.
`[<-`(`*tmp*`, match(to_insert$id, to_be_replaced$id), match(names(to_insert),
names(to_be_replaced)), value = structure(list(id = c("20",
"21", "22", "23"), df_min = c(1000, 1450, NA, NA), df = c(60000,
90000, NA, NA)), row.names = c(NA, -4L), class = c("tbl_df", ...
我也尝试过使用 `replace`,结果好坏参半:
# This doesn't work: to_insert[, "df"] is still a one-column tibble (a
# list), not a numeric vector, so replace() turns `df` into a list column
# (see the <list> column type in the output below).
to_be_replaced |>
mutate(df = replace(df, id == "20" | id == "21", to_insert[,"df"]))
# A tibble: 4 × 3
id df factor
<chr> <list> <chr>
1 20 <dbl [4]> a
2 21 <dbl [4]> a
3 22 <dbl [1]> a
4 23 <dbl [1]> a
# This yields the expected values but emits a warning (not an error):
# the condition selects only 2 rows while to_insert$df supplies 4 values,
# so the replacement length does not match the number of items to replace.
to_be_replaced |>
mutate(df = replace(df, id == "20" | id == "21", to_insert$df))
# A tibble: 4 × 3
id df factor
<chr> <dbl> <chr>
1 20 60000 a
2 21 90000 a
3 22 NA a
4 23 NA a
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `df = replace(df, id == "20" | id == "21", to_insert$df)`.
Caused by warning in `x[list] <- values`:
! number of items to replace is not a multiple of replacement length
`match(names(to_insert), names(to_be_replaced))` 的结果中含有 NA(`df_min` 列在 `to_be_replaced` 中不存在)。正如错误信息所述,在进行赋值替换时不能把 NA 用作列索引。可以改用 `intersect`,只选取两个表共有的列:
# Fill the NA cells of to_be_replaced with the values that to_insert holds
# for the same id, restricted to columns that exist in both tables.

# Key column identifying rows in both tables.
key <- "id"

# Columns present in both tables. The key itself is left untouched so the
# character `id` is not overwritten by the factor `id` of to_insert.
cv <- setdiff(intersect(names(to_be_replaced), names(to_insert)), key)

# For each row of to_be_replaced, the row of to_insert with the same id
# (NA when to_insert has no such id). Both sides are compared as character
# because to_insert$id is a factor.
rv <- match(
  as.character(to_be_replaced[[key]]),
  as.character(to_insert[[key]])
)

for (col in cv) {
  # Only touch cells that are NA and have a matching replacement row:
  # existing values are kept, and ids missing from to_insert never produce
  # an NA index in the assignment.
  fill <- is.na(to_be_replaced[[col]]) & !is.na(rv)
  to_be_replaced[[col]][fill] <- to_insert[[col]][rv[fill]]
}

to_be_replaced
# id df factor
#1 20 60000 a
#2 21 90000 a
#3 22 NA a
#4 23 NA a