如何使用该行最后一个非空单元格中的值填充数据帧每行中的空单元格?

问题描述 投票:0回答:6

我有一个包含不完整行的数据框(或数据表,如果这更容易的话):

ID Var1 Var2 Var3
1     2    5    1
2    12    3
3     8
4     4

# Sample data as a plain data.frame; NA marks the empty trailing cells.
d <- data.frame(
  ID   = seq_len(4),
  Var1 = c(2, 12, 8, 4),
  Var2 = c(5, 3, NA_real_, NA_real_),
  Var3 = c(1, rep(NA_real_, 3))
)

library(data.table)
# Same sample data as a data.table, parsed from the inline text below.
d <- fread(text = "
  ID Var1 Var2 Var3
  1 2 5 1
  2 12 3 NA
  3 8 NA NA
  4 4 NA NA
")

空单元格始终位于行的末尾。

我想用该行最后一个非空单元格中的值填充每行中的空单元格,例如:

ID Var1 Var2 Var3
1     2    5    1
2    12    3 -> 3
3     8 -> 8 -> 8
4     4 -> 4 -> 4

我该怎么做?


我不想使用 dplyr,也不想按列(纵向)填充。

r dataframe data.table na
6个回答
4
投票

使用

collapse
包的另一个答案,它的优点是特别快(比
data.table
快):

library(collapse)
# Apply collapse's na_locf (last observation carried forward) to every row
# (MARGIN = 1) of `d`.
dapply(X = d, FUN = na_locf, MARGIN = 1)

#   ID Var1 Var2 Var3
# 1  1    2    5    1
# 2  2   12    3    3
# 3  3    8    8    8
# 4  4    4    4    4

微基准:

# Unit: microseconds
#        expr     min       lq      mean   median      uq     max neval
#    collapse    69.5   112.95   244.847   135.45   161.7  9964.4   100
#          dt   592.9   788.70  1237.643   874.70  1186.6 14563.1   100
#       tidyr 32283.2 36170.80 41293.420 40501.55 43809.1 75417.8   100
#   Reduce_dt   645.0   803.70  1083.373   954.05  1222.6  2367.5   100
#  Reduce_TiC   383.9   499.25   661.475   586.40   687.6  5179.1   100

基准代码:

# Benchmark of the row-wise fill approaches.
# `d` must be a data.table here (the `:=` expression requires one); the
# collapse, data.table and tidyr functions and the `%>%` pipe are expected to
# be attached already.
microbenchmark::microbenchmark(
  collapse = dapply(d, na_locf, MARGIN = 1),
  dt = data.table::transpose(
    setnafill(data.table::transpose(d, keep.names = "ID"),
              type = "locf", cols = seq_len(nrow(d)) + 1L),
    make.names = "ID"),
  tidyr = d %>%
    pivot_longer(-ID) %>%
    fill(value, .direction = "down") %>%
    pivot_wider(),
  # `:=` updates by reference, so operate on a copy; otherwise the first
  # evaluation fills `d` and every later run is timed on NA-free data.
  # Note that the cost of the copy is included in this timing.
  Reduce_dt = data.table::copy(d)[
    , (sprintf("Var%d", 1:3)) := Reduce(
      \(x, y) ifelse(is.na(y), x, y), .SD, accumulate = TRUE
    ),
    .SDcols = sprintf("Var%d", 1:3)
  ],
  # `d[, -1]` drops the ID column; on a data.table `d[-1]` would drop the
  # first row instead.
  Reduce_TiC = Reduce(
    \(x, y) ifelse(is.na(y), x, y), d[, -1], accumulate = TRUE
  )
)

4
投票

转置并填补缺失,然后再次转置。

library(data.table)

# Transpose so that every original row becomes a column, carry the last
# observation forward down each of those columns, then transpose back.
transpose(
  setnafill(transpose(d, keep.names = "ID"),
            type = "locf",
            # Column 1 holds the kept names; the nrow(d) data columns follow.
            # seq_len() is used because 1:nrow(d) counts down (1, 0) when
            # there are no rows.
            cols = seq_len(nrow(d)) + 1L),
  make.names = "ID")

#       ID  Var1  Var2  Var3
#    <int> <int> <int> <int>
# 1:     1     2     5     1
# 2:     2    12     3     3
# 3:     3     8     8     8
# 4:     4     4     4     4

3
投票

这是一种只使用基础 R(base R)的方法:

# Example input: integer columns whose NA cells only occur at the row ends.
start <- data.frame(
  ID = 1:4,
  Var1 = c(2L, 12L, 8L, 4L),
  Var2 = c(5L, 3L, NA, NA),
  Var3 = c(1L, NA, NA, NA)
)
print(start)


# Step 1: for every row, store its last non-NA entry in a helper column.
start$last <- apply(start, MARGIN = 1, FUN = function(values) {
  non_missing <- na.omit(as.matrix(values))
  tail(non_missing, n = 1)
})

start$last

# Step 2: replace each NA by the row's final element (the helper value),
# drop that helper element again, and turn the transposed result back into
# a data.frame.
fin <- data.frame(t(apply(start, MARGIN = 1, FUN = function(values) {
  last_value <- values[length(values)]
  head(ifelse(is.na(values), last_value, values), -1)
})))
print(fin)

3
投票

另一种做法:使用 tidyr 的 pivot_longer() / fill() / pivot_wider():

library(tidyverse) # `tidyr`

# toy data
aux <- tibble::tribble(
  ~ID, ~Var1, ~Var2, ~Var3,
  1,     2,     5,     1,
  2,    12,     3,    NA,
  3,     8,    NA,    NA,
  4,     4,    NA,    NA)

# Pivot to long form, fill downwards within each ID, then pivot back.
# Grouping by ID stops a value from leaking out of the previous row's last
# column into a row whose first variable is missing.
new_aux <- aux %>%
  pivot_longer(-ID) %>%
  group_by(ID) %>%
  fill(value, .direction = "down") %>%
  ungroup() %>%
  pivot_wider()

输出:

> new_aux
# A tibble: 4 × 4
     ID  Var1  Var2  Var3
  <dbl> <dbl> <dbl> <dbl>
1     1     2     5     1
2     2    12     3     3
3     3     8     8     8
4     4     4     4     4

创建于 2024-05-28,使用 reprex v2.1.0


2
投票
# Walk the columns in `cols` from left to right: keep the current column's
# value unless it is NA, in which case reuse the running (already filled)
# value. The filled columns are assigned back by reference.
d[, (cols) := Reduce(
  function(filled, current) ifelse(is.na(current), filled, current),
  .SD,
  accumulate = TRUE
), .SDcols = cols]

或者

# Left-to-right accumulation: take the current column where it has a value,
# otherwise fall back to the running fill from the columns on its left.
# (A right-to-left fold, `right = TRUE`, would instead pull values from the
# columns to the right and leave trailing NAs untouched.)
d[, (cols) := Reduce(\(x, y) fcoalesce(y, x), .SD, accumulate = TRUE), .SDcols = cols]

输出

#       ID  Var1  Var2  Var3
#    <int> <int> <int> <int>
# 1:     1     2     5     1
# 2:     2    12     3     3
# 3:     3     8     8     8
# 4:     4     4     4     4

其中

# Columns to fill, in left-to-right order.
cols <- sprintf("Var%d", 1:3)
# Input data in its unfilled state, i.e. with the trailing NAs from the
# question (not the already-filled result).
d <- data.table(
  ID = 1:4,
  Var1 = c(2L, 12L, 8L, 4L),
  Var2 = c(5L, 3L, NA, NA),
  Var3 = c(1L, NA, NA, NA)
)

2
投票

您可以简单地运行

Reduce
,如下所示

# `df` is the data frame to fill; its first column (the ID) is left untouched.

# Option 1: carry the running value forward wherever the next column is NA.
df[-1] <- Reduce(\(x, y) ifelse(is.na(y), x, y), df[-1], accumulate = TRUE)

# Option 2 (arithmetic only): x + y with NA ignored, minus x whenever y is
# present, which leaves y if it is present and x otherwise.
df[-1] <- Reduce(
  \(x, y) rowSums(cbind(x, y), na.rm = TRUE) - x * !is.na(y),
  df[-1],
  accumulate = TRUE
)

这给出了

> df
  ID Var1 Var2 Var3
1  1    2    5    1
2  2   12    3    3
3  3    8    8    8
4  4    4    4    4
最新问题
© www.soinside.com 2019 - 2025. All rights reserved.