请考虑以下数据框:
# Example input: a 3-row data frame. The columns personnel_Number and ls_Role
# are list columns that hold one inner data frame per outer row.
df <- structure(list(oID = c(37751L, 30978L, 33498L),
peId = c(12L, 13L, 14L),
last_Name = c("ABC", "DEF", "EFG"),
first_Name = c("Z", "Y", "X"),
# List column 1: inner data frames with 3 rows, 1 row, and no rows.
personnel_Number = list(structure(list(hId = c(1L, 4L, 5L),
hName = c("PS", "XY", "MN"),
personnel_Number = c("0123", "1234", "98")),
class = "data.frame",
row.names = c(NA, 3L)),
structure(list(hId = 1L, hName = "PS",
personnel_Number = "0987"),
class = "data.frame",
row.names = 1L),
# Third row: an empty data frame (no columns, no rows).
structure(list(),
names = character(0),
row.names = integer(0),
class = "data.frame")),
# List column 2: inner data frames with 2 rows, 1 row, and 1 row.
ls_Role = list(structure(list(functionId = c(1L, 5L),
`function` = c("function A", "function B"),
function_Short = c("FA", "FB")),
class = "data.frame",
row.names = 1:2),
structure(list(functionId = 6L,
`function` = "function A",
function_Short = "FA"),
class = "data.frame",
row.names = 1L),
structure(list(functionId = 6L,
`function` = "function A",
function_Short = "FA"),
class = "data.frame",
row.names = 1L))),
row.names = c(1L, 2L, 3L),
class = "data.frame")
如您所见,该数据框包含两个列表列,每个列表列的每一行都存放着一个数据框。我想对每个列表列取消嵌套,并把其中的“内部”数据框转换为宽格式,即内部数据框的每一列都按该数据框的行数展开成多列。
因此,对于“personnel_Number”列,我希望得到 9 列(hId_1、hId_2、hId_3;hName_1、hName_2、hName_3;personnel_Number_1、personnel_Number_2、personnel_Number_3)。ls_Role 列也是如此。
我知道如何通过取消嵌套然后大量重塑整个数据框来手动完成此操作,但我想知道是否有一种更简洁的方法可以更自动地执行此操作或减少函数调用,最好以 tidyverse 的方式。
这是我当前的代码(也反映了我的预期输出):
library(tidyverse)
# Widen both list columns in turn: explode a list column to one row per inner
# row, number the inner rows within each original row, then spread those
# numbered rows into suffixed columns.
df |>
  # Key that identifies each original row across the reshaping steps.
  mutate(id = row_number()) |>
  # personnel_Number: keep rows whose inner data frame is empty.
  unnest_longer(personnel_Number, keep_empty = TRUE) |>
  unpack(personnel_Number) |>
  mutate(id_inner = row_number(), .by = id) |>
  pivot_wider(
    names_from = id_inner,
    values_from = c(hId, hName, personnel_Number)
  ) |>
  # ls_Role: same steps; id_inner is recreated because pivot_wider consumed it.
  unnest_longer(ls_Role, keep_empty = TRUE) |>
  unpack(ls_Role) |>
  mutate(id_inner = row_number(), .by = id) |>
  pivot_wider(
    names_from = id_inner,
    values_from = c(functionId, `function`, function_Short)
  )
下面是解决您问题的一种可能方法,该方案基于 data.table 包中的函数:
library(data.table)
#' Spread every data frame of a list column into one wide row.
#'
#' Each element of `dfs` is cast to wide format so that row k of column `col`
#' lands in a column named `col_k`. The resulting one-row tables are stacked,
#' and columns missing from an element are filled with `NA`.
#'
#' @param dfs A list of data frames (e.g. a list column). Elements may have
#'   zero rows; they then yield a row of `NA`s.
#' @return A data.table with one row per element of `dfs`.
fun <- function(dfs) {
  lst <- vector("list", length(dfs))
  for (i in seq_along(dfs)) {
    # Copy first: setDT() converts by reference and must not touch the input.
    d <- setDT(copy(dfs[[i]]))
    lst[[i]] <- if (nrow(d) > 0L) {
      # `.` on the left-hand side collapses everything into a single row; the
      # row index on the right-hand side becomes the column suffix.
      # NOTE(review): with a single-column input, dcast appears to name the
      # outputs `1`, `2`, ... without the column prefix - confirm if relevant.
      dcast(d, . ~ seq_len(nrow(d)), value.var = names(d))
    } else {
      # Placeholder so an empty element still contributes one (all-NA) row.
      data.table(. = ".")
    }
  }
  # fill = TRUE pads columns absent from shorter elements; the helper column
  # `.` is dropped, and the trailing [] makes the result print when returned.
  rbindlist(lst, fill = TRUE)[, . := NULL][]
}
# Append the widened role and personnel tables to the original data frame.
df2 <- cbind(
  df,
  fun(df[["ls_Role"]]),
  fun(df[["personnel_Number"]])
)
oID peId last_Name ... functionId_1 functionId_2 function_1 function_2 function_Short_1 function_Short_2 hId_1 hId_2 hId_3 hName_1 hName_2 hName_3 personnel_Number_1 personnel_Number_2 personnel_Number_3
1 37751 12 ABC ... 1 5 function A function B FA FB 1 4 5 PS XY MN 0123 1234 98
2 30978 13 DEF ... 6 NA function A <NA> FA <NA> 1 NA NA PS <NA> <NA> 0987 <NA> <NA>
3 33498 14 EFG ... 6 NA function A <NA> FA <NA> NA NA NA <NA> <NA> <NA> <NA> <NA> <NA>