让我们考虑下面的数据框
dat0
。pivot_wider
,我可以将其转置为 dat1
表,然后为每组 biomarker
导出 csv(link):
dat1 <- pivot_wider(dat0, names_from = analyser, values_from = c(result)) # then
dat1 %>%
group_by(biomarker) %>%
group_walk(~ write_csv(.x, paste0(.y$biomarker, ".csv")))
但是,我希望在所有最终 CSV 中:
NA
单元格显示为空感谢您的帮助
初始数据
dat0
# A tibble: 30 x 4
id biomarker analyser result
<chr> <chr> <chr> <dbl>
1 0240307415 BICAR A1 27
2 0240307373 BICAR A1 23
3 0240326970 AMMO A2 657
4 0240326976 AMMO A2 603
5 0240322181 HAPTO A3 6
6 0240322185 HAPTO A3 4
7 0240322191 HAPTO A3 4
8 0240326976 AMMO B1 744
9 0240326970 AMMO B1 623
10 0240326981 AMMO B1 550
11 0240326975 AMMO B1 490
12 0240307415 BICAR B1 28
13 0240307415 BICAR B2 27
14 0240307373 BICAR B2 23
15 0240322181 HAPTO B2 5
16 0240322185 HAPTO B2 5
17 0240322191 HAPTO B2 4
18 0240326976 AMMO C1 772
19 0240326970 AMMO C1 649
20 0240326981 AMMO C1 570
21 0240326975 AMMO C1 509
22 0240307415 BICAR C1 29
23 0240252218 BICAR C1 25
24 0240307417 BICAR C1 23
25 0240307415 BICAR C2 28
26 0240307373 BICAR C2 23
27 0240322181 HAPTO C2 5
28 0240322185 HAPTO C2 5
29 0240322191 HAPTO C2 4
30 0240322151 HAPTO C2 4
dput
dat0 <-
structure(list(id = c("0240307415", "0240307373", "0240326970",
"0240326976", "0240322181", "0240322185", "0240322191", "0240326976",
"0240326970", "0240326981", "0240326975", "0240307415", "0240307415",
"0240307373", "0240322181", "0240322185", "0240322191", "0240326976",
"0240326970", "0240326981", "0240326975", "0240307415", "0240252218",
"0240307417", "0240307415", "0240307373", "0240322181", "0240322185",
"0240322191", "0240322151"), biomarker = c("BICAR", "BICAR",
"AMMO", "AMMO", "HAPTO", "HAPTO", "HAPTO", "AMMO", "AMMO", "AMMO",
"AMMO", "BICAR", "BICAR", "BICAR", "HAPTO", "HAPTO", "HAPTO",
"AMMO", "AMMO", "AMMO", "AMMO", "BICAR", "BICAR", "BICAR", "BICAR",
"BICAR", "HAPTO", "HAPTO", "HAPTO", "HAPTO"), analyser = c("A1",
"A1", "A2", "A2", "A3", "A3", "A3", "B1", "B1", "B1", "B1", "B1",
"B2", "B2", "B2", "B2", "B2", "C1", "C1", "C1", "C1", "C1", "C1",
"C1", "C2", "C2", "C2", "C2", "C2", "C2"), result = c(27, 23,
657, 603, 6, 4, 4, 744, 623, 550, 490, 28, 27, 23, 5, 5, 4, 772,
649, 570, 509, 29, 25, 23, 28, 23, 5, 5, 4, 4)), row.names = c(NA,
-30L), class = "data.frame")
一种选择是先
group_nest
,然后pivot_wider
,然后迭代每个嵌套框架以删除所有NA
列。
dat1 <- dat0 |>
group_nest(biomarker) |>
mutate(data = lapply(data, function(d) pivot_wider(d, names_from = analyser, values_from = result)))
dat1 |>
group_by(biomarker) |>
group_walk(~ write_csv(.x$data[[1]], paste0(.y$biomarker, ".csv")))
制作过程:
list.files(pattern = "csv$")
# [1] "AMMO.csv" "BICAR.csv" "HAPTO.csv"
lapply(list.files(pattern = "csv$"), readr::read_csv, show_col_types = FALSE)
# [[1]]
# # A tibble: 4 × 4
# id A2 B1 C1
# <chr> <dbl> <dbl> <dbl>
# 1 0240326970 657 623 649
# 2 0240326976 603 744 772
# 3 0240326981 NA 550 570
# 4 0240326975 NA 490 509
# [[2]]
# # A tibble: 4 × 6
# id A1 B1 B2 C1 C2
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 0240307415 27 28 27 29 28
# 2 0240307373 23 NA 23 NA 23
# 3 0240252218 NA NA NA 25 NA
# 4 0240307417 NA NA NA 23 NA
# [[3]]
# # A tibble: 4 × 4
# id A3 B2 C2
# <chr> <dbl> <dbl> <dbl>
# 1 0240322181 6 5 5
# 2 0240322185 4 5 5
# 3 0240322191 4 4 4
# 4 0240322151 NA NA 4
(我知道我正在将 base-
lapply
与 purrr
函数混合在一起......老习惯。purrr
变体没有任何问题:-)