我正在尝试对两个嵌套的 tibble 映射(map)一个 mutate 操作。
数据看起来像:
# A tibble: 6 x 3
# Groups: .id [6]
.id glancemodels annualised_exp_slope
<chr> <list> <list>
1 TMO <tibble [1 × 11]> <tibble [1 × 6]>
2 XRAY <tibble [1 × 11]> <tibble [1 × 6]>
3 EMN <tibble [1 × 11]> <tibble [1 × 6]>
4 STZ <tibble [1 × 11]> <tibble [1 × 6]>
5 ABBV <tibble [1 × 11]> <tibble [1 × 6]>
6 CMCSA <tibble [1 × 11]> <tibble [1 × 6]>
当我分别对这两个嵌套的 tibble 列执行 unnest(展开)时,我得到:
> d %>%
+ unnest(glancemodels)
# A tibble: 6 x 13
# Groups: .id [6]
.id r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
1 TMO 0.964 0.964 0.0404 20148. 0. 2 1350. -2695. -2681.
2 XRAY 0.152 0.150 0.165 134. 1.08e-28 2 290. -575. -561.
3 EMN 0.0301 0.0288 0.121 23.3 1.67e- 6 2 522. -1037. -1023.
4 STZ 0.0616 0.0604 0.112 49.4 4.73e-12 2 581. -1155. -1141.
5 ABBV 0.123 0.122 0.164 106. 2.72e-23 2 295. -584. -571.
6 CMCSA 0.326 0.325 0.0862 363. 2.23e-66 2 779. -1553. -1539.
# … with 3 more variables: deviance <dbl>, df.residual <int>, annualised_exp_slope <list>
> d %>%
+ unnest(annualised_exp_slope)
# A tibble: 6 x 8
# Groups: .id [6]
.id glancemodels term estimate std.error statistic p.value annualised_slope
<chr> <list> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 TMO <tibble [1 × 11]> date 0.000661 0.00000466 142. 0. 0.180
2 XRAY <tibble [1 × 11]> date -0.000220 0.0000190 -11.6 1.08e-28 -0.0536
3 EMN <tibble [1 × 11]> date -0.0000675 0.0000140 -4.83 1.67e- 6 -0.0167
4 STZ <tibble [1 × 11]> date 0.0000909 0.0000129 7.03 4.73e-12 0.0230
5 ABBV <tibble [1 × 11]> date 0.000194 0.0000189 10.3 2.72e-23 0.0497
6 CMCSA <tibble [1 × 11]> date 0.000189 0.00000994 19.1 2.23e-66 0.0485
我想做的是将 glancemodels 这个 tibble 中的 r.squared 与 annualised_exp_slope 这个 tibble 中相应的 annualised_slope 相乘。
我可以用下面的方式在单个 tibble 内部做乘法:
# For every nested glance tibble, add `new_col` as the product of two of its
# own columns, then unnest the augmented tibbles and select the new column.
d %>%
  mutate(
    new_data = map(
      glancemodels,
      function(glance_tbl) {
        mutate(glance_tbl, new_col = r.squared * adj.r.squared)
      }
    )
  ) %>%
  unnest(new_data) %>%
  select(new_col)
但是,我不知道如何跨多个 tibble(即在不同的嵌套列之间)做乘法。
数据:
# Example data, written out as a structure() literal: a grouped tibble `d`
# with 6 rows (one per .id ticker), grouped by .id, and two list-columns:
#   - glancemodels:         one 1-row tibble per row with 11 columns
#                           (r.squared, adj.r.squared, sigma, ...)
#   - annualised_exp_slope: one 1-row tibble per row with 6 columns
#                           (term, estimate, ..., annualised_slope)
d <- structure(list(.id = c("TMO", "XRAY", "EMN", "STZ", "ABBV", "CMCSA"
), glancemodels = list(structure(list(r.squared = 0.964019230974359,
adj.r.squared = 0.963971384207037, sigma = 0.0404111568255498,
statistic = 20148.0535665064, p.value = 0, df = 2L, logLik = 1350.4433018847,
AIC = -2694.8866037694, BIC = -2681.01042666538, deviance = 1.22806232017634,
df.residual = 752L), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(r.squared = 0.151543842550593,
adj.r.squared = 0.150415576383772, sigma = 0.164863447653688,
statistic = 134.315684549489, p.value = 0.000000000000000000000000000108369480260803,
df = 2L, logLik = 290.310528800025, AIC = -574.621057600049,
BIC = -560.744880496025, deviance = 20.4393271919398, df.residual = 752L), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
r.squared = 0.0300722110583662, adj.r.squared = 0.0287824134666884,
sigma = 0.121320068267404, statistic = 23.315449844537, p.value = 0.00000166609147101428,
df = 2L, logLik = 521.551241799896, AIC = -1037.10248359979,
BIC = -1023.22630649577, deviance = 11.0683563412344, df.residual = 752L), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
r.squared = 0.0616192510242292, adj.r.squared = 0.06037140428357,
sigma = 0.112177359727858, statistic = 49.3804639756275,
p.value = 0.00000000000473148838568907, df = 2L, logLik = 580.62793934711,
AIC = -1155.25587869422, BIC = -1141.3797015902, deviance = 9.46298754670592,
df.residual = 752L), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), structure(list(r.squared = 0.123255529055469,
adj.r.squared = 0.122089645450489, sigma = 0.163787969689822,
statistic = 105.718554175606, p.value = 0.0000000000000000000000271981905314741,
df = 2L, logLik = 295.245321328204, AIC = -584.490642656408,
BIC = -570.614465552384, deviance = 20.1735272593657, df.residual = 752L), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
r.squared = 0.325731092124442, adj.r.squared = 0.324834457938437,
sigma = 0.0861924295216946, statistic = 363.28203542612,
p.value = 0.00000000000000000000000000000000000000000000000000000000000000000222500611035618,
df = 2L, logLik = 779.306063600262, AIC = -1552.61212720052,
BIC = -1538.7359500965, deviance = 5.58670944995293, df.residual = 752L), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))), annualised_exp_slope = list(
structure(list(term = "date", estimate = 0.000661238317613535,
std.error = 0.0000046584502718324, statistic = 141.943839480643,
p.value = 0, annualised_slope = 0.179758291051453), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
term = "date", estimate = -0.000220256089065116, std.error = 0.000019004854917986,
statistic = -11.5894643771612, p.value = 0.000000000000000000000000000108369480260797,
annualised_slope = -0.0535754462309787), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
term = "date", estimate = -0.000067529685400456, std.error = 0.0000139853334919056,
statistic = -4.82860744361517, p.value = 0.0000016660914710153,
annualised_slope = -0.0167407118618857), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
term = "date", estimate = 0.0000908705126223421, std.error = 0.0000129313955097491,
statistic = 7.02712344957929, p.value = 0.00000000000473148838568977,
annualised_slope = 0.0229776386777905), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
term = "date", estimate = 0.000194132294619269, std.error = 0.0000188808777540866,
statistic = 10.28195283862, p.value = 0.0000000000000000000000271981905314649,
annualised_slope = 0.0497300896568584), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
term = "date", estimate = 0.000189378758195995, std.error = 0.00000993594784897057,
statistic = 19.0599589565696, p.value = 0.00000000000000000000000000000000000000000000000000000000000000000222500611035929,
annualised_slope = 0.0484833480535469), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame")))), row.names = c(NA,
-6L), groups = structure(list(.id = c("ABBV", "CMCSA", "EMN",
"STZ", "TMO", "XRAY"), .rows = structure(list(5L, 6L, 3L, 4L,
1L, 2L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr"
))), row.names = c(NA, 6L), class = c("tbl_df", "tbl", "data.frame"
), .drop = FALSE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
))
我们可以使用 map2 同时遍历两个 list 列中的 tibble,分别提取 r.squared 和 annualised_slope,然后将两者相乘:
library(dplyr)
library(purrr)
# Drop the grouping, then walk the two list-columns in parallel: for each row,
# multiply r.squared from the glance tibble by annualised_slope from the slope
# tibble. map2_dbl() collects the products into a plain double column.
d %>%
  ungroup() %>%
  mutate(
    new_data = map2_dbl(
      glancemodels,
      annualised_exp_slope,
      function(glance_tbl, slope_tbl) {
        glance_tbl$r.squared * slope_tbl$annualised_slope
      }
    )
  )
# A tibble: 6 x 4
# .id glancemodels annualised_exp_slope new_data
# <chr> <list> <list> <dbl>
#1 TMO <tibble [1 × 11]> <tibble [1 × 6]> 0.173
#2 XRAY <tibble [1 × 11]> <tibble [1 × 6]> -0.00812
#3 EMN <tibble [1 × 11]> <tibble [1 × 6]> -0.000503
#4 STZ <tibble [1 × 11]> <tibble [1 × 6]> 0.00142
#5 ABBV <tibble [1 × 11]> <tibble [1 × 6]> 0.00613
#6 CMCSA <tibble [1 × 11]> <tibble [1 × 6]> 0.0158