将两个嵌套的tibble变量相乘

问题描述 投票:2回答:1

我正在尝试在两个嵌套的tibble上映射(map)一个mutate函数。

数据看起来像:

# A tibble: 6 x 3
# Groups:   .id [6]
  .id   glancemodels      annualised_exp_slope
  <chr> <list>            <list>              
1 TMO   <tibble [1 × 11]> <tibble [1 × 6]>    
2 XRAY  <tibble [1 × 11]> <tibble [1 × 6]>    
3 EMN   <tibble [1 × 11]> <tibble [1 × 6]>    
4 STZ   <tibble [1 × 11]> <tibble [1 × 6]>    
5 ABBV  <tibble [1 × 11]> <tibble [1 × 6]>    
6 CMCSA <tibble [1 × 11]> <tibble [1 × 6]>

当我分别对这两个嵌套的tibble进行unnest(展开)时,我得到:

> d %>% 
+   unnest(glancemodels)
# A tibble: 6 x 13
# Groups:   .id [6]
  .id   r.squared adj.r.squared  sigma statistic  p.value    df logLik    AIC    BIC
  <chr>     <dbl>         <dbl>  <dbl>     <dbl>    <dbl> <int>  <dbl>  <dbl>  <dbl>
1 TMO      0.964         0.964  0.0404   20148.  0.           2  1350. -2695. -2681.
2 XRAY     0.152         0.150  0.165      134.  1.08e-28     2   290.  -575.  -561.
3 EMN      0.0301        0.0288 0.121       23.3 1.67e- 6     2   522. -1037. -1023.
4 STZ      0.0616        0.0604 0.112       49.4 4.73e-12     2   581. -1155. -1141.
5 ABBV     0.123         0.122  0.164      106.  2.72e-23     2   295.  -584.  -571.
6 CMCSA    0.326         0.325  0.0862     363.  2.23e-66     2   779. -1553. -1539.
# … with 3 more variables: deviance <dbl>, df.residual <int>, annualised_exp_slope <list>
> d %>% 
+   unnest(annualised_exp_slope)
# A tibble: 6 x 8
# Groups:   .id [6]
  .id   glancemodels      term    estimate  std.error statistic  p.value annualised_slope
  <chr> <list>            <chr>      <dbl>      <dbl>     <dbl>    <dbl>            <dbl>
1 TMO   <tibble [1 × 11]> date   0.000661  0.00000466    142.   0.                 0.180 
2 XRAY  <tibble [1 × 11]> date  -0.000220  0.0000190     -11.6  1.08e-28          -0.0536
3 EMN   <tibble [1 × 11]> date  -0.0000675 0.0000140      -4.83 1.67e- 6          -0.0167
4 STZ   <tibble [1 × 11]> date   0.0000909 0.0000129       7.03 4.73e-12           0.0230
5 ABBV  <tibble [1 × 11]> date   0.000194  0.0000189      10.3  2.72e-23           0.0497
6 CMCSA <tibble [1 × 11]> date   0.000189  0.00000994     19.1  2.23e-66           0.0485

我想做的是将glancemodels这个tibble中的r.squared与annualised_exp_slope这个tibble中相应的annualised_slope相乘。

我可以用以下方式在单个tibble内部进行乘法运算:

# For every row, add `new_col` (r.squared * adj.r.squared) to the tibble
# stored in the `glancemodels` list-column, then unnest the augmented
# tibbles and select `new_col`.
d %>%
  mutate(
    new_data = map(
      glancemodels,
      function(glance_tbl) {
        mutate(glance_tbl, new_col = r.squared * adj.r.squared)
      }
    )
  ) %>%
  unnest(new_data) %>%
  select(new_col)

但是,我不知道如何跨多个tibble进行乘法运算。

数据:

# Reproducible example data (appears to be dput() output — not confirmed).
# `d` is a six-row tibble grouped by `.id`, with two list-columns:
#   - glancemodels:         one-row tibbles with 11 columns (r.squared,
#                           adj.r.squared, sigma, statistic, p.value, df,
#                           logLik, AIC, BIC, deviance, df.residual)
#   - annualised_exp_slope: one-row tibbles with 6 columns (term, estimate,
#                           std.error, statistic, p.value, annualised_slope)
# The trailing `groups` attribute is what makes `d` a grouped_df.
d <- structure(list(.id = c("TMO", "XRAY", "EMN", "STZ", "ABBV", "CMCSA"
), glancemodels = list(structure(list(r.squared = 0.964019230974359, 
    adj.r.squared = 0.963971384207037, sigma = 0.0404111568255498, 
    statistic = 20148.0535665064, p.value = 0, df = 2L, logLik = 1350.4433018847, 
    AIC = -2694.8866037694, BIC = -2681.01042666538, deviance = 1.22806232017634, 
    df.residual = 752L), row.names = c(NA, -1L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(r.squared = 0.151543842550593, 
    adj.r.squared = 0.150415576383772, sigma = 0.164863447653688, 
    statistic = 134.315684549489, p.value = 0.000000000000000000000000000108369480260803, 
    df = 2L, logLik = 290.310528800025, AIC = -574.621057600049, 
    BIC = -560.744880496025, deviance = 20.4393271919398, df.residual = 752L), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    r.squared = 0.0300722110583662, adj.r.squared = 0.0287824134666884, 
    sigma = 0.121320068267404, statistic = 23.315449844537, p.value = 0.00000166609147101428, 
    df = 2L, logLik = 521.551241799896, AIC = -1037.10248359979, 
    BIC = -1023.22630649577, deviance = 11.0683563412344, df.residual = 752L), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    r.squared = 0.0616192510242292, adj.r.squared = 0.06037140428357, 
    sigma = 0.112177359727858, statistic = 49.3804639756275, 
    p.value = 0.00000000000473148838568907, df = 2L, logLik = 580.62793934711, 
    AIC = -1155.25587869422, BIC = -1141.3797015902, deviance = 9.46298754670592, 
    df.residual = 752L), row.names = c(NA, -1L), class = c("tbl_df", 
"tbl", "data.frame")), structure(list(r.squared = 0.123255529055469, 
    adj.r.squared = 0.122089645450489, sigma = 0.163787969689822, 
    statistic = 105.718554175606, p.value = 0.0000000000000000000000271981905314741, 
    df = 2L, logLik = 295.245321328204, AIC = -584.490642656408, 
    BIC = -570.614465552384, deviance = 20.1735272593657, df.residual = 752L), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    r.squared = 0.325731092124442, adj.r.squared = 0.324834457938437, 
    sigma = 0.0861924295216946, statistic = 363.28203542612, 
    p.value = 0.00000000000000000000000000000000000000000000000000000000000000000222500611035618, 
    df = 2L, logLik = 779.306063600262, AIC = -1552.61212720052, 
    BIC = -1538.7359500965, deviance = 5.58670944995293, df.residual = 752L), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))), annualised_exp_slope = list(
    structure(list(term = "date", estimate = 0.000661238317613535, 
        std.error = 0.0000046584502718324, statistic = 141.943839480643, 
        p.value = 0, annualised_slope = 0.179758291051453), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
        term = "date", estimate = -0.000220256089065116, std.error = 0.000019004854917986, 
        statistic = -11.5894643771612, p.value = 0.000000000000000000000000000108369480260797, 
        annualised_slope = -0.0535754462309787), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
        term = "date", estimate = -0.000067529685400456, std.error = 0.0000139853334919056, 
        statistic = -4.82860744361517, p.value = 0.0000016660914710153, 
        annualised_slope = -0.0167407118618857), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
        term = "date", estimate = 0.0000908705126223421, std.error = 0.0000129313955097491, 
        statistic = 7.02712344957929, p.value = 0.00000000000473148838568977, 
        annualised_slope = 0.0229776386777905), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
        term = "date", estimate = 0.000194132294619269, std.error = 0.0000188808777540866, 
        statistic = 10.28195283862, p.value = 0.0000000000000000000000271981905314649, 
        annualised_slope = 0.0497300896568584), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
        term = "date", estimate = 0.000189378758195995, std.error = 0.00000993594784897057, 
        statistic = 19.0599589565696, p.value = 0.00000000000000000000000000000000000000000000000000000000000000000222500611035929, 
        annualised_slope = 0.0484833480535469), row.names = c(NA, 
    -1L), class = c("tbl_df", "tbl", "data.frame")))), row.names = c(NA, 
-6L), groups = structure(list(.id = c("ABBV", "CMCSA", "EMN", 
"STZ", "TMO", "XRAY"), .rows = structure(list(5L, 6L, 3L, 4L, 
    1L, 2L), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr"
))), row.names = c(NA, 6L), class = c("tbl_df", "tbl", "data.frame"
), .drop = FALSE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
))
r dplyr tibble
1个回答
2
投票

我们可以使用map2同时遍历两个list列中的tibble,提取“r.squared”和“annualised_slope”并将它们相乘。

library(dplyr)
library(purrr)
# Drop the grouping, then walk the two list-columns in parallel and multiply
# the r.squared of each `glancemodels` tibble by the annualised_slope of the
# matching `annualised_exp_slope` tibble. map2_dbl() yields a double column.
d %>%
  ungroup() %>%
  mutate(
    new_data = map2_dbl(
      glancemodels,
      annualised_exp_slope,
      function(glance_tbl, slope_tbl) {
        glance_tbl$r.squared * slope_tbl$annualised_slope
      }
    )
  )
# A tibble: 6 x 4
#  .id   glancemodels      annualised_exp_slope  new_data
#  <chr> <list>            <list>                   <dbl>
#1 TMO   <tibble [1 × 11]> <tibble [1 × 6]>      0.173   
#2 XRAY  <tibble [1 × 11]> <tibble [1 × 6]>     -0.00812 
#3 EMN   <tibble [1 × 11]> <tibble [1 × 6]>     -0.000503
#4 STZ   <tibble [1 × 11]> <tibble [1 × 6]>      0.00142 
#5 ABBV  <tibble [1 × 11]> <tibble [1 × 6]>      0.00613 
#6 CMCSA <tibble [1 × 11]> <tibble [1 × 6]>      0.0158  
© www.soinside.com 2019 - 2024. All rights reserved.