使用 Stringr 包和大括号的自定义函数

问题描述 投票:0回答:1

我正在进行一项系统评价,纳入的研究中有的报告均值 (SD),有的报告中位数 (IQR)。我正在尝试创建一个函数,使用 Wan 等人(2014)的方法将中位数 (IQR) 转换为均值 (SD)。我用“数字 + m”的形式标记中位数数据(例如

34m
),IQR 是一个范围(例如
16-45
)。

这是一个数据集:

library(tidyverse)

# Example studies, one row per study: the median is tagged with a trailing
# "m" and the IQR is written as a "low-high" range.
data <- tribble(
  ~study, ~age_median, ~age_iqr, ~number,
  1,      "52m",       "18-74",  65,
  2,      "42m",       "38-55",  30,
  3,      "69m",       "60-91",  45
)

我可以使用以下语法根据中位数和 IQR(使用 Wan 方法)计算平均值和标准差:

# Calculate Age Mean from Median
# Strip the trailing "m" marker, split the "low-high" range into two columns,
# convert the pieces to numeric, then average the median and both bounds.
data <- data |>
  separate_wider_delim(age_median, "m", names = c("age_median", NA)) |>
  separate_wider_delim(
    age_iqr,
    "-",
    names = c("age_iqr_low", "age_iqr_high"),
    too_few = "debug"
  ) |>
  # across() replaces the superseded mutate_at(); all_of() selects by name.
  mutate(
    across(all_of(c("age_median", "age_iqr_low", "age_iqr_high")), as.numeric)
  ) |>
  mutate(age_mean = (age_median + age_iqr_low + age_iqr_high) / 3)

# Calculate Age SD from IQR
# The IQR width is scaled by a normal quantile that depends on the sample
# size held in `number`.
data <- data |>
  mutate(
    age_sd = (age_iqr_high - age_iqr_low) /
      (2 * qnorm((0.75 * number - 0.125) / (number + 0.25), 0, 1))
  )

我正在尝试将其封装成一个函数,以便对其他干预措施的数据重复使用:

# Function to convert median into mean
# NOTE(review): this is the non-working attempt; the notes below mark the
# problems that are visible in this block.
# NOTE(review): only `median_col` and `iqr_col` are declared as parameters,
# while the body reads a variable named `data` that is not a parameter.
med_to_mean <- function(median_col, iqr_col){
  # NOTE(review): the result of this pipeline is not assigned to anything.
  data |> 
  # NOTE(review): `names := ...` differs from the `names = ...` argument form
  # used in the step-by-step pipeline.
  separate_wider_delim({{median_col}}, "m", names := c("median_col", NA)) |> 
  separate_wider_delim({{iqr_col}}, "-", names := c("iqr_col_low", "iqr_col_high"), too_few = "debug") |> 
  # NOTE(review): "{{median_col}}" is inside quotes, so it is a plain string
  # here rather than an embraced column argument.
  mutate_at(c("{{median_col}}", "iqr_col_low", "iqr_col_high"), as.numeric) |> 
  mutate({{median_col}} := (median_col + iqr_col_low + iqr_col_high)/3)

# NOTE(review): this return() comes before the SD step below, so that step
# would not be reached.
return({{median_col}})

# Calculate Age SD from IQR
# Uses the `number` column as the sample size in the qnorm() term.
data <- data |> 
  mutate({{iqr_col}} := (iqr_col_high- iqr_col_low)/(2*qnorm((0.75*number - 0.125)/( number+ 0.25), 0,1)))

return({{iqr_col}})
}

当我尝试使用该函数时:

# The pipe passes `data` as the first argument, so this call supplies three
# arguments in total: data, age_median and age_iqr.
data <- data |> med_to_mean(age_median, age_iqr)

我不断收到错误:

Error in med_to_mean(age_median, age_iqr) : 
  unused argument (age_iqr)

我知道这个问题可能很简单,但非常感谢任何帮助。

干杯, 本

r function dplyr stringr curly-braces
1个回答
0
投票

你的尝试已经很接近了;也许尝试这些微小的改变?

library(tidyverse)

# Example studies, one row per study: the median is tagged with a trailing
# "m" and the IQR is written as a "low-high" range.
data <- tribble(
  ~study, ~age_median, ~age_iqr, ~number,
  1,      "52m",       "18-74",  65,
  2,      "42m",       "38-55",  30,
  3,      "69m",       "60-91",  45
)

# Reference result computed step by step, without the helper function.
# `too_few = "debug"` keeps the original `age_iqr` column and adds the
# diagnostic columns named in the warning below.
example_output <- data |>
  separate_wider_delim(age_median, "m", names = c("age_median", NA)) |>
  separate_wider_delim(
    age_iqr,
    "-",
    names = c("age_iqr_low", "age_iqr_high"),
    too_few = "debug"
  ) |>
  # across() replaces the superseded mutate_at(); all_of() selects by name.
  mutate(
    across(all_of(c("age_median", "age_iqr_low", "age_iqr_high")), as.numeric)
  ) |>
  mutate(age_mean = (age_median + age_iqr_low + age_iqr_high) / 3) |>
  mutate(
    age_sd = (age_iqr_high - age_iqr_low) /
      (2 * qnorm((0.75 * number - 0.125) / (number + 0.25), 0, 1))
  )
#> Warning: Debug mode activated: adding variables `age_iqr_ok`, `age_iqr_pieces`, and
#> `age_iqr_remainder`.

#' Convert a median (IQR) summary into an estimated mean and SD
#'
#' Splits the median and IQR text columns into numeric pieces and applies the
#' same formulas as the step-by-step pipeline: the mean is the average of the
#' median and the two IQR bounds, and the SD is the IQR width divided by
#' `2 * qnorm((0.75 * n - 0.125) / (n + 0.25))`.
#'
#' @param data A data frame containing the columns named below.
#' @param median_col Unquoted column holding medians with a trailing "m"
#'   (e.g. "52m").
#' @param iqr_col Unquoted column holding ranges written as "low-high"
#'   (e.g. "18-74").
#' @param n_col Unquoted column holding the sample size; defaults to `number`.
#' @return `data` with the median column replaced by a numeric `median_col`,
#'   numeric `iqr_col_low` / `iqr_col_high` columns, the diagnostic columns
#'   added by `too_few = "debug"`, plus `mean_col` (estimated mean) and
#'   `iqr_col` (which, despite its name, holds the estimated SD).
med_to_mean <- function(data, median_col, iqr_col, n_col = number) {
  data |>
    separate_wider_delim(
      {{ median_col }},
      "m",
      names = c("median_col", NA)
    ) |>
    separate_wider_delim(
      {{ iqr_col }},
      "-",
      names = c("iqr_col_low", "iqr_col_high"),
      too_few = "debug"
    ) |>
    # Select the new columns by string so they cannot be confused with the
    # function arguments `median_col` / `iqr_col`.
    mutate(
      across(all_of(c("median_col", "iqr_col_low", "iqr_col_high")), as.numeric)
    ) |>
    # `.data$` makes explicit that these are columns, not function arguments.
    mutate(
      mean_col =
        (.data$median_col + .data$iqr_col_low + .data$iqr_col_high) / 3,
      iqr_col = (.data$iqr_col_high - .data$iqr_col_low) /
        (2 * qnorm((0.75 * {{ n_col }} - 0.125) / ({{ n_col }} + 0.25), 0, 1))
    )
}

funciton_output <- data |> med_to_mean(median_col = age_median, iqr_col = age_iqr)
#> Warning: Debug mode activated: adding variables `age_iqr_ok`, `age_iqr_pieces`, and
#> `age_iqr_remainder`.

all.equal(example_output, funciton_output)
#> [1] "Names: 5 string mismatches"
example_output
#> # A tibble: 3 × 11
#>   study age_median age_iqr_low age_iqr_high age_iqr age_iqr_ok age_iqr_pieces
#>   <dbl>      <dbl>       <dbl>        <dbl> <chr>   <lgl>               <int>
#> 1     1         52          18           74 18-74   TRUE                    2
#> 2     2         42          38           55 38-55   TRUE                    2
#> 3     3         69          60           91 60-91   TRUE                    2
#> # ℹ 4 more variables: age_iqr_remainder <chr>, number <dbl>, age_mean <dbl>,
#> #   age_sd <dbl>
funciton_output
#> # A tibble: 3 × 11
#>   study median_col iqr_col_low iqr_col_high age_iqr age_iqr_ok age_iqr_pieces
#>   <dbl>      <dbl>       <dbl>        <dbl> <chr>   <lgl>               <int>
#> 1     1         52          18           74 18-74   TRUE                    2
#> 2     2         42          38           55 38-55   TRUE                    2
#> 3     3         69          60           91 60-91   TRUE                    2
#> # ℹ 4 more variables: age_iqr_remainder <chr>, number <dbl>, mean_col <dbl>,
#> #   iqr_col <dbl>

创建于 2024-08-20,使用 reprex v2.1.0

注意:下面这段代码的结果表明,两者只是列名不同(例如“age_median”变成了“median_col”),各列的值是相同的:

all.equal(example_output, funciton_output)
#> [1] "Names: 5 string mismatches"

这能解决你的问题吗?

© www.soinside.com 2019 - 2024. All rights reserved.