我正在做一项系统评价,需要合并分别以均值(SD)和中位数(IQR)报告结果的研究。我想编写一个函数,使用 Wan 等人(2014)的方法将中位数(IQR)转换为均值(SD)。我在中位数数据的数字后面加了字母 m 作为标记(例如 34m),IQR 则以范围的形式给出(例如 16-45)。
这是一个数据集:
library(tidyverse)
# Example data, one row per study. Medians are stored as text with a trailing
# "m" marker and the IQR as a "low-high" string; `number` is the value used as
# n in the SD formula.
data <- tribble(
  ~study, ~age_median, ~age_iqr, ~number,
  1, "52m", "18-74", 65,
  2, "42m", "38-55", 30,
  3, "69m", "60-91", 45
)
我可以用以下代码根据中位数和 IQR(使用 Wan 方法)计算均值和标准差:
# Calculate Age Mean from Median
# Mean estimate used here: (lower quartile + median + upper quartile) / 3.
data <- data |>
  # Drop the trailing "m" marker: "52m" splits into "52" and an empty piece,
  # and the empty piece is discarded via the NA name.
  separate_wider_delim(age_median, "m", names = c("age_median", NA)) |>
  # Split "low-high" into two columns. Debug mode keeps the original `age_iqr`
  # column and adds `age_iqr_ok`, `age_iqr_pieces` and `age_iqr_remainder`.
  separate_wider_delim(
    age_iqr,
    "-",
    names = c("age_iqr_low", "age_iqr_high"),
    too_few = "debug"
  ) |>
  # across() replaces the superseded mutate_at(); the split pieces are text.
  mutate(across(c(age_median, age_iqr_low, age_iqr_high), as.numeric)) |>
  mutate(age_mean = (age_median + age_iqr_low + age_iqr_high) / 3)

# Calculate Age SD from IQR
# SD estimate used here: IQR width / (2 * qnorm((0.75 n - 0.125) / (n + 0.25))),
# with `number` supplying n.
data <- data |>
  mutate(
    age_sd = (age_iqr_high - age_iqr_low) /
      (2 * qnorm((0.75 * number - 0.125) / (number + 0.25), 0, 1))
  )
我正在尝试将其封装成一个函数,以便对其他干预措施的数据重复使用:
# Function to convert median into mean
# NOTE(review): the signature declares two parameters and no `data` parameter.
# A piped call such as `data |> med_to_mean(a, b)` therefore supplies three
# arguments, while the body reads `data` from the enclosing environment.
med_to_mean <- function(median_col, iqr_col){
# NOTE(review): the result of this pipeline is not assigned to anything, and
# `names :=` is written where the working script uses `names =` -- confirm.
data |>
separate_wider_delim({{median_col}}, "m", names := c("median_col", NA)) |>
separate_wider_delim({{iqr_col}}, "-", names := c("iqr_col_low", "iqr_col_high"), too_few = "debug") |>
# NOTE(review): "{{median_col}}" is inside a quoted string here, so it looks
# like a literal column name rather than the embraced argument -- confirm.
mutate_at(c("{{median_col}}", "iqr_col_low", "iqr_col_high"), as.numeric) |>
mutate({{median_col}} := (median_col + iqr_col_low + iqr_col_high)/3)
# return() exits the function, so the statements after it never run.
return({{median_col}})
# Calculate Age SD from IQR
data <- data |>
mutate({{iqr_col}} := (iqr_col_high- iqr_col_low)/(2*qnorm((0.75*number - 0.125)/( number+ 0.25), 0,1)))
return({{iqr_col}})
}
当我尝试使用该函数时:
# The pipe passes `data` as the first argument, so med_to_mean() receives three
# arguments in this call.
data <- data |> med_to_mean(age_median, age_iqr)
我不断收到错误:
Error in med_to_mean(age_median, age_iqr) :
unused argument (age_iqr)
我知道这可能是个很简单的问题,但非常感谢任何帮助。
干杯, 本
你的尝试已经很接近了;也许尝试这些微小的改变?
library(tidyverse)
# Example data, one row per study. Medians are stored as text with a trailing
# "m" marker and the IQR as a "low-high" string; `number` is the value used as
# n in the SD formula.
data <- tribble(
  ~study, ~age_median, ~age_iqr, ~number,
  1, "52m", "18-74", 65,
  2, "42m", "38-55", 30,
  3, "69m", "60-91", 45
)
# Reference result computed step by step, without a function.
example_output <- data |>
  # Drop the trailing "m" marker: "52m" splits into "52" and an empty piece,
  # and the empty piece is discarded via the NA name.
  separate_wider_delim(age_median, "m", names = c("age_median", NA)) |>
  # Split "low-high" into two columns. Debug mode keeps the original `age_iqr`
  # column and adds `age_iqr_ok`, `age_iqr_pieces` and `age_iqr_remainder`.
  separate_wider_delim(
    age_iqr,
    "-",
    names = c("age_iqr_low", "age_iqr_high"),
    too_few = "debug"
  ) |>
  # across() replaces the superseded mutate_at(); the split pieces are text.
  mutate(across(c(age_median, age_iqr_low, age_iqr_high), as.numeric)) |>
  # Mean estimate: (lower quartile + median + upper quartile) / 3.
  mutate(age_mean = (age_median + age_iqr_low + age_iqr_high) / 3) |>
  # SD estimate: IQR width / (2 * qnorm((0.75 n - 0.125) / (n + 0.25))),
  # with `number` supplying n.
  mutate(
    age_sd = (age_iqr_high - age_iqr_low) /
      (2 * qnorm((0.75 * number - 0.125) / (number + 0.25), 0, 1))
  )
#> Warning: Debug mode activated: adding variables `age_iqr_ok`, `age_iqr_pieces`, and
#> `age_iqr_remainder`.
#' Convert a median / IQR pair of text columns into a mean and SD estimate
#'
#' Splits the median column on "m" and the IQR column on "-", converts the
#' pieces to numeric, then appends a mean column computed as
#' (low + median + high) / 3 and an SD column computed as
#' (high - low) / (2 * qnorm((0.75 n - 0.125) / (n + 0.25))).
#'
#' @param data A data frame or tibble.
#' @param median_col Unquoted column holding medians as text with a trailing
#'   "m" (e.g. "52m").
#' @param iqr_col Unquoted column holding the IQR as "low-high" text
#'   (e.g. "18-74").
#' @param n_col Unquoted column supplying n in the SD formula. Defaults to
#'   `number`, the column the original implementation hard-coded.
#' @param prefix Optional single string. When `NULL` (the default) the output
#'   columns keep the original fixed names `median_col`, `iqr_col_low`,
#'   `iqr_col_high`, `mean_col` and `iqr_col` (the last one holds the SD).
#'   When supplied, e.g. `"age"`, the columns are named `age_median`,
#'   `age_iqr_low`, `age_iqr_high`, `age_mean` and `age_sd`, so the function
#'   can be applied to several variables of the same data without name clashes.
#' @return `data` with the median column replaced by its numeric version, the
#'   two IQR bound columns, the debug columns added by
#'   `separate_wider_delim(too_few = "debug")`, and the mean and SD columns.
med_to_mean <- function(data, median_col, iqr_col, n_col = number,
                        prefix = NULL) {
  if (is.null(prefix)) {
    # Fixed names: identical output to the original implementation.
    median_name <- "median_col"
    low_name <- "iqr_col_low"
    high_name <- "iqr_col_high"
    mean_name <- "mean_col"
    sd_name <- "iqr_col"
  } else {
    stopifnot(is.character(prefix), length(prefix) == 1L, !is.na(prefix))
    median_name <- paste0(prefix, "_median")
    low_name <- paste0(prefix, "_iqr_low")
    high_name <- paste0(prefix, "_iqr_high")
    mean_name <- paste0(prefix, "_mean")
    sd_name <- paste0(prefix, "_sd")
  }

  data |>
    # "52m" splits into "52" and an empty piece; the NA name drops the latter.
    separate_wider_delim(
      {{ median_col }},
      "m",
      names = c(median_name, NA)
    ) |>
    # Debug mode keeps the original IQR column and adds diagnostic columns.
    separate_wider_delim(
      {{ iqr_col }},
      "-",
      names = c(low_name, high_name),
      too_few = "debug"
    ) |>
    mutate(across(all_of(c(median_name, low_name, high_name)), as.numeric)) |>
    mutate(
      "{mean_name}" := (.data[[median_name]] + .data[[low_name]] +
        .data[[high_name]]) / 3,
      "{sd_name}" := (.data[[high_name]] - .data[[low_name]]) /
        (2 * qnorm(
          (0.75 * {{ n_col }} - 0.125) / ({{ n_col }} + 0.25),
          mean = 0,
          sd = 1
        ))
    )
}
# Run the function on the same data; the pipe supplies `data` as the first
# argument and the two columns are passed by name.
funciton_output <- data |> med_to_mean(median_col = age_median, iqr_col = age_iqr)
#> Warning: Debug mode activated: adding variables `age_iqr_ok`, `age_iqr_pieces`, and
#> `age_iqr_remainder`.
# Compare the function result with the step-by-step result.
all.equal(example_output, funciton_output)
#> [1] "Names: 5 string mismatches"
example_output
#> # A tibble: 3 × 11
#> study age_median age_iqr_low age_iqr_high age_iqr age_iqr_ok age_iqr_pieces
#> <dbl> <dbl> <dbl> <dbl> <chr> <lgl> <int>
#> 1 1 52 18 74 18-74 TRUE 2
#> 2 2 42 38 55 38-55 TRUE 2
#> 3 3 69 60 91 60-91 TRUE 2
#> # ℹ 4 more variables: age_iqr_remainder <chr>, number <dbl>, age_mean <dbl>,
#> # age_sd <dbl>
funciton_output
#> # A tibble: 3 × 11
#> study median_col iqr_col_low iqr_col_high age_iqr age_iqr_ok age_iqr_pieces
#> <dbl> <dbl> <dbl> <dbl> <chr> <lgl> <int>
#> 1 1 52 18 74 18-74 TRUE 2
#> 2 2 42 38 55 38-55 TRUE 2
#> 3 3 69 60 91 60-91 TRUE 2
#> # ℹ 4 more variables: age_iqr_remainder <chr>, number <dbl>, mean_col <dbl>,
#> # iqr_col <dbl>
创建于 2024-08-20,使用 reprex v2.1.0
注意:下面这段输出表明两者只是列名不同(例如 “age_median” 变成了 “median_col”),各列的值是相同的:
all.equal(example_output, funciton_output)
#> [1] "Names: 5 string mismatches"
这能解决你的问题吗?