我正在尝试使用
tbl_svysummary
函数,在 gtsummary 中按某个分类变量生成加权平均调查得分表。
如何结合调查权重,使用
tbl_svysummary
函数来实现这一点?我已经看过两个与此很接近的示例(解决方案由 Daniel Sjoberg 提供),但我还是不太明白其中的原理。我可以通过调换
tbl_svysummary
中的 by 和 include
参数来做到这一点,但随后又遇到一个全新的问题:表格的方向不对。
这是一些示例数据:
# Example data: ten respondents with an age category, a CVH score, and a
# survey weight. The weight vector is named by respondent id. Built inside
# local() so the helper vectors do not leak into the calling environment.
dat <- local({
  ids <- c(
    "p41112019021430", "p41222013024584", "p41212017017560",
    "p41212017011700", "p41212019022003", "p41212019133026",
    "p41212017014434", "p41112019023063", "p41212019077561",
    "p41212017050030"
  )
  age_levels <- c("18-24", "25-44", "45-64", "65-74", "75+")
  age_values <- c(
    "45-64", "18-24", "65-74", "25-44", "25-44",
    "45-64", "45-64", "25-44", "18-24", "65-74"
  )
  svy_weights <- c(
    0.360602284454939, 5.00004172246093, 0.276025143197602,
    1.55086389757734, 2.20669366738008, 0.878664071962474,
    1.15252329666968, 1.51638372307208, 2.1408232841115,
    0.282529671403006
  )

  structure(
    list(
      uuid = ids,
      # "75+" stays a declared level even though no row uses it.
      age_cat = factor(age_values, levels = age_levels),
      cvh_score = c(6, 4, 1, 3, 0, 2, 3, 2, 6, 1),
      weights = setNames(svy_weights, ids)
    ),
    row.names = c(NA, -10L),
    class = "data.frame"
  )
})
# Wrap the example data in a survey design object: `uuid` is passed as the
# ids variable and `weights` as the weights variable.
dat_svy <- srvyr::as_survey_design(dat, ids = uuid, weights = weights)
我可以使用
tbl_continuous
函数在没有权重的情况下实现我想要的目标,如下所示:
# Unweighted reference table: "{mean} ({sd})" of `cvh_score`, broken down by
# the levels of `age_cat`.
tbl_01 <- dat |>
  gtsummary::tbl_continuous(
    variable = cvh_score,
    include = age_cat,
    statistic = list(everything() ~ "{mean} ({sd})")
  )

# Print the table.
tbl_01
这给了我以下内容:
如何在使用权重的情况下做到这一点?
# Survey-weighted mean and SD of `cvh_score` for each `age_cat` level, laid
# out with `row_type` / `label` key columns plus one display column `stat_3`,
# and with a leading "label" row for the variable itself.
weighted_mean <- dat_svy |>
  filter(!is.na(age_cat)) |>
  group_by(age_cat) |>
  summarize(mean = survey_mean(cvh_score), sd = survey_sd(cvh_score)) |>
  transmute(
    row_type = "level",
    # Plain character key rather than a factor, so a later join on `label`
    # does not depend on implicit factor-to-character coercion.
    label = as.character(age_cat),
    # sprintf() always prints two decimals; round() would drop trailing
    # zeros and give cells of uneven width (e.g. "2.5" next to "2.47").
    stat_3 = sprintf("%.2f (%.2f)", mean, sd)
  ) |>
  # Header row for the variable; it carries no statistic of its own.
  add_row(
    row_type = "label",
    label = "Age",
    stat_3 = NA_character_,
    .before = 1L
  )
# Survey-weighted summary of `age_cat`. The pre-computed strings in
# `weighted_mean` are joined onto the table body as `stat_3`, which is then
# displayed in place of the default `stat_0` column.
table <- dat_svy |>
  tbl_svysummary(
    include = "age_cat",
    label = list(age_cat ~ "Age"),
    statistic = list(all_categorical() ~ "{n}"),
    percent = "row",
    missing = "no"
  ) |>
  modify_table_body(
    # Match rows of the table body to the weighted statistics by key.
    \(body) dplyr::left_join(
      body,
      weighted_mean,
      by = c("row_type", "label")
    )
  ) |>
  modify_column_unhide(columns = "stat_3") |>
  modify_column_hide(columns = "stat_0") |>
  modify_header(
    label = "**Characteristic**",
    stat_3 = "**CVH Score**, Mean (SD)"
  )