我正在对大量参数进行数据探索(包括总结参数的多种方法)。我正在尝试对测试数据进行预测的变量进行优先级排序。我想使用 dredge 检查所有可能的组合(以及双向交互),以缩小变量范围,以便更深入地探索。复杂的是,我有不同类别的多个变量:饮食、海面温度 (SST) 和 NAO,并且只想一次在模型中包含每个类别的一个变量。最终我想看看海表温度的方差是否比平均值等更有用,所以我不希望它们在同一个模型中(并且它们也可能高度相关)。目前,我已经为每个变量组合创建了全局模型,并对全局模型进行了挖掘。我想查看我评估的每个全局模型中顶级模型和 2 AICc 内所有模型的输出。首先,我只想查看每个顶级模型的模型公式和 AIC 列表。然后我希望能够访问顶级模型的摘要以进行进一步探索。
所以我的问题是如何保存所需的输出,以及我的方法是否是构建模型和挖掘模型的最佳方法。谢谢!
我的数据名为
Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev
(前 5 行):
structure(list(DP = c(1, 1, 0, 0, 0), HI = structure(c(2L, 2L,
2L, 2L, 2L), levels = c("MR", "MSI", "SINWR"), class = "factor"),
NIP = c(1, 1, 1, 0, 0), PP = structure(c(2L, 1L, 1L, 1L,
1L), levels = c("0", "1"), class = "factor"), MPFHMOAP = c(0.750452915231012,
0.453965698407607, 0.441925608384321, 0.722610207052164,
0.441925608384321), MHVHMOAP = c(-0.0524813927346641, 1.01282281910627,
0.757232590524714, 0.99109247578557, 0.757232590524714),
MLVHMOAP = c(0.73056230124052, 1.59501115848116, 0.598198676596868,
0.449493863725604, 0.598198676596868), MPFHMORAP = c(0.523489867811386,
0.192245391224712, 0.178793838876015, 0.492383153005024,
0.178793838876015), MHVHMORAP = c(-0.329148414641852, 0.788691843441836,
0.520497046213404, 0.765889856632941, 0.520497046213404),
MLVHMORAP = c(0.688793519232913, 1.70845779144675, 0.532663451086749,
0.357258065885503, 0.532663451086749), MPFAAP = c(0.43070772291489,
1.13584913195813, -0.426234961686277, -0.203429863689975,
-0.426234961686277), MHVAAP = c(-0.0579325416076462, 0.849585831032181,
0.121001819024069, -0.201997642218822, 0.121001819024069),
MLVAAP = c(-1.35485853425042, 0.484252451276881, 0.768258290849834,
1.48519986147912, 0.768258290849834), MPFVAP = c(-0.244597407127982,
1.27217782014326, 0.247421666481899, 0.774014226262795, 0.247421666481899
), MHVVAP = c(-0.37754544487536, 0.758649910763721, 0.681804652562789,
0.887267156622061, 0.681804652562789), MLVVAP = c(-0.572980090345317,
1.62888586457038, -0.0115436727418632, 0.0160827313504334,
-0.0115436727418632), SSTANAP = c(1.07683379079566, -1.38798265446896,
-0.559237858951006, 0.0697567434994638, -0.559237858951006
), SSTVNAP = c(2.09070187356431, 0.144862160369841, -0.853503337708263,
-1.68867970657618, -0.853503337708263), SSTABAAP = c(0.173411254232863,
-1.11203442428642, -0.473195106714148, 0.827784658554605,
-0.473195106714148), SSTVBAAP = c(-0.629121122035084, 0.392528189012985,
0.379784122762904, 0.490232696930265, 0.379784122762904),
NAOA = c(0.897128780719261, -0.204661067596483, -1.061094353052,
-0.824996528412911, -1.061094353052), NAOV = c(-0.028733326081941,
0.855031609762443, -0.028481970866968, 0.766860571745026,
-0.028481970866968)), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
我运行的代码的子集(我的完整数据有大约 70 个全局模型):
# Build global models for all combinations of diet/sst/nao
model5_diet_HI_MPFHMOAP_SSTANAP_NAOA_DRsatr_autoglm=glm(DP~(HI+MPFHMOAP+SSTANAP+NAOA)^2, data = Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev, family = binomial(link = "logit"))
model5_diet_HI_MPFHMOAP_SSTVNAP_NAOA_DRsatr_autoglm=glm(DP~(HI+MPFHMOAP+SSTVNAP+NAOA)^2, data = Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev, family = binomial(link = "logit"))
model5_diet_HI_MPFHMOAP_SSTABAAP_NAOA_DRsatr_autoglm=glm(DP~(HI+MPFHMOAP+SSTABAAP+NAOA)^2, data = Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev, family = binomial(link = "logit"))
# Dredge global models
options(na.action = "na.fail") # Required for dredge to run
model5_diet_HI_MPFHMOAP_SSTANAP_NAOA_DRdredge=dredge(model5_diet_HI_MPFHMOAP_SSTANAP_NAOA_DRsatr_autoglm, beta = "none", evaluate = TRUE, rank = AICc)
model5_diet_HI_MPFHMOAP_SSTVNAP_NAOA_DRdredge=dredge(model5_diet_HI_MPFHMOAP_SSTVNAP_NAOA_DRsatr_autoglm, beta = "none", evaluate = TRUE, rank = AICc)
model5_diet_HI_MPFHMOAP_SSTABAAP_NAOA_DRdredge=dredge(model5_diet_HI_MPFHMOAP_SSTABAAP_NAOA_DRsatr_autoglm, beta = "none", evaluate = TRUE, rank = AICc)
options(na.action = "na.omit") # set back to default
期望的输出: |型号|工商局 | | -------- | -------------- | | DP ~ HI + NAOA + SSTANAP + HI:NAOA + HI:SSTANAP + NAOA:SSTANAP + 1 | 809.3 | 809.3 | DP ~ HI + MPFHMOAP + NAOA + HI:MPFHMOAP + HI:NAOA + 1 | 810.6 | 810.6 | DP ~ HI + MPFHMOAP + NAOA + SSTABAAP + HI:MPFHMOAP + HI:NAOA + HI:SSTABAAP + 1 | 810.8 |
这是一种获取所需输出的方法,其中包含指定为字符的模型和随附的 AICc 值。
您的理论问题(即,这是一个好方法吗)更适合交叉验证。
options(na.action = "na.fail")
library(dplyr)
library(MuMIn)
library(purrr)
library(tidyr)
# a list containing the global models
list(
global_model_1 = glm(mpg ~ (cyl + disp + hp + drat)^2, data = mtcars),
global_model_2 = glm(mpg ~ (wt + qsec + vs + am)^2, data = mtcars)
) %>%
# for each global model dredge and make a tibble consisting of model and AICc
map(\(global_model) {
global_model %>%
dredge() %>%
as_tibble() %>%
# keep only predictor columns and AICc by deselecting all others
select(-c("(Intercept)", df, logLik, delta, weight)) %>%
rowwise() %>%
mutate(
# for predictor columns replace value with name of column if not missing
across(-AICc, ~ if_else(!is.na(.), cur_column(), NA_character_)),
intercept = 1
) %>%
# collapse values in predictor columns by removing NAs and separate with +
unite(model, -AICc, sep = " + ", remove = TRUE, na.rm = TRUE)
}) %>%
# row bind resulting list of tibbles
list_rbind() %>%
# sort by AICc
arrange(AICc)
#> # A tibble: 226 × 2
#> model AICc
#> <chr> <dbl>
#> 1 am + qsec + wt + am:wt + 1 148.
#> 2 am + qsec + wt + am:wt + qsec:wt + 1 149.
#> 3 am + qsec + wt + am:qsec + am:wt + 1 151.
#> 4 am + qsec + vs + wt + am:wt + 1 151.
#> 5 am + qsec + wt + am:qsec + am:wt + qsec:wt + 1 152.
#> 6 am + qsec + vs + wt + am:wt + qsec:wt + 1 153.
#> 7 am + qsec + vs + wt + am:wt + qsec:vs + 1 154.
#> 8 am + qsec + wt + am:qsec + 1 154.
#> 9 am + qsec + vs + wt + am:qsec + am:wt + 1 154.
#> 10 am + qsec + vs + wt + am:vs + am:wt + 1 155.
#> # ℹ 216 more rows
创建于 2024-09-04,使用 reprex v2.1.1