在R(二项式GLM)中挖掘了许多全局模型,如何比较模型之间的结果?

问题描述 投票:0回答:1

我正在对大量参数进行数据探索(包括总结参数的多种方法)。我正在尝试对测试数据进行预测的变量进行优先级排序。我想使用 dredge 检查所有可能的组合(以及双向交互),以缩小变量范围,以便更深入地探索。复杂的是,我有不同类别的多个变量:饮食、海面温度 (SST) 和 NAO,并且只想一次在模型中包含每个类别的一个变量。最终我想看看海表温度的方差是否比平均值等更有用,所以我不希望它们在同一个模型中(并且它们也可能高度相关)。目前,我已经为每个变量组合创建了全局模型,并对全局模型进行了挖掘。我想查看我评估的每个全局模型中顶级模型和 2 AICc 内所有模型的输出。首先,我只想查看每个顶级模型的模型公式和 AIC 列表。然后我希望能够访问顶级模型的摘要以进行进一步探索。

所以我的问题是如何保存所需的输出,以及我的方法是否是构建模型和挖掘模型的最佳方法。谢谢!

我的数据名为

Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev
(前 5 行):

structure(list(DP = c(1, 1, 0, 0, 0), HI = structure(c(2L, 2L, 
2L, 2L, 2L), levels = c("MR", "MSI", "SINWR"), class = "factor"), 
    NIP = c(1, 1, 1, 0, 0), PP = structure(c(2L, 1L, 1L, 1L, 
    1L), levels = c("0", "1"), class = "factor"), MPFHMOAP = c(0.750452915231012, 
    0.453965698407607, 0.441925608384321, 0.722610207052164, 
    0.441925608384321), MHVHMOAP = c(-0.0524813927346641, 1.01282281910627, 
    0.757232590524714, 0.99109247578557, 0.757232590524714), 
    MLVHMOAP = c(0.73056230124052, 1.59501115848116, 0.598198676596868, 
    0.449493863725604, 0.598198676596868), MPFHMORAP = c(0.523489867811386, 
    0.192245391224712, 0.178793838876015, 0.492383153005024, 
    0.178793838876015), MHVHMORAP = c(-0.329148414641852, 0.788691843441836, 
    0.520497046213404, 0.765889856632941, 0.520497046213404), 
    MLVHMORAP = c(0.688793519232913, 1.70845779144675, 0.532663451086749, 
    0.357258065885503, 0.532663451086749), MPFAAP = c(0.43070772291489, 
    1.13584913195813, -0.426234961686277, -0.203429863689975, 
    -0.426234961686277), MHVAAP = c(-0.0579325416076462, 0.849585831032181, 
    0.121001819024069, -0.201997642218822, 0.121001819024069), 
    MLVAAP = c(-1.35485853425042, 0.484252451276881, 0.768258290849834, 
    1.48519986147912, 0.768258290849834), MPFVAP = c(-0.244597407127982, 
    1.27217782014326, 0.247421666481899, 0.774014226262795, 0.247421666481899
    ), MHVVAP = c(-0.37754544487536, 0.758649910763721, 0.681804652562789, 
    0.887267156622061, 0.681804652562789), MLVVAP = c(-0.572980090345317, 
    1.62888586457038, -0.0115436727418632, 0.0160827313504334, 
    -0.0115436727418632), SSTANAP = c(1.07683379079566, -1.38798265446896, 
    -0.559237858951006, 0.0697567434994638, -0.559237858951006
    ), SSTVNAP = c(2.09070187356431, 0.144862160369841, -0.853503337708263, 
    -1.68867970657618, -0.853503337708263), SSTABAAP = c(0.173411254232863, 
    -1.11203442428642, -0.473195106714148, 0.827784658554605, 
    -0.473195106714148), SSTVBAAP = c(-0.629121122035084, 0.392528189012985, 
    0.379784122762904, 0.490232696930265, 0.379784122762904), 
    NAOA = c(0.897128780719261, -0.204661067596483, -1.061094353052, 
    -0.824996528412911, -1.061094353052), NAOV = c(-0.028733326081941, 
    0.855031609762443, -0.028481970866968, 0.766860571745026, 
    -0.028481970866968)), row.names = c(NA, -5L), class = c("tbl_df", 
"tbl", "data.frame"))

我运行的代码的子集(我的完整数据有大约 70 个全局模型):

# Build global models for all combinations of diet/sst/nao 
model5_diet_HI_MPFHMOAP_SSTANAP_NAOA_DRsatr_autoglm=glm(DP~(HI+MPFHMOAP+SSTANAP+NAOA)^2, data = Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev, family = binomial(link = "logit"))

model5_diet_HI_MPFHMOAP_SSTVNAP_NAOA_DRsatr_autoglm=glm(DP~(HI+MPFHMOAP+SSTVNAP+NAOA)^2, data = Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev, family = binomial(link = "logit"))

model5_diet_HI_MPFHMOAP_SSTABAAP_NAOA_DRsatr_autoglm=glm(DP~(HI+MPFHMOAP+SSTABAAP+NAOA)^2, data = Model5_Diet_EnvData_SummarizedByRegionHY_RecruitmentSummaries_SubAbbrev, family = binomial(link = "logit"))


# Dredge global models
options(na.action = "na.fail") # Required for dredge to run

model5_diet_HI_MPFHMOAP_SSTANAP_NAOA_DRdredge=dredge(model5_diet_HI_MPFHMOAP_SSTANAP_NAOA_DRsatr_autoglm, beta = "none", evaluate = TRUE, rank = AICc)

model5_diet_HI_MPFHMOAP_SSTVNAP_NAOA_DRdredge=dredge(model5_diet_HI_MPFHMOAP_SSTVNAP_NAOA_DRsatr_autoglm, beta = "none", evaluate = TRUE, rank = AICc)

model5_diet_HI_MPFHMOAP_SSTABAAP_NAOA_DRdredge=dredge(model5_diet_HI_MPFHMOAP_SSTABAAP_NAOA_DRsatr_autoglm, beta = "none", evaluate = TRUE, rank = AICc)

options(na.action = "na.omit") # set back to default

期望的输出: |型号|工商局 | | -------- | -------------- | | DP ~ HI + NAOA + SSTANAP + HI:NAOA + HI:SSTANAP + NAOA:SSTANAP + 1 | 809.3 | 809.3 | DP ~ HI + MPFHMOAP + NAOA + HI:MPFHMOAP + HI:NAOA + 1 | 810.6 | 810.6 | DP ~ HI + MPFHMOAP + NAOA + SSTABAAP + HI:MPFHMOAP + HI:NAOA + HI:SSTABAAP + 1 | 810.8 |

r modeling glm
1个回答
0
投票

这是一种获取所需输出的方法,其中包含指定为字符的模型和随附的 AICc 值。

您的理论问题(即,这是一个好方法吗)更适合交叉验证

options(na.action = "na.fail")

library(dplyr)
library(MuMIn)
library(purrr)
library(tidyr)

# a list containing the global models
list(
  global_model_1 = glm(mpg ~ (cyl + disp + hp + drat)^2, data = mtcars),
  global_model_2 = glm(mpg ~ (wt + qsec + vs + am)^2, data = mtcars)
) %>% 
  # for each global model dredge and make a tibble consisting of model and AICc
  map(\(global_model) {
    global_model %>% 
      dredge() %>% 
      as_tibble() %>%
      # keep only predictor columns and AICc by deselecting all others
      select(-c("(Intercept)", df, logLik, delta, weight)) %>% 
      rowwise() %>% 
      mutate(
        # for predictor columns replace value with name of column if not missing
        across(-AICc, ~ if_else(!is.na(.), cur_column(), NA_character_)),
        intercept = 1
      ) %>% 
      # collapse values in predictor columns by removing NAs and separate with +
      unite(model, -AICc, sep = " + ", remove = TRUE, na.rm = TRUE)
  }) %>% 
  # row bind resulting list of tibbles
  list_rbind() %>% 
  # sort by AICc
  arrange(AICc)
#> # A tibble: 226 × 2
#>    model                                           AICc
#>    <chr>                                          <dbl>
#>  1 am + qsec + wt + am:wt + 1                      148.
#>  2 am + qsec + wt + am:wt + qsec:wt + 1            149.
#>  3 am + qsec + wt + am:qsec + am:wt + 1            151.
#>  4 am + qsec + vs + wt + am:wt + 1                 151.
#>  5 am + qsec + wt + am:qsec + am:wt + qsec:wt + 1  152.
#>  6 am + qsec + vs + wt + am:wt + qsec:wt + 1       153.
#>  7 am + qsec + vs + wt + am:wt + qsec:vs + 1       154.
#>  8 am + qsec + wt + am:qsec + 1                    154.
#>  9 am + qsec + vs + wt + am:qsec + am:wt + 1       154.
#> 10 am + qsec + vs + wt + am:vs + am:wt + 1         155.
#> # ℹ 216 more rows

创建于 2024-09-04,使用 reprex v2.1.1

© www.soinside.com 2019 - 2024. All rights reserved.