我正在对我的数据应用 Zeroinfl negbin 回归。具体来说,我的因变量是计数变量(中心性度量),而我的自变量/控件都是二元且连续的。
我已经在 R 中对数据进行了回归,但现在为了计算 p 值(因为 Y 是中心性度量),我将 Y 随机化 10,000 次,并重新计算 10,000 个 Zeroinfl negbin 以生成系数的分布。然而,在该过程结束时,会弹出一些警告,包括:
我的数据集由 177 个观察值组成。
这是我正在使用的代码:
library(pscl)
dput(variables_meetQ1)
structure(list(name = c("N0003", "N0005", "N0007", "N0010", "N0011",
"N0013", "N0014", "N0015", "N0020", "N0021", "N0024", "N0025",
"N0033", "N0034", "N0035", "N0039", "N0041", "N0042", "N0043",
"N0044", "N0045", "N0046", "N0047", "N0048", "N0051", "N0053",
"N0055", "N0057", "N0058", "N0060", "N0063", "N0064", "N0067",
"N0069", "N0071", "N0074", "N0075", "N0077", "N0078", "N0079",
"N0080", "N0082", "N0083", "N0089", "N0095", "N0096", "N0102",
"N0107", "N0109", "N0110", "N0113", "N0114", "N0115", "N0116",
"N0119", "N0121", "N0123", "N0125", "N0127", "N0128", "N0133",
"N0135", "N0136", "N0139", "N0143", "N0146", "N0156", "N0157",
"N0158", "N0160", "N0161", "N0162", "N0163", "N0177", "N0183",
"N0186", "N0187", "N0204", "N0230", "N0251", "N0259", "N0263",
"N0264", "N0284", "N0287", "N0300", "N0327", "N0341", "N0346",
"N0373", "N0393", "N0399", "N0431", "N0439", "N0441", "N0443",
"N0447", "N0451", "N0469", "N0480", "N0481", "N0504", "N0537",
"N0542", "N0552", "N0553", "N0570", "N0571", "N0572", "N0581",
"N0582", "N0583", "N0609", "N0610", "N0626", "N0629", "N0646",
"N0663", "N0664", "N0675", "N0767", "N0782", "N0783", "N0792",
"N0801", "N0811", "N0829", "N0831", "N0833", "N0856", "N0879",
"N0890", "N0893", "N0900", "N0905", "N0934", "N0939", "N0947",
"N0955", "N1112", "N1119", "N1123", "N1149", "N1181", "N1190",
"N1196", "N1366", "N1367", "N0832", "N1325", "N0874", "N1145",
"N0466", "N0239", "N0768", "N0052", "N0409", "N0619", "N0679",
"N0295", "N0478", "N0023", "N0076", "N0200", "N0475", "N0242",
"N0203", "N0270", "N0292", "N1182", "N0054", "N0659", "N0026",
"N0132", "N0134", "N0322", "N0470"), Leader = c(0, 0, 0, 0, 0,
0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1,
0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0), Age_centred = c(-22.39415205, 7.605847953, 12.60584795,
9.605847953, -3.394152047, -0.394152047, 12.60584795, 1.605847953,
0.605847953, -25.39415205, -22.39415205, 12.60584795, 7.605847953,
19.60584795, 14.60584795, -16.39415205, -7.394152047, 36.60584795,
-21.39415205, 14.60584795, -10.39415205, 25.60584795, 2.605847953,
22.60584795, 6.605847953, 9.605847953, -11.39415205, -3.394152047,
2.605847953, -24.39415205, 26.60584795, 1.605847953, 3.605847953,
-2.394152047, -0.394152047, 24.60584795, -0.394152047, 0.605847953,
30.60584795, -3.394152047, 25.60584795, 0.605847953, 3.605847953,
0.605847953, 15.60584795, 4.605847953, 21.60584795, -10.39415205,
25.60584795, 10.60584795, 31.60584795, -8.394152047, -10.39415205,
-5.394152047, -2.394152047, -16.39415205, -4.394152047, -7.394152047,
40.60584795, -11.39415205, -8.394152047, 33.60584795, 25.60584795,
6.605847953, -5.394152047, 11.60584795, -26.39415205, -2.394152047,
1.605847953, 12.60584795, 2.605847953, -30.39415205, -25.39415205,
-14.39415205, -1.394152047, 11.60584795, 8.605847953, 11.60584795,
-8.394152047, -25.39415205, 3.605847953, -15.39415205, -18.39415205,
-3.394152047, 36.60584795, -6.394152047, 29.60584795, -11.39415205,
-0.394152047, -10.39415205, -26.39415205, -22.39415205, -8.394152047,
-14.39415205, -26.39415205, -26.39415205, -26.39415205, -31.39415205,
-19.39415205, -14.39415205, 10.60584795, -20.39415205, -3.394152047,
12.60584795, 16.60584795, -0.394152047, 6.605847953, -6.394152047,
-12.39415205, 7.605847953, -5.394152047, -14.39415205, 13.60584795,
-26.39415205, -3.394152047, -9.394152047, 12.60584795, -15.39415205,
-13.39415205, -24.39415205, 0.605847953, -15.39415205, -3.394152047,
-18.39415205, -1.394152047, 17.60584795, -12.39415205, -14.39415205,
-20.39415205, -5.394152047, 3.605847953, -20.39415205, -17.39415205,
0.605847953, -19.39415205, -9.394152047, -27.39415205, -27.39415205,
-16.39415205, 19.60584795, 24.60584795, 5.605847953, -3.394152047,
-11.39415205, 7.605847953, 11.60584795, -13.39415205, -13.39415205,
-16.39415205, -23.39415205, -5.394152047, 4.605847953, -23.39415205,
-4.394152047, -5.394152047, -0.394152047, 21.60584795, -22.39415205,
-0.394152047, -5.394152047, 0.605847953, -0.394152047, -5.394152047,
30.60584795, -10.39415205, 1.605847953, -9.394152047, 25.60584795,
-0.394152047, -28.39415205, -1.394152047, 28.60584795, -25.39415205,
-4.394152047, -6.394152047, -9.394152047, -16.39415205), n_kinship = c(1,
1, 0, 1, 9, 10, 7, 11, 1, 0, 3, 4, 1, 1, 0, 0, 8, 4, 0, 5, 3,
2, 1, 0, 0, 1, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1, 0, 0, 1, 3, 0, 0,
3, 1, 0, 2, 0, 0, 0, 1, 11, 7, 7, 6, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 2, 0, 3, 3, 3, 0, 0, 0, 1, 0, 0, 2, 0, 1, 0,
0, 0, 0, 3, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 3, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 4, 1, 0, 0, 0,
9, 1, 4, 2, 0, 4, 3, 3, 4, 1, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0,
3, 0, 2, 0, 4, 0, 1, 0, 7, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 6,
6, 0, 0, 2, 1, 0, 0, 0, 3, 0), n_call = c(0, 0, 0, 9, 15, 7,
0, 38, 9, 0, 3, 2, 6, 15, 13, 0, 29, 2, 0, 6, 0, 0, 0, 2, 2,
5, 0, 0, 0, 1, 23, 2, 0, 2, 0, 0, 1, 1, 6, 63, 0, 0, 0, 0, 0,
3, 0, 0, 4, 5, 23, 29, 0, 0, 0, 0, 0, 1, 0, 5, 0, 0, 3, 9, 0,
0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 20, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 7, 0, 0, 9, 0, 6, 0, 0, 0, 0,
0, 0, 0, 0, 2, 1, 0, 0, 2, 0, 0, 0, 7, 8, 3, 0, 0, 5, 0, 0, 0,
1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 2, 0, 0, 0, 0), n_met5 = c(0, 0, 1, 1, 1, 1, 0, 1, 0,
0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,
1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1,
1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0,
0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
), liaison = c(0, 0, 0, 6, 0, 0, 94, 1232, 0, 0, 0, 160, 0, 154,
0, 0, 26, 0, 0, 694, 34, 0, 0, 320, 170, 62, 0, 0, 0, 10, 30,
0, 0, 104, 0, 0, 0, 0, 354, 1658, 0, 8, 0, 0, 0, 0, 0, 0, 72,
0, 1404, 104, 0, 0, 50, 0, 0, 10, 0, 0, 0, 0, 0, 164, 0, 0, 0,
72, 578, 8, 2, 0, 0, 0, 0, 0, 6, 0, 92, 0, 0, 4, 4, 50, 0, 0,
0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA, 177L), class = "data.frame")
zinb_M4 <- zeroinfl(liaison ~ Leader + Age_centred + n_kinship + n_call
|n_met5, data = variables_meetQ1,
na.action = na.omit ,dist="negbin")
permutation_matrix <- matrix(0, nrow = length(variables_meetQ1$liaison),ncol = 10000)
for (i in 1:10000) {
permutation_matrix[,i] <- sample(variables_meetQ1$liaison,
size=length(variables_meetQ1$liaison),replace = FALSE)
}
zinb_M4_perm <- list()
for (i in 1:10000){
zinb_M4_perm[[i]] <- zeroinfl(permutation_matrix[,i] ~ Leader + Age_centred +
n_kinship + n_call | n_met5, data = variables_meetQ1,
na.action = na.omit ,dist="negbin")
}
coef_M4 <-matrix(0, nrow = 5, ncol = 10000)
for (i in 1:10000) {
coef_M4[,i] <- as.matrix(unname(zinb_M4_perm[[i]]$coefficients$count))
}
我实际上并不认为这些警告代表了一个严重的问题 - 它们将由在某些方面表现不佳的排列样本引起(例如具有complete分离,即单个数据分区中的所有零,或者具有零-通货膨胀概率估计为零[-Inf on the log-odds scale])。然而,弄清楚到底发生了什么是个好主意。
没有时间深入研究这一点,但这是调试策略的开始。
这是代码的一个稍微精简的版本,它在每次迭代时设置一个已知的种子,并在出现警告时停止,以便您可以查看该特定的排列数据集(最好存储所有排列) ,就像在您的代码中一样,然后查看它们,但是停下来看看发生了什么比标记哪些迭代会引起警告要容易一些......
nperm <- 1e4
coef_M4 <-matrix(0, nrow = 5, ncol = nperm)
options(warn=3) ## warnings to errors
for (i in seq(nperm)) {
cat(i, "\n")
set.seed(1 + i)
permsamp <- sample(variables_meetQ1$liaison,
size=nrow(variables_meetQ1),
replace = FALSE)
pfit <- zeroinfl(permsamp ~ Leader + Age_centred + n_kinship + n_call |n_met5,
data = variables_meetQ1,
na.action = na.omit ,dist="negbin")
coef_M4[,i] <- as.matrix(unname(pfit$coefficients$count))
}