我正在尝试将这个非常简单的 4 种线性 Lotka-Volterra 竞争模型拟合到观察到的数据,但由于某种原因,当我尝试
optim()
函数时,关于 deSolve
的某些内容似乎失败了。
# Data
data <- data.frame(Cod = c(0.1966126, 0.1989563, 0.2567677, 0.3158896, 0.4225435, 0.7219856,
1.0570824, 0.7266830, 0.6286763, 0.6389475),
Herring = c(1.988372, 2.788014, 3.397138, 2.557245, 2.627013, 3.045617,
3.161002, 3.531306, 3.432021, 3.617174),
Sprat = c(2.030273, 3.480469, 3.009277, 1.895996, 2.457520, 1.991211, 2.350098,
2.118164, 1.693359, 1.869141),
Flounder = c(0.4758220, 0.4425532, 0.4185687, 0.4967118, 0.7102515, 0.5733075,
0.7404255, 0.5996132, 0.6235977, 0.7187621))
# Model formulation
LLV <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
db1.dt = b1*(r1+a11*b1+a12*b2+a13*b3+a14*b4)
db2.dt = b2*(r2+a22*b2+a21*b1+a23*b3+a24*b4)
db3.dt = b3*(r3+a33*b3+a31*b1+a32*b2+a34*b4)
db4.dt = b4*(r4+a44*b4+a41*b1+a42*b2+a43*b3)
list(c(db1.dt, db2.dt, db3.dt, db4.dt))
})
}
# Model input and simulation
# Model input
params <- c(r1 = -0.342085, r2 = 0.6855681, r3 = 2.757769, r4 = 0.9744113,
a11 = -1.05973762, a12 = 0.09577309, a13 = -0.01915480, a14 = 1.36098939,
a21 = 0.17533326, a22 = -0.32247342, a23 = 0.03111628, a24 = 0.30212711,
a31 = 0.5303516, a32 = -0.4869761, a33 = -0.3194882, a34 = -1.5089027,
a41 = 0.004418133, a42 = 0.163716414, a43 = -0.237873378, a44 = -1.519158802)
ini <- c(b1 = data[1,1], b2 = data[1,2], b3 = data[1,3], b4 = data[1,4])
tmax <- 10
t <- seq(1,tmax,0.1)
# Results and first parameter guess is more or less okay
results <- deSolve::ode(y = ini, times = t, func = LLV, parms = params)
matplot(data, pch = 1)
matplot(x = results[,1], y = results[,-1], type = "l", add = TRUE)
在这里,我继续编写一个函数,最小化残差平方和,当包含在带有上述初始参数的
optim()
中时,猜测应该会产生我正在寻找的结果。
min.RSS <- function(data, params) {
output <- deSolve::ode(y = ini, times = t, func = LLV, parms = params)
predictions <- exp(output[,-1])
observations <- data
return(sum((predictions-observations)^2))
}
result <- optim(par = params, fn = min.RSS, data = data)
fit <- deSolve::ode(y = ini, times = t, func = LLV, parms = result$par)
matplot(x = fit[,1], y = fit[,-1], type = "l", lwd = 3, add = TRUE)
任何关于如何解决这个问题的想法将非常感激。
你有一个更好的适合,但你应该非常小心这个问题。我有点疯狂,并使用(开发中)fitode 包 来解决这个问题。我拟合了模型并获得了更好的拟合,还尝试在最佳拟合周围使用 100 个随机变化的起点进行拟合。 您的残差平方和为 1.19;
fitode
第一次尝试就达到了 0.29,100 次拟合中最好的结果是 RSS=0.16。 但是:这些拟合是高度不稳定的。该图显示了数据的拟合度和未来 5 个时间步长的预测 (1) 您的拟合度(虚线); (2)fitode初始拟合(虚线); (3) 其他 100 个 fiode 拟合(最佳拟合 0.05 RSS 内的拟合是实心的,比它差的则绘制得很轻)。
你可以看到样本外的预测大多是疯狂的。您的拟合实际上比一些更好的拟合更更稳定 - 它在整个社区崩溃之前到达时间步骤 13 - 但底线是在这种情况下与数据的良好拟合绝不能保证明智的答案。看起来 100 个拟合中的一个达到了预测时间序列的末尾而没有崩溃(这似乎是基于观察到的时间序列的合理合理的“常识”预测)。
为了可靠地拟合这些数据,您要么需要一个参数少得多的模型,要么需要以先验形式提供的外部信息,或者正则化——某种方式进行惩罚拟合,暗示“摇摆不定”的确定性轨迹或交互参数/增长率不合理。
## remotes::install_github("parksw3/fitode")
library(fitode)
## data with tags for fitode
data2 <- setNames(data,paste0(names(data),"_obs"))
data2 <- data.frame(times=seq(nrow(data2)),data2)
## Model formulation (for fitode)
LV_model <- odemodel(
name="4-species LV",
model=list(
Cod ~ Cod*(r1+a11*Cod+a12*Herring+a13*Sprat+a14*Flounder),
Herring ~ Herring*(r2+a22*Herring+a21*Cod+a23*Sprat+a24*Flounder),
Sprat ~ Sprat*(r3+a33*Sprat+a31*Cod+a32*Herring+a34*Flounder),
Flounder ~ Flounder*(r4+a44*Flounder+a41*Cod+a42*Herring+a43*Sprat)
),
observation=list(
Cod_obs ~ ols(mean=Cod),
Herring_obs ~ ols(mean=Herring),
Sprat_obs ~ ols(mean=Sprat),
Flounder_obs ~ ols(mean=Flounder)
),
initial=list(
Cod ~ data2$Cod_obs[1],
Herring ~ data2$Herring_obs[1],
Sprat ~ data2$Sprat_obs[1],
Flounder ~ data2$Flounder_obs[1]
),
link=setNames(rep("identity",length(pars)),pars),
par= pars
)
## plot results
plotres <- function(p,ODEint="rk",lty=1,
dt=0.1,
tvec=seq(1,10,by=dt),...) {
par(las=1, bty="l")
res <- deSolve::ode(ini, tvec, LLV, p, method=ODEint)
matplot(res[,1],res[,-1],type="l",lty=lty,...)
return(invisible(res[,-1]))
}
f1 <- fitode(
LV_model,
data=data2,
start=params,
control=list(maxit=1e5,trace=1000)
)
## fitode with multistart
ranfit <- function(n,fit,range=0.5) {
##
rpars <- params*runif(length(params),1-range,1+range)
newfit <- try(update(fit, start=rpars))
return(newfit)
}
cl <- makeCluster(10)
clusterSetRNGStream(cl = cl, 101)
clusterExport(cl, c("params","LV_model","data2"))
clusterEvalQ(cl,invisible(library(fitode)))
system.time(
multifit <- parLapply(cl, 1:100, ranfit, fit=f1, tvec=tvec)
)
saveRDS(multifit,file="SO65440448_multifit.rds")
ivec <- seq_along(multifit)
ivec <- ivec[sapply(multifit,function(x) !inherits(x,"try-error"))]
coef <- pred <- vector("list", length=length(ivec))
ll <- conv <- rep(NA,length(ivec))
for (i in seq_along(ivec)) {
nf <- multifit[[ivec[i]]]
coef[[i]] <- coef(nf)
pp <- predict(nf, times=1:10)
pred[[i]] <- cbind(times=pp[[1]][,1],
do.call(cbind,lapply(pp,"[",-1)))
ll[i] <- logLik(nf)
conv[i] <- nf@mle2@details$convergence
}
par(las=1,bty="l")
matplot(pred[[1]][,1],pred[[1]][,-1],
type="n",lty=1,ylim=c(0,6),
xlab="time",ylab="density")
lthresh <- 0.05
for (i in 1:length(pred)) {
good <- ll[i]>(max(ll)-lthresh)
alpha <- if (good) 0.8 else 0.1
lwd <- if (good) 2 else 1
matlines(pred[[i]][,1],pred[[i]][,-1],lty=1,
col=adjustcolor(palette()[1:4],alpha.f=alpha),
lwd=lwd)
}
matpoints(data2[,1],data2[,-1],pch=16,cex=3)
plotres(optimres$par,add=TRUE, lwd=3,lty=2,dt=1)
plotres(coef(f1),add=TRUE, lwd=3,lty=3,dt=1)
# Optimising parameter fit
LVmse = function(parms) {
out = as.matrix(deSolve::ode(ini, 1:10, LLV, parms, method="rk")[,-1])
RSS = sum((spp-out)^2, na.rm = TRUE) # Minimising residual sum of squares
return(RSS)
}
optimres <- optim(par = params, fn = LVmse)
# Assessment data from the Baltic Sea
ObsSSB <- data.frame(Herring = c(1932041, 1864349, 1672273, 1944689, 1905001, 1814679, 1626604, 1438139, 1520902, 1421057, 1266502, 1177136, 1090921, 1011718, 1013695, 856321, 714642, 647277, 675487, 649195, 651524, 539582, 483859, 452706, 417827, 363049, 354724, 339899, 332746, 367432, 377935, 424888, 461102, 478633, 477803, 536040, 565200, 557749, 598115, 629339, 695343, 643874, 579087, 597533, 581015, 460378, 364981),
Sprat = c(940000, 726000, 625000, 1044000, 695000, 377000, 227000, 199000, 254000, 394000, 616000, 605000, 570000, 461000, 403000, 423000, 556000, 775000, 1045000, 1360000, 1375000, 1429000, 1811000, 1777000, 1354000, 1353000, 1319000, 1196000, 942000, 829000, 1040000, 1324000, 1055000, 898000, 921000, 827000, 948000, 752000, 694000, 706000, 620000, 680000, 1077000, 1095000, 982000, 855000, 817000))
rownames(ObsSSB) <- 1974:2020
Fdata <- data.frame(Herring = c(0.1603, 0.17, 0.1579, 0.1469,0.1263, 0.1533, 0.1593, 0.1839, 0.1648, 0.2255, 0.2369, 0.2524, 0.2265, 0.2626, 0.2547, 0.3426, 0.3324, 0.3416, 0.299, 0.3367, 0.4076, 0.3872, 0.4094, 0.4564, 0.4848, 0.4127, 0.49, 0.4336, 0.3995, 0.3179, 0.2713, 0.2464, 0.2712, 0.2729, 0.2744, 0.2485, 0.2943, 0.2299, 0.1658, 0.1492, 0.2099, 0.2886, 0.3496, 0.352, 0.4474, 0.4951, 0.46),
Sprat = c(0.3696, 0.396, 0.3777, 0.336, 0.3408, 0.2627, 0.3005, 0.1828, 0.3033, 0.1358, 0.1796, 0.1659, 0.2091, 0.2689, 0.2391, 0.2155, 0.1369, 0.1734, 0.2004, 0.1625, 0.2638, 0.3485, 0.2984, 0.416, 0.3981, 0.37, 0.3193, 0.2866, 0.4008, 0.3958, 0.4933, 0.4536, 0.3848, 0.3793, 0.4115, 0.5091, 0.4403, 0.3968, 0.3462, 0.4192, 0.4341, 0.4538, 0.3455, 0.3961, 0.395, 0.3918, 0.368))
rownames(Fdata) <- 1974:2020
### Fitting a modified 2 species Lotka-Volterra model to observed SSB data ###
library(deSolve)
spp <- ObsSSB/10^6
# First guess of parameters
nospp <- ncol(spp)
data_lagged <- rbind(NA, spp) # Lagged data
db.bdt <- list()
for (i in 1:ncol(spp)) {
db.bdt[[i]] <- log(append(spp[,i], NA)/data_lagged[,i])
}
modlst <- NULL # Linear models
for(i in 1:nospp) {
moddat <- data.frame(db.bdt = db.bdt[[i]],
data_lagged)[-nrow(data_lagged),]
modlst[[i]] <- eval(parse(text = paste("lm(db.bdt~",
paste(colnames(data_lagged),
collapse = "+"), ", data=moddat)", sep = "")))
}
r_start <- unname(sapply(modlst, function(dat){coef(dat)["(Intercept)"]}))
A_start <- unname(sapply(modlst, function(dat){coef(dat)[-1]}))
# Model input
params <- c(r1 = r_start[1], r2 = r_start[2],
a11 = A_start[1,1], a12 = A_start[2,1],
a21 = A_start[1,2], a22 = A_start[2,2])
ini <- c(b1 = spp[1,1], b2 = spp[1,2])
tmax <- nrow(spp)
t <- seq(1,tmax,1)
# Model formulation
inv <- 1e-5 # Numerical fudge to avoid biomasses becoming negative.
HS2LLV <- function(time, state, parameters) {
with(as.list(c(state, parameters)), {
H1 <- H1(time)
H2 <- H2(time)
db1.dt = b1*(r1+a11*b1+a12*(b2^2))-H1*b1+inv
db2.dt = b2*(r2+a22*b2+a21*(b1^2))-H2*b2+inv
list(c(db1.dt, db2.dt))
})
}
# Fdata
matplot(as.numeric(rownames(Fdata)),Fdata, type = "l", xlab = "Time (yr)",
ylab = "Fishing Mortality (1/yr)", lty = 1)
# Fit two splines to Fdata to smooth out bumps for fit
library(splines)
Sp.fit <- array(dim = c(nrow(Fdata),ncol(Fdata)))
for (i in 1:ncol(Fdata)) {
x <- round(seq(1974,2020,length.out = 4), digits = 0)
y <- Fdata[as.character(x),i]
## Fit a couple of quadratic splines with different degrees of freedom
fit <- lm(y ~ bs(x, degree = 2, df=length(x)-1))
x0 <- seq(min(x), max(x), by = 1)
Sp.fit[,i] <- predict(fit, data.frame(x = x0))
}
matlines(1974:2020,Sp.fit, lty = 2, lwd = 2)
H1 <- approxfun(Sp.fit[,1])
H2 <- approxfun(Sp.fit[,2])
# Optimiser
LVmse = function(parms) {
out = as.matrix(deSolve::ode(ini, 1:tmax, HS2LLV, parms, method="rk4")[,-1])
RSS = sum((spp-out)^2, na.rm = TRUE)
return(RSS)
}
# Running model and optimising parameters
optimres1 <- optim(par = params, fn = LVmse)
fit1 <- deSolve::ode(y = ini, time = 1:47, func = HS2LLV, parms = optimres1$par)
# Plot
extrafont::loadfonts(device = "all")
matplot(1974:2020, ObsSSB/10^6, type = "n", xlab = "Time (years)",
ylab = "SSB (1000 x tons)", ylim = c(0,2.025))
grid()
matplot(1974:2020, ObsSSB/10^6, pch = 16, col = c("gray", "steelblue"), add = TRUE)
matplot(1974:2020, fit1[,2:3], type = "l", add = TRUE, col = c("gray", "steelblue"), lwd = 2)
legend("topright", legend = c("Herring","Sprat"),
lty = c(1,2), col = c("gray", "steelblue"), lwd = 2, bty = "n")