通过使用 optim() 最小化残差平方和来优化参数,将模型拟合到观测数据

问题描述 投票:0回答:3

我正在尝试将这个非常简单的 4 种线性 Lotka-Volterra 竞争模型拟合到观察到的数据,但由于某种原因,当我尝试

optim()
函数时,关于
deSolve
的某些内容似乎失败了。

# Data
data <- data.frame(Cod = c(0.1966126, 0.1989563, 0.2567677, 0.3158896, 0.4225435, 0.7219856,
                           1.0570824, 0.7266830, 0.6286763, 0.6389475),
                   Herring = c(1.988372, 2.788014, 3.397138, 2.557245, 2.627013, 3.045617, 
                               3.161002, 3.531306, 3.432021, 3.617174),
                   Sprat = c(2.030273, 3.480469, 3.009277, 1.895996, 2.457520, 1.991211, 2.350098,
                             2.118164, 1.693359, 1.869141),
                   Flounder = c(0.4758220, 0.4425532, 0.4185687, 0.4967118, 0.7102515, 0.5733075,
                                0.7404255, 0.5996132, 0.6235977, 0.7187621))
# Model formulation
LLV <- function(time, state, parameters) {
  with(as.list(c(state, parameters)), {
    db1.dt = b1*(r1+a11*b1+a12*b2+a13*b3+a14*b4)
    db2.dt = b2*(r2+a22*b2+a21*b1+a23*b3+a24*b4)
    db3.dt = b3*(r3+a33*b3+a31*b1+a32*b2+a34*b4)
    db4.dt = b4*(r4+a44*b4+a41*b1+a42*b2+a43*b3)
    list(c(db1.dt, db2.dt, db3.dt, db4.dt))
  })
}
# Model input and simulation
# Model input
params <- c(r1 = -0.342085, r2 = 0.6855681, r3 = 2.757769, r4 = 0.9744113,
            a11 = -1.05973762, a12 = 0.09577309, a13 = -0.01915480, a14 = 1.36098939,
            a21 = 0.17533326, a22 = -0.32247342, a23 = 0.03111628, a24 = 0.30212711,
            a31 = 0.5303516, a32 = -0.4869761, a33 = -0.3194882, a34 = -1.5089027,
            a41 = 0.004418133, a42 = 0.163716414, a43 = -0.237873378, a44 = -1.519158802)
ini <- c(b1 = data[1,1], b2 = data[1,2], b3 = data[1,3], b4 = data[1,4])
tmax <- 10
t <- seq(1,tmax,0.1)
# Results and first parameter guess is more or less okay
results <- deSolve::ode(y = ini, times = t, func = LLV, parms = params)
matplot(data, pch = 1)
matplot(x = results[,1], y = results[,-1], type = "l", add = TRUE)

在这里,我继续编写一个函数,最小化残差平方和,当包含在带有上述初始参数的

optim()
中时,猜测应该会产生我正在寻找的结果。

min.RSS <- function(data, params) {
  output <- deSolve::ode(y = ini, times = t, func = LLV, parms = params)
  predictions <- exp(output[,-1])
  observations <- data
  return(sum((predictions-observations)^2))
}
result <- optim(par = params, fn = min.RSS, data = data)
fit <- deSolve::ode(y = ini, times = t, func = LLV, parms = result$par)
matplot(x = fit[,1], y = fit[,-1], type = "l", lwd = 3, add = TRUE)

任何关于如何解决这个问题的想法将非常感激。

r optimization ode model-fitting
3个回答
3
投票

你有一个更好的适合,但你应该非常小心这个问题。我有点疯狂,并使用(开发中)fitode 包 来解决这个问题。我拟合了模型并获得了更好的拟合,还尝试在最佳拟合周围使用 100 个随机变化的起点进行拟合。 您的残差平方和为 1.19;

fitode
第一次尝试就达到了 0.29,100 次拟合中最好的结果是 RSS=0.16。 但是:这些拟合是高度不稳定的。该图显示了数据的拟合度和未来 5 个时间步长的预测 (1) 您的拟合度(虚线); (2)fitode初始拟合(虚线); (3) 其他 100 个 fiode 拟合(最佳拟合 0.05 RSS 内的拟合是实心的,比它差的则绘制得很轻)。

你可以看到样本外的预测大多是疯狂的。您的拟合实际上比一些更好的拟合更更稳定 - 它在整个社区崩溃之前到达时间步骤 13 - 但底线是在这种情况下与数据的良好拟合绝不能保证明智的答案。看起来 100 个拟合中的一个达到了预测时间序列的末尾而没有崩溃(这似乎是基于观察到的时间序列的合理合理的“常识”预测)。

为了可靠地拟合这些数据,您要么需要一个参数少得多的模型,要么需要以先验形式提供的外部信息,或者

正则化——某种方式进行惩罚拟合,暗示“摇摆不定”的确定性轨迹或交互参数/增长率不合理。

## remotes::install_github("parksw3/fitode") library(fitode) ## data with tags for fitode data2 <- setNames(data,paste0(names(data),"_obs")) data2 <- data.frame(times=seq(nrow(data2)),data2) ## Model formulation (for fitode) LV_model <- odemodel( name="4-species LV", model=list( Cod ~ Cod*(r1+a11*Cod+a12*Herring+a13*Sprat+a14*Flounder), Herring ~ Herring*(r2+a22*Herring+a21*Cod+a23*Sprat+a24*Flounder), Sprat ~ Sprat*(r3+a33*Sprat+a31*Cod+a32*Herring+a34*Flounder), Flounder ~ Flounder*(r4+a44*Flounder+a41*Cod+a42*Herring+a43*Sprat) ), observation=list( Cod_obs ~ ols(mean=Cod), Herring_obs ~ ols(mean=Herring), Sprat_obs ~ ols(mean=Sprat), Flounder_obs ~ ols(mean=Flounder) ), initial=list( Cod ~ data2$Cod_obs[1], Herring ~ data2$Herring_obs[1], Sprat ~ data2$Sprat_obs[1], Flounder ~ data2$Flounder_obs[1] ), link=setNames(rep("identity",length(pars)),pars), par= pars ) ## plot results plotres <- function(p,ODEint="rk",lty=1, dt=0.1, tvec=seq(1,10,by=dt),...) { par(las=1, bty="l") res <- deSolve::ode(ini, tvec, LLV, p, method=ODEint) matplot(res[,1],res[,-1],type="l",lty=lty,...) return(invisible(res[,-1])) } f1 <- fitode( LV_model, data=data2, start=params, control=list(maxit=1e5,trace=1000) ) ## fitode with multistart ranfit <- function(n,fit,range=0.5) { ## rpars <- params*runif(length(params),1-range,1+range) newfit <- try(update(fit, start=rpars)) return(newfit) } cl <- makeCluster(10) clusterSetRNGStream(cl = cl, 101) clusterExport(cl, c("params","LV_model","data2")) clusterEvalQ(cl,invisible(library(fitode))) system.time( multifit <- parLapply(cl, 1:100, ranfit, fit=f1, tvec=tvec) ) saveRDS(multifit,file="SO65440448_multifit.rds") ivec <- seq_along(multifit) ivec <- ivec[sapply(multifit,function(x) !inherits(x,"try-error"))] coef <- pred <- vector("list", length=length(ivec)) ll <- conv <- rep(NA,length(ivec)) for (i in seq_along(ivec)) { nf <- multifit[[ivec[i]]] coef[[i]] <- coef(nf) pp <- predict(nf, times=1:10) pred[[i]] <- cbind(times=pp[[1]][,1], do.call(cbind,lapply(pp,"[",-1))) ll[i] <- logLik(nf) conv[i] <- nf@mle2@details$convergence } par(las=1,bty="l") matplot(pred[[1]][,1],pred[[1]][,-1], type="n",lty=1,ylim=c(0,6), xlab="time",ylab="density") lthresh <- 0.05 for (i in 1:length(pred)) { good <- ll[i]>(max(ll)-lthresh) alpha <- if (good) 0.8 else 0.1 lwd <- if (good) 2 else 1 matlines(pred[[i]][,1],pred[[i]][,-1],lty=1, col=adjustcolor(palette()[1:4],alpha.f=alpha), lwd=lwd) } matpoints(data2[,1],data2[,-1],pch=16,cex=3) plotres(optimres$par,add=TRUE, lwd=3,lty=2,dt=1) plotres(coef(f1),add=TRUE, lwd=3,lty=3,dt=1)
    

1
投票
对于那些感兴趣的人,我已经设法获得了一个涉及更改颂歌集成方法的解决方案。这是工作优化器:

# Optimising parameter fit LVmse = function(parms) { out = as.matrix(deSolve::ode(ini, 1:10, LLV, parms, method="rk")[,-1]) RSS = sum((spp-out)^2, na.rm = TRUE) # Minimising residual sum of squares return(RSS) } optimres <- optim(par = params, fn = LVmse)
    

0
投票
距离我们上次通信已经过去很长时间了。我想知道,是否有可能为“fitode”制定一个模型,接受时间相关变量(例如开发率)?我的目标是阐明(正如您对上面的 4 个物种示例所做的那样)“fitode”返回的参数集(如果有),使得简单的两个物种修改后的 Lotka-Volterra 可以在未来共存模型。本质上我想重现你对一个简单模型(2 vs 4 物种)的答案,但按照以下示例添加收获率:

# Assessment data from the Baltic Sea ObsSSB <- data.frame(Herring = c(1932041, 1864349, 1672273, 1944689, 1905001, 1814679, 1626604, 1438139, 1520902, 1421057, 1266502, 1177136, 1090921, 1011718, 1013695, 856321, 714642, 647277, 675487, 649195, 651524, 539582, 483859, 452706, 417827, 363049, 354724, 339899, 332746, 367432, 377935, 424888, 461102, 478633, 477803, 536040, 565200, 557749, 598115, 629339, 695343, 643874, 579087, 597533, 581015, 460378, 364981), Sprat = c(940000, 726000, 625000, 1044000, 695000, 377000, 227000, 199000, 254000, 394000, 616000, 605000, 570000, 461000, 403000, 423000, 556000, 775000, 1045000, 1360000, 1375000, 1429000, 1811000, 1777000, 1354000, 1353000, 1319000, 1196000, 942000, 829000, 1040000, 1324000, 1055000, 898000, 921000, 827000, 948000, 752000, 694000, 706000, 620000, 680000, 1077000, 1095000, 982000, 855000, 817000)) rownames(ObsSSB) <- 1974:2020 Fdata <- data.frame(Herring = c(0.1603, 0.17, 0.1579, 0.1469,0.1263, 0.1533, 0.1593, 0.1839, 0.1648, 0.2255, 0.2369, 0.2524, 0.2265, 0.2626, 0.2547, 0.3426, 0.3324, 0.3416, 0.299, 0.3367, 0.4076, 0.3872, 0.4094, 0.4564, 0.4848, 0.4127, 0.49, 0.4336, 0.3995, 0.3179, 0.2713, 0.2464, 0.2712, 0.2729, 0.2744, 0.2485, 0.2943, 0.2299, 0.1658, 0.1492, 0.2099, 0.2886, 0.3496, 0.352, 0.4474, 0.4951, 0.46), Sprat = c(0.3696, 0.396, 0.3777, 0.336, 0.3408, 0.2627, 0.3005, 0.1828, 0.3033, 0.1358, 0.1796, 0.1659, 0.2091, 0.2689, 0.2391, 0.2155, 0.1369, 0.1734, 0.2004, 0.1625, 0.2638, 0.3485, 0.2984, 0.416, 0.3981, 0.37, 0.3193, 0.2866, 0.4008, 0.3958, 0.4933, 0.4536, 0.3848, 0.3793, 0.4115, 0.5091, 0.4403, 0.3968, 0.3462, 0.4192, 0.4341, 0.4538, 0.3455, 0.3961, 0.395, 0.3918, 0.368)) rownames(Fdata) <- 1974:2020 ### Fitting a modified 2 species Lotka-Volterra model to observed SSB data ### library(deSolve) spp <- ObsSSB/10^6 # First guess of parameters nospp <- ncol(spp) data_lagged <- rbind(NA, spp) # Lagged data db.bdt <- list() for (i in 1:ncol(spp)) { db.bdt[[i]] <- log(append(spp[,i], NA)/data_lagged[,i]) } modlst <- NULL # Linear models for(i in 1:nospp) { moddat <- data.frame(db.bdt = db.bdt[[i]], data_lagged)[-nrow(data_lagged),] modlst[[i]] <- eval(parse(text = paste("lm(db.bdt~", paste(colnames(data_lagged), collapse = "+"), ", data=moddat)", sep = ""))) } r_start <- unname(sapply(modlst, function(dat){coef(dat)["(Intercept)"]})) A_start <- unname(sapply(modlst, function(dat){coef(dat)[-1]})) # Model input params <- c(r1 = r_start[1], r2 = r_start[2], a11 = A_start[1,1], a12 = A_start[2,1], a21 = A_start[1,2], a22 = A_start[2,2]) ini <- c(b1 = spp[1,1], b2 = spp[1,2]) tmax <- nrow(spp) t <- seq(1,tmax,1) # Model formulation inv <- 1e-5 # Numerical fudge to avoid biomasses becoming negative. HS2LLV <- function(time, state, parameters) { with(as.list(c(state, parameters)), { H1 <- H1(time) H2 <- H2(time) db1.dt = b1*(r1+a11*b1+a12*(b2^2))-H1*b1+inv db2.dt = b2*(r2+a22*b2+a21*(b1^2))-H2*b2+inv list(c(db1.dt, db2.dt)) }) } # Fdata matplot(as.numeric(rownames(Fdata)),Fdata, type = "l", xlab = "Time (yr)", ylab = "Fishing Mortality (1/yr)", lty = 1) # Fit two splines to Fdata to smooth out bumps for fit library(splines) Sp.fit <- array(dim = c(nrow(Fdata),ncol(Fdata))) for (i in 1:ncol(Fdata)) { x <- round(seq(1974,2020,length.out = 4), digits = 0) y <- Fdata[as.character(x),i] ## Fit a couple of quadratic splines with different degrees of freedom fit <- lm(y ~ bs(x, degree = 2, df=length(x)-1)) x0 <- seq(min(x), max(x), by = 1) Sp.fit[,i] <- predict(fit, data.frame(x = x0)) } matlines(1974:2020,Sp.fit, lty = 2, lwd = 2) H1 <- approxfun(Sp.fit[,1]) H2 <- approxfun(Sp.fit[,2]) # Optimiser LVmse = function(parms) { out = as.matrix(deSolve::ode(ini, 1:tmax, HS2LLV, parms, method="rk4")[,-1]) RSS = sum((spp-out)^2, na.rm = TRUE) return(RSS) } # Running model and optimising parameters optimres1 <- optim(par = params, fn = LVmse) fit1 <- deSolve::ode(y = ini, time = 1:47, func = HS2LLV, parms = optimres1$par) # Plot extrafont::loadfonts(device = "all") matplot(1974:2020, ObsSSB/10^6, type = "n", xlab = "Time (years)", ylab = "SSB (1000 x tons)", ylim = c(0,2.025)) grid() matplot(1974:2020, ObsSSB/10^6, pch = 16, col = c("gray", "steelblue"), add = TRUE) matplot(1974:2020, fit1[,2:3], type = "l", add = TRUE, col = c("gray", "steelblue"), lwd = 2) legend("topright", legend = c("Herring","Sprat"), lty = c(1,2), col = c("gray", "steelblue"), lwd = 2, bty = "n")
    
© www.soinside.com 2019 - 2024. All rights reserved.