我想拟合一个指数衰减(或渐近曲线)递增形式的函数,这样:
Richness = C*(1-exp(k*Abundance)) # k < 0
我已经在此页面上阅读了有关
expn()
函数的信息,但就是找不到它(或 nls
包)。我找到的只是一个 nlstools 包,但它没有expn()
。我尝试使用通常的 nls
和 exp
函数,但我只得到递增的指数......
我想拟合如下图(在Paint中绘制),但我不知道曲线应该稳定在哪里(丰富度= C)。预先感谢。
这应该可以帮助您开始。阅读有关
nls(...)
的文档(在命令提示符下键入 ?nls
)。另请查找 ?summary
和 ?predict
。
set.seed(1) # so the example is reproduceable
df <- data.frame(Abundance=sort(sample(1:70,30)))
df$Richness <- with(df, 20*(1-exp(-0.03*Abundance))+rnorm(30))
fit <- nls(Richness ~ C*(1-exp(k*Abundance)),data=df,
algorithm="port",
start=c(C=10,k=-1),lower=c(C=0,k=-Inf), upper=c(C=Inf,k=0))
summary(fit)
# Formula: Richness ~ C * (1 - exp(k * Abundance))
#
# Parameters:
# Estimate Std. Error t value Pr(>|t|)
# C 20.004173 0.726344 27.54 < 2e-16 ***
# k -0.030183 0.002334 -12.93 2.5e-13 ***
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Residual standard error: 0.7942 on 28 degrees of freedom
#
# Algorithm "port", convergence message: relative convergence (4)
df$pred <- predict(fit)
plot(df$Abundance,df$Richness)
lines(df$Abundance,df$pred, col="blue",lty=2)
谢谢,jlHoward。阅读 shujaa 发送的链接后,我得到了类似的东西。
R <- function(a, b, abT) a*(1 - exp(-b*abT))
form <- Richness ~ R(a,b,Abundance)
fit <- nls(form, data=d, start=list(a=20,b=0.01))
plot(d$Abundance,d$Richness, xlab="Abundance", ylab="Richness")
lines(d$Abundance, predict(fit,list(x=d$Abundance)))
不过,我通过反复试验找到了初始值。所以你的解决方案看起来更好:)
编辑:结果:
指数衰减函数的递增形式如下所示:
y = a + b*(1-e^(-x * k))
您可以手动更改系数(a、b、k)以了解它们如何影响结果曲线:
library(tidyverse)
# increase exponent factor (k) -> increase initial growth rate
data.frame(x= seq(0,250,.5)) %>%
mutate(y0 = 0 + 60*(1-exp(-x*.00)),
y1 = 0 + 60*(1-exp(-x*.01)),
y2 = 0 + 60*(1-exp(-x*.02)),
y3 = 0 + 60*(1-exp(-x*.03))) %>%
ggplot() +
geom_abline() +
geom_point(aes(y=y0, x=x, color='k=.00')) +
geom_point(aes(y=y1, x=x, color='k=.01')) +
geom_point(aes(y=y2, x=x, color='k=.02')) +
geom_point(aes(y=y3, x=x, color='k=.03')) +
scale_color_manual(values = c("black","blue","red","yellow"))
# increase multiplication factor (b) -> set maximum y value
data.frame(x= seq(0,250,.5)) %>%
mutate(y0 = 0 + 20*(1-exp(-x*.03)),
y1 = 0 + 40*(1-exp(-x*.03)),
y2 = 0 + 60*(1-exp(-x*.03)),
y3 = 0 + 80*(1-exp(-x*.03))) %>%
ggplot() +
geom_abline() +
geom_point(aes(y=y0, x=x, color='b=20')) +
geom_point(aes(y=y1, x=x, color='b=40')) +
geom_point(aes(y=y2, x=x, color='b=60')) +
geom_point(aes(y=y3, x=x, color='b=80')) +
scale_color_manual(values = c("black","blue","red","yellow"))
# increase additive factor (a) -> shift the curve up/down
data.frame(x= seq(0,250,.5)) %>%
mutate(y0 = -10 + 60*(1-exp(-x*.03)),
y1 = 0 + 60*(1-exp(-x*.03)),
y2 = 10 + 60*(1-exp(-x*.03)),
y3 = 20 + 60*(1-exp(-x*.03))) %>%
ggplot() +
geom_abline() +
geom_point(aes(y=y0, x=x, color='a=-10')) +
geom_point(aes(y=y1, x=x, color='a=0')) +
geom_point(aes(y=y2, x=x, color='a=10')) +
geom_point(aes(y=y3, x=x, color='a=20')) +
scale_color_manual(values = c("black","blue","red","yellow"))
首先,创建一个遵循某种任意指数分布的数据集,其中存在一些误差:
set.seed(1);
df = data.frame(x = seq(0,250,.5) * runif(n=501)) %>%
mutate(y = (-2.5 + 75*(1-exp(-x*.05))) + rnorm(n=501,mean=0,sd=3))
我喜欢先进行一些数据可视化,以了解哪些功能可能运行良好。 您可以使用 ggplot
geom_smooth()
将各种线性和平滑曲线拟合到数据,如下所示:
ggplot(df, aes(x=x,y=y)) +
geom_abline() +
geom_point() +
geom_smooth(se=F, aes(color="loess")) +
geom_smooth(se=F, method="lm", aes(color="linear")) +
geom_smooth(se=F, method="lm", formula = y ~ poly(x,2), aes(color="2nd order polynomial")) +
geom_smooth(se=F, method="lm", formula = y ~ poly(x,3), aes(color="3rd order polynomial")) +
geom_smooth(se=F, method = "nls", formula = y ~ 1+MaxVal*(1-exp(-x*k)),
method.args = list(start=c(MaxVal=80, k=.25)),
aes(color="exponential no additive")) +
geom_smooth(se=F, method = "nls", formula = y ~ v+MaxVal*(1-exp(-x*k)),
method.args = list(start=c(MaxVal=80, k=.25, v=1)),
aes(color="exponential")) +
geom_smooth(se=F, formula = y ~ log(x+1), aes(color="logarithmic")) +
scale_color_manual(values = c("orange", "blue", "green", "red", "yellow", "pink", "purple"))
我适合:
method = 'loess' and formula = 'y ~ x'
),formula = 'y ~ x'
),log(0) = -Inf
错误。正如您所看到的,线性曲线和多项式曲线拟合得相当差,而黄土曲线、指数曲线和对数曲线乍一看似乎相当不错。
黄土曲线的潜在缺点是:(1)它是非参数的(没有方程),(2)它可能过度拟合(其中有更多的“摆动”),(3)它不是严格递增的。
对数曲线的潜在缺点:(1) 它并不是严格增加 - 当 x 值较高时,它开始下降。
您可以通过在高 x 值处添加另一个点(不更改源数据集)来更清楚地看到这一点:
df %>%
bind_rows(data.frame(x=300, y=75)) %>%
ggplot(., aes(x=x,y=y)) +
geom_abline() +
geom_point() +
geom_smooth(se=F, aes(color="loess")) +
geom_smooth(se=F, method = "nls", formula = y ~ 1+MaxVal*(1-exp(-x*k)),
method.args = list(start=c(MaxVal=80, k=.25)),
aes(color="exponential no additive")) +
geom_smooth(se=F, method = "nls", formula = y ~ v+MaxVal*(1-exp(-x*k)),
method.args = list(start=c(MaxVal=80, k=.25, v=1)),
aes(color="exponential")) +
geom_smooth(se=F, formula = y ~ log(x+1), aes(color="logarithmic")) +
scale_color_manual(values = c("green", "red", "pink", "purple"))
可视化很棒,向我们表明我们可能需要两个指数函数之一,但现在我们需要得到这些方程。
通常的方法在这里不起作用:
# plugging the formula into lm() won't work because
# you have unknown, undefined coefficients (b, k):
lm(formula = as.formula("y ~ 1 + b*(1-exp(-(x*k)))"), method="lm", data = df)
# Error in eval(predvars, data, env) : object 'b' not found
# even if you arbitrarily plug in some values for those coefficients, it doesn't work:
lm(formula = as.formula("y ~ 1 + 60*(1-exp(-(x*.05)))"), method="lm", data = df)
# Error in terms.formula(formula, data = data) :
# invalid model formula in ExtractVars
您需要一种求解最佳系数的方法。 您可以使用
nls()
函数,但是,如果您不为每个参数选择一些合理的起始值,它可能无法工作(它会自动将每个参数初始化为 1,这对于这种使用来说非常糟糕)案例):
fit = nls(formula = as.formula("y ~ 1 + b*(1-exp(-(x*k)))"), data = df)
# Warning in nls(formula = as.formula("y ~ 1 + b*(1-exp(-(x*k)))"), data = df) :
# No starting values specified for some parameters.
# Initializing ‘b’, ‘k’ to '1.'.
# Consider specifying 'start' or using a selfStart model
# Error in numericDeriv(form[[3L]], names(ind), env, central = nDcentral) :
# Missing value or an infinity produced when evaluating the model
合理接近的起始值就可以了:
# without additive term:
fit = nls(formula = as.formula("y ~ 1 + b*(1-exp(-(x*k)))"),
start = c(b=90, k=.05), data = df)
summary(fit)
Formula: y ~ 1 + b * (1 - exp(-(x * k)))
Parameters:
Estimate Std. Error t value Pr(>|t|)
b 71.881315 0.237978 302.1 <2e-16 ***
k 0.046566 0.000532 87.5 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.11 on 499 degrees of freedom
Number of iterations to convergence: 4
Achieved convergence tolerance: 0.00000256
因为该模型使用固定的附加值 (a = 1),所以 b 和 k 系数与我们的原始值不太接近(b = 71.9 与 75,k=.046 与 0.05)。
为加法系数添加一个附加参数让我们更接近:
# with additive term:
fit = nls(formula = as.formula("y ~ a + b*(1-exp(-(x*k)))"),
start = c(a=-15, b=60, k=.05), data = df)
summary(fit)
Formula: y ~ a + b * (1 - exp(-(x * k)))
Parameters:
Estimate Std. Error t value Pr(>|t|)
a -2.830697 0.517438 -5.47 0.000000071 ***
b 75.317600 0.512742 146.89 < 2e-16 ***
k 0.050070 0.000709 70.61 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.95 on 498 degrees of freedom
Number of iterations to convergence: 3
Achieved convergence tolerance: 0.000000179
如您所见,具有加性项的模型发现系数非常接近我们的原始值(a = -2.8 与 -2.5、b = 75.32 与 75、k=.05 与 0.05),剩余标准误差( RSE)基本上相当于我添加到任意数据集中的
sd=3
噪声。