我正在使用 Python statsmodels 包(Python 3.9 和 statsmodels 0.13)中的卡尔曼滤波器对时间序列进行建模。状态空间转移矩阵如下所示:
观察矩阵如下所示(请注意,它取决于未观察到的变量 v 以及两个观察到的变量 p 和 n 的过去值):
我如何将这种状态空间方程定义为 statsmodel MLEModel?我可以通过以下方式捕获该模型的大部分内容:
class AR1(sm.tsa.statespace.MLEModel):
start_params = [-0.5, 0.5, 0.0, 0.05, 0.05] # best guess at initial params
param_names = ['-psi', '1-phi', 'r_t', 'e_t', 'w_t']
def __init__(self, endog):
# Initialize the state space model
super(AR1, self).__init__(endog, k_states=2, k_posdef=1,
initialization='stationary')
# Setup the fixed components of the state space representation
self['design'] = [[1., 1.],
[1., 0]]
self['transition'] = [[1., 0],
[1., 0]]
self['selection', 0, 0] = 1.
# Describe how parameters enter the model
def update(self, params, transformed=True, **kwargs):
params = super(AR1, self).update(params, transformed, **kwargs)
self['design', 0, 1] = params[0] # param.0 is -psi
self['design', 1, 0] = params[1] # param.1 is (1-phi)
self['selection', 0, 0] = params[2] # param.2 is r_t
self['obs_cov', 0, 0] = params[3] # param.3 is e_t
self['obs_cov', 1, 1] = params[4] # param.4 is w_t
# Specify start parameters and parameter names
@property
def start_params(self):
return self.start_params
# Create and fit the model
mod = AR1(DepVars)
# Display results
res = mod.fit()
print(res.summary())
res.plot_diagnostics(figsize=(13,7))
但是,我似乎无法弄清楚如何添加观测方程的第一项,这取决于 psi/phi 以及先前的观测本身。我有什么遗漏的吗?也许通过某种方式将该术语视为截距?
任何想法将不胜感激。谢谢!!
是的,你说得对,这些可以包含在观察截距项中。这是我编写这个模型的方式:
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.statespace import tools
class SSM(sm.tsa.statespace.MLEModel):
# If you use _param_names and _start_params then the model
# will automatically pick them up
_param_names = ['phi', 'psi', 'sigma2_r', 'sigma2_e', 'sigma2_w']
_start_params = [0.5, 0.5, 0.1, 1, 1]
def __init__(self, endog):
# Extract lagged endog
X = sm.tsa.lagmat(endog, maxlag=1, trim='both', original='in')
endog = X[:, :2]
self.lagged_endog = X[:, 2:]
# Initialize the state space model
# Note: because your state process is nonstationary,
# you can't use stationary initialization
super().__init__(endog, k_states=2, k_posdef=1,
initialization='diffuse')
# Setup the fixed components of the state space representation
self['design'] = [[1., 1.],
[1., 0]]
self['transition'] = [[1., 0],
[1., 0]]
self['selection'] = [[1],
[0]]
# For the parameter estimation part, it is
# helpful to use parameter transformations to
# keep the parameters in the valid domain. Here
# I assumed that you wanted phi and psi to be
# between (-1, 1).
def transform_params(self, params):
params = params.copy()
for i in range(2):
params[i:i + 1] = tools.constrain_stationary_univariate(params[i:i + 1])
params[2:] = params[2:]**2
return params
def untransform_params(self, params):
params = params.copy()
for i in range(2):
params[i:i + 1] = tools.unconstrain_stationary_univariate(params[i:i + 1])
params[2:] = params[2:]**0.5
return params
# Describe how parameters enter the model
def update(self, params, **kwargs):
params = super().update(params, **kwargs)
# Here is where we're putting the lagged endog, multiplied
# by phi and psi into the intercept term
self['obs_intercept'] = self.lagged_endog.T * params[:2, None]
self['design', 0, 1] = -params[0]
self['design', 1, 0] = 1 - params[1]
self['state_cov', 0, 0] = params[2]
self['obs_cov'] = np.diag(params[3:5])
我们可以模拟一些数据,然后运行拟合例程来检查它是否正常工作:
rs = np.random.RandomState(12345)
# Specify some parameters to simulate data and
# check the model
nobs = 5000
params = np.r_[0.2, 0.8, 0.1, 1.5, 2.5]
# Simulate data
v = rs.normal(scale=params[2]**0.5, size=nobs + 1).cumsum()
e = rs.normal(scale=params[3]**0.5, size=nobs + 1)
w = rs.normal(scale=params[4]**0.5, size=nobs + 1)
p = np.zeros(nobs + 1)
n = np.zeros(nobs + 1)
for t in range(1, nobs):
p[t] = params[0] * p[t - 1] + v[t] - params[0] * v[t - 1] + e[t]
n[t] = params[1] * n[t - 1] + (1 - params[1]) * v[t] + w[t]
y = np.c_[p, n]
# Run MLE routine on the fitted data
mod = SSM(y)
res = mod.fit(disp=False)
# Check the estimated parameters
import pandas as pd
print(pd.DataFrame({
'True': params,
'Estimated': res.params
}, index=mod.param_names).round(2))
向我展示:
True Estimated
phi 0.2 0.17
psi 0.8 0.81
sigma2_r 0.1 0.09
sigma2_e 1.5 1.49
sigma2_w 2.5 2.47
对于建议的答案,当我在数据中将某些值设置为 nan 时,它似乎不再起作用。我很困惑哪一步需要数据的完整性。