我正在尝试使用 FaceBook Prophet 和 GridSearch 开发销售预测模型来调整超参数和交叉验证,以避免在调整模型时过度拟合。我使用具有相同代码的 XGBoost 开发了一个类似的模型。然而,在对代码进行必要的更改以适应 Prophet 之后,我遇到了一些我很难调试的错误。
# rename columns to match prophet requirements for time series forecasting model
fb.rename(columns={'DDATE': 'ds', 'SALE': 'y'}, inplace=True)
# set index to ds
fb.set_index('ds', inplace=True)
# modeling XGBoost model with time series cross validation and hyperparameter tuning using GridSearchCV
def fb_model(X, y, cv, params):
"""
Facebook Prophet model with time series cross validation and hyperparameter tuning using GridSearchCV
"""
prophet_model = Prophet()
gs = GridSearchCV(prophet_model, params, cv=cv, scoring='neg_mean_squared_error', n_jobs=-1)
# Print the performance metrics for each fold and the average of the folds
rmse_scores = []
mse_scores = []
mae_scores = []
mape_scores = []
for fold, (train_idx, val_idx) in enumerate(cv.split(X)):
# Split the data into train and validation sets
X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
# Fit the model using GridSearchCV
gs.fit(X_train, y_train)
# Print the best parameters and the best score
print(f"Fold {fold+1} - Best score: {gs.best_score_:.2f}")
print(f"Fold {fold+1} - Best params: {gs.best_params_}")
# Make predictions for the current fold
predictions = gs.predict(X_val)
# Calculate performance metrics (RMSE, MSE, MAE, MAPE) for the current fold
rmse = np.sqrt(mean_squared_error(y_val, predictions))
mse = mean_squared_error(y_val, predictions)
mae = mean_absolute_error(y_val, predictions)
mape = np.mean(np.abs((predictions - y_val) / y_val)) * 100
# Append the performance metrics for the current fold to the lists
rmse_scores.append(rmse)
mse_scores.append(mse)
mae_scores.append(mae)
mape_scores.append(mape)
print(f"Fold {fold+1} - RMSE: {rmse:.2f}, MSE: {mse:.2f}, MAE: {mae:.2f}, MAPE: {mape:.2f}%\n")
# Print the average performance metrics across all folds
print(f"Average performance across {cv.n_splits} folds:")
print(f"Average RMSE: {np.mean(rmse_scores):.2f}, Average MSE: {np.mean(mse_scores):.2f}, Average MAE: {np.mean(mae_scores):.2f}, Average MAPE: {np.mean(mape_scores):.2f}%\n")
return gs.best_estimator_
# define parameters
params = {
'growth': ['linear', 'logistic'],
'changepoint_prior_scale': [0.01, 0.1, 1.0],
'seasonality_mode': ['additive', 'multiplicative'],
'yearly_seasonality': [True, False],
'weekly_seasonality': [True, False],
'daily_seasonality': [True, False],
'seasonality_prior_scale': [0.01, 0.1, 1.0],
'holidays_prior_scale': [0.01, 0.1, 1.0],
'changepoint_range': [0.8, 0.9, 1.0]
}
# define time series cross validation
cv = TimeSeriesSplit(n_splits=5)
# split data into train and test
train = fb.loc[fb.index <= '2021-06-01']
test = fb.loc[fb.index > '2021-06-01']
# define X and y
X_train = train.drop('y', axis=1)
y_train = train['y']
X_test = test.drop('y', axis=1)
y_test = test['y']
# fit model
model = fb_model(X_train, y_train, cv, params)
# make predictions on test set
predictions = model.predict(X_test)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[65], line 77
74 y_test = test['y']
76 # fit model
---> 77 model = fb_model(X_train, y_train, cv, params)
79 # make predictions on test set
80 predictions = model.predict(X_test)
Cell In[65], line 21, in fb_model(X, y, cv, params)
18 X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
20 # Fit the model using GridSearchCV
---> 21 gs.fit(X_train, y_train)
23 # Print the best parameters and the best score
24 print(f"Fold {fold+1} - Best score: {gs.best_score_:.2f}")
File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/model_selection/_search.py:788, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
785 cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))
786 n_splits = cv_orig.get_n_splits(X, y, groups)
--> 788 base_estimator = clone(self.estimator)
790 parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)
792 fit_and_score_kwargs = dict(
793 scorer=scorers,
794 fit_params=fit_params,
(...)
...
84 )
86 klass = estimator.__class__
87 new_object_params = estimator.get_params(deep=False)
TypeError: Cannot clone object '<prophet.forecaster.Prophet object at 0x7fadf4db2c70>' (type <class 'prophet.forecaster.Prophet'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.