Sales forecasting using Facebook Prophet with GridSearch and time series cross-validation


I am trying to build a sales forecasting model with Facebook Prophet, using GridSearch to tune hyperparameters and cross-validation to avoid overfitting while tuning. I built a similar model with XGBoost using essentially the same code. However, after making the changes needed to adapt the code to Prophet, I am running into an error that I am having trouble debugging.

# imports used by the snippet below
import numpy as np
from prophet import Prophet
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error

# rename columns to match prophet requirements for time series forecasting model
fb.rename(columns={'DDATE': 'ds', 'SALE': 'y'}, inplace=True)

# set index to ds
fb.set_index('ds', inplace=True)

# modeling Prophet model with time series cross validation and hyperparameter tuning using GridSearchCV
def fb_model(X, y, cv, params):
    """
    Facebook Prophet model with time series cross validation and hyperparameter tuning using GridSearchCV
    """
    prophet_model = Prophet()
    gs = GridSearchCV(prophet_model, params, cv=cv, scoring='neg_mean_squared_error', n_jobs=-1)
    
    # Print the performance metrics for each fold and the average of the folds
    rmse_scores = []
    mse_scores = []
    mae_scores = []
    mape_scores = []
    
    for fold, (train_idx, val_idx) in enumerate(cv.split(X)):
        # Split the data into train and validation sets
        X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
        X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
        
        # Fit the model using GridSearchCV
        gs.fit(X_train, y_train)

        # Print the best parameters and the best score
        print(f"Fold {fold+1} - Best score: {gs.best_score_:.2f}")
        print(f"Fold {fold+1} - Best params: {gs.best_params_}")
        
        # Make predictions for the current fold
        predictions = gs.predict(X_val)

        # Calculate performance metrics (RMSE, MSE, MAE, MAPE) for the current fold
        rmse = np.sqrt(mean_squared_error(y_val, predictions))
        mse = mean_squared_error(y_val, predictions)
        mae = mean_absolute_error(y_val, predictions)
        mape = np.mean(np.abs((predictions - y_val) / y_val)) * 100

        # Append the performance metrics for the current fold to the lists
        rmse_scores.append(rmse)
        mse_scores.append(mse)
        mae_scores.append(mae)
        mape_scores.append(mape)
        print(f"Fold {fold+1} - RMSE: {rmse:.2f}, MSE: {mse:.2f}, MAE: {mae:.2f}, MAPE: {mape:.2f}%\n")
    
    # Print the average performance metrics across all folds
    print(f"Average performance across {cv.n_splits} folds:")
    print(f"Average RMSE: {np.mean(rmse_scores):.2f}, Average MSE: {np.mean(mse_scores):.2f}, Average MAE: {np.mean(mae_scores):.2f}, Average MAPE: {np.mean(mape_scores):.2f}%\n")
    
    return gs.best_estimator_

# define parameters
params = {
    'growth': ['linear', 'logistic'],
    'changepoint_prior_scale': [0.01, 0.1, 1.0],
    'seasonality_mode': ['additive', 'multiplicative'],
    'yearly_seasonality': [True, False],
    'weekly_seasonality': [True, False],
    'daily_seasonality': [True, False],
    'seasonality_prior_scale': [0.01, 0.1, 1.0],
    'holidays_prior_scale': [0.01, 0.1, 1.0],
    'changepoint_range': [0.8, 0.9, 1.0]
}

# define time series cross validation
cv = TimeSeriesSplit(n_splits=5)

# split data into train and test
train = fb.loc[fb.index <= '2021-06-01']
test = fb.loc[fb.index > '2021-06-01']

# define X and y
X_train = train.drop('y', axis=1)
y_train = train['y']
X_test = test.drop('y', axis=1)
y_test = test['y']

# fit model
model = fb_model(X_train, y_train, cv, params)

# make predictions on test set
predictions = model.predict(X_test)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[65], line 77
     74 y_test = test['y']
     76 # fit model
---> 77 model = fb_model(X_train, y_train, cv, params)
     79 # make predictions on test set
     80 predictions = model.predict(X_test)

Cell In[65], line 21, in fb_model(X, y, cv, params)
     18 X_val, y_val = X.iloc[val_idx], y.iloc[val_idx]
     20 # Fit the model using GridSearchCV
---> 21 gs.fit(X_train, y_train)
     23 # Print the best parameters and the best score
     24 print(f"Fold {fold+1} - Best score: {gs.best_score_:.2f}")

File /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/model_selection/_search.py:788, in BaseSearchCV.fit(self, X, y, groups, **fit_params)
    785 cv_orig = check_cv(self.cv, y, classifier=is_classifier(estimator))
    786 n_splits = cv_orig.get_n_splits(X, y, groups)
--> 788 base_estimator = clone(self.estimator)
    790 parallel = Parallel(n_jobs=self.n_jobs, pre_dispatch=self.pre_dispatch)
    792 fit_and_score_kwargs = dict(
    793     scorer=scorers,
    794     fit_params=fit_params,
   (...)
...
     84             )
     86 klass = estimator.__class__
     87 new_object_params = estimator.get_params(deep=False)

TypeError: Cannot clone object '<prophet.forecaster.Prophet object at 0x7fadf4db2c70>' (type <class 'prophet.forecaster.Prophet'>): it does not seem to be a scikit-learn estimator as it does not implement a 'get_params' method.
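
From the error message, my understanding is that GridSearchCV clones the estimator it is given, and cloning only works for objects that implement the scikit-learn estimator interface (get_params/set_params), which a raw Prophet instance does not. One workaround I am considering is wrapping Prophet in a small BaseEstimator subclass and passing that wrapper to GridSearchCV instead. Below is an untested sketch (ProphetRegressor is just a name I made up; it assumes the dates live in the index of X, as in my data after set_index('ds')):

import pandas as pd
from prophet import Prophet
from sklearn.base import BaseEstimator, RegressorMixin

class ProphetRegressor(BaseEstimator, RegressorMixin):
    """
    Minimal scikit-learn-compatible wrapper around Prophet (sketch)
    """
    def __init__(self, growth='linear', changepoint_prior_scale=0.05,
                 seasonality_mode='additive', yearly_seasonality='auto',
                 weekly_seasonality='auto', daily_seasonality='auto',
                 seasonality_prior_scale=10.0, holidays_prior_scale=10.0,
                 changepoint_range=0.8):
        # Store each hyperparameter under its own name so that
        # BaseEstimator.get_params()/set_params() (and therefore clone()) work
        self.growth = growth
        self.changepoint_prior_scale = changepoint_prior_scale
        self.seasonality_mode = seasonality_mode
        self.yearly_seasonality = yearly_seasonality
        self.weekly_seasonality = weekly_seasonality
        self.daily_seasonality = daily_seasonality
        self.seasonality_prior_scale = seasonality_prior_scale
        self.holidays_prior_scale = holidays_prior_scale
        self.changepoint_range = changepoint_range

    def fit(self, X, y):
        # Rebuild the two-column frame Prophet expects from the index dates and the target
        df = pd.DataFrame({'ds': X.index, 'y': y.values})
        self.model_ = Prophet(**self.get_params())
        self.model_.fit(df)
        return self

    def predict(self, X):
        # Predict for the dates in X's index and return the point forecast as an array
        future = pd.DataFrame({'ds': X.index})
        return self.model_.predict(future)['yhat'].values

# then inside fb_model:
# gs = GridSearchCV(ProphetRegressor(), params, cv=cv, scoring='neg_mean_squared_error', n_jobs=-1)

Is a wrapper like this the right way to make Prophet work with GridSearchCV and TimeSeriesSplit, or is there a better approach (for example Prophet's own cross-validation utilities)?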

python machine-learning xgboost facebook-prophet