我正在尝试用下面的代码,针对原始数据框 `data` 中的特定子集训练 XGBoost 模型。当我像下面这样把原始数据框 `data` 用作合并(merge/join)的左侧时,所有模型都可以正常训练:
# Working variant: the full frame `data` is the left side of an index-on-index
# left merge; overlapping columns from `data_shift` get the `_lag` suffix.
join = pd.merge(data, data_shift, left_index=True, right_index=True, how='left', suffixes=('', '_lag'))
但是,当我将数据框 `location_product_data` 用作合并/联接的左侧时,拟合(fit)会引发下文所示的错误。
# Train one XGBoost regressor per (location, product) pair, using calendar
# features plus the difference to the previous row as a lag feature.
data = data_raw.copy()
data = data.drop(['Number of day', 'Number of working days'], axis=1)
uniqueLocations = data['SC'].unique().tolist()
data['DATE'] = pd.to_datetime(data['DATE'])
data = data.set_index(pd.DatetimeIndex(data['DATE']))
year = '2015'
# Rows dated up to and including split_date are training data; later rows
# are test data. The value does not depend on the loop, so compute it once.
split_date = year + '-10-01'

for location in uniqueLocations:
    # Filter on the loop variable; the original compared against the
    # undefined name `sc`.
    location_data = data.loc[data['SC'] == location].copy()
    uniqueProducts = location_data['SPK'].unique().tolist()
    for product in uniqueProducts:
        location_product_data = location_data.loc[location_data['SPK'] == product].copy()

        # Values of the previous row, joined back on the date index with a
        # `_lag` suffix on every overlapping column.
        data_shift = location_product_data.shift(1).drop(['SC', 'SPK'], axis=1)
        join = pd.merge(location_product_data, data_shift, left_index=True, right_index=True, how='left', suffixes=('', '_lag'))
        # Turn the lagged value into the change relative to the previous row.
        join['NS AC_lag'] = join['NS AC'] - join['NS AC_lag']

        # split complete dataset in training and test data
        data_train = join.loc[join.index <= split_date].copy()
        data_test = join.loc[join.index > split_date].copy()

        # A (location, product) group may have no rows on one side of the
        # split date. Fitting or evaluating on an empty set makes XGBoost
        # fail with "label set cannot be empty", so skip such groups.
        if data_train.empty or data_test.empty:
            print('Skipping SC=%s, SPK=%s: empty train or test split' % (location, product))
            continue

        X_train, y_train = create_features(data_train, label='NS AC')
        X_test, y_test = create_features(data_test, label='NS AC')

        reg = xgb.XGBRegressor(n_estimators=1000)
        # NOTE(review): this dict is built but never passed to the regressor,
        # so the model runs with its default settings. Confirm whether these
        # values were meant to be applied before wiring them in.
        params = {
            'base_score': np.mean(y_train),
            'eta': 0.1,
            'max_depth': 3,
            'gamma': 3,
            'objective': 'reg:squarederror',
            'eval_metric': 'mae',
        }
        reg.fit(X_train, y_train,
                eval_set=[(X_train, y_train), (X_test, y_test)],
                early_stopping_rounds=50,
                verbose=True)
其中 `create_features` 函数的定义如下:
def create_features(df, label=None):
    """
    Create time series features from the datetime index of ``df``.

    The input frame is left untouched; features are built on a copy.

    Args:
        df: DataFrame indexed by a DatetimeIndex that contains an
            'NS AC_lag' column.
        label: Optional name of the target column. When given, the target
            series is returned alongside the features.

    Returns:
        ``X`` with the columns 'month', 'quarter', 'year' and 'lag', or the
        tuple ``(X, y)`` when ``label`` is given.
    """
    dates = df.index
    # assign() returns a new frame, so the caller's frame gains no columns.
    enriched = df.assign(
        DATE=dates,
        quarter=dates.quarter,
        month=dates.month,
        year=dates.year,
        lag=df['NS AC_lag'],
    )
    # Further candidate features (lag2, lag3, min, mean, max, t+1) were
    # disabled in the original code and are not produced here.
    X = enriched[['month', 'quarter', 'year', 'lag']]
    # Compare against None so that a falsy but valid column label (such as
    # the integer 0) still selects a target.
    if label is not None:
        y = enriched[label]
        return X, y
    return X
您知道为什么会引发错误吗?我找不到原因了。谢谢! :)
错误:
--------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
<ipython-input-30-9232809a4751> in <module>
83 eval_set=[(X_train, y_train), (X_test, y_test)],
84 early_stopping_rounds=50,
---> 85 verbose=False)
86
87 #print('Training finished.')
/opt/conda/lib/python3.7/site-packages/xgboost/sklearn.py in fit(self, X, y, sample_weight, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, callbacks)
394 evals_result=evals_result, obj=obj, feval=feval,
395 verbose_eval=verbose, xgb_model=xgb_model,
--> 396 callbacks=callbacks)
397
398 if evals_result:
/opt/conda/lib/python3.7/site-packages/xgboost/training.py in train(params, dtrain, num_boost_round, evals, obj, feval, maximize, early_stopping_rounds, evals_result, verbose_eval, xgb_model, callbacks, learning_rates)
214 evals=evals,
215 obj=obj, feval=feval,
--> 216 xgb_model=xgb_model, callbacks=callbacks)
217
218
/opt/conda/lib/python3.7/site-packages/xgboost/training.py in _train_internal(params, dtrain, num_boost_round, evals, obj, feval, xgb_model, callbacks)
82 # check evaluation result.
83 if evals:
---> 84 bst_eval_set = bst.eval_set(evals, i, feval)
85 if isinstance(bst_eval_set, STRING_TYPES):
86 msg = bst_eval_set
/opt/conda/lib/python3.7/site-packages/xgboost/core.py in eval_set(self, evals, iteration, feval)
1170 dmats, evnames,
1171 c_bst_ulong(len(evals)),
-> 1172 ctypes.byref(msg)))
1173 res = msg.value.decode()
1174 if feval is not None:
/opt/conda/lib/python3.7/site-packages/xgboost/core.py in _check_call(ret)
174 """
175 if ret != 0:
--> 176 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
177
178
XGBoostError: [15:41:05] /workspace/src/metric/elementwise_metric.cu:325: Check failed: info.labels_.Size() != 0U (0 vs. 0) : label set cannot be empty
Stack trace:
[bt] (0) /opt/conda/xgboost/libxgboost.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x24) [0x7fae2412ecb4]
[bt] (1) /opt/conda/xgboost/libxgboost.so(xgboost::metric::EvalEWiseBase<xgboost::metric::EvalRowRMSE>::Eval(xgboost::HostDeviceVector<float> const&, xgboost::MetaInfo const&, bool)+0xfe) [0x7fae243102ee]
[bt] (2) /opt/conda/xgboost/libxgboost.so(xgboost::LearnerImpl::EvalOneIter(int, std::vector<xgboost::DMatrix*, std::allocator<xgboost::DMatrix*> > const&, std::vector<std::string, std::allocator<std::string> > const&)+0x3c9) [0x7fae241c8d99]
[bt] (3) /opt/conda/xgboost/libxgboost.so(XGBoosterEvalOneIter+0x371) [0x7fae2412b651]
[bt] (4) /opt/conda/lib/python3.7/lib-dynload/../../libffi.so.6(ffi_call_unix64+0x4c) [0x7fae8f98eec0]
[bt] (5) /opt/conda/lib/python3.7/lib-dynload/../../libffi.so.6(ffi_call+0x22d) [0x7fae8f98e87d]
[bt] (6) /opt/conda/lib/python3.7/lib-dynload/_ctypes.cpython-37m-x86_64-linux-gnu.so(_ctypes_callproc+0x2ce) [0x7fae8fba4f7e]
[bt] (7) /opt/conda/lib/python3.7/lib-dynload/_ctypes.cpython-37m-x86_64-linux-gnu.so(+0x139b4) [0x7fae8fba59b4]
[bt] (8) /opt/conda/bin/python(_PyObject_FastCallKeywords+0x49b) [0x557ed8936d2b]
问题已解决。下面这两行可能会返回空的 DataFrame(某些地点/产品组合在 split_date 之前或之后没有任何数据),从而导致标签集为空并触发上面的错误。我已修复此问题,现在可以正常进行拟合/训练了。
# Either slice is empty when `join` has no rows on that side of split_date;
# check `.empty` on both before fitting.
data_train = join.loc[join.index <= split_date].copy()
data_test = join.loc[join.index > split_date].copy()