I am running GridSearchCV over several different models with the following code:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from tqdm import tqdm

def algorithm_pipeline(X_train_data, X_test_data, y_train_data, y_test_data,
                       model, param_grid, cv=10, scoring_fit='accuracy',
                       scoring_test=roc_auc_score, do_probabilities=True):
    gs = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        n_jobs=-1,
        scoring=scoring_fit,
        verbose=2
    )
    fitted_model = gs.fit(X_train_data, y_train_data)
    best_model = fitted_model.best_estimator_
    if do_probabilities:
        pred = fitted_model.predict_proba(X_test_data)
    else:
        pred = fitted_model.predict(X_test_data)
    score = scoring_test(y_test_data, pred)
    return [best_model, score]
models_to_train = [LogisticRegression(), XGBClassifier(), RandomForestClassifier()]

grid_parameters = [
    {  # Logistic Regression
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'penalty': ['none', 'elasticnet', 'l1', 'l2'],
        'C': [0.001, 0.01, 0.1, 1, 10, 100]
    },
    {  # XGBoost
        'n_estimators': [400, 700, 1000],
        'colsample_bytree': [0.7, 0.8],
        'max_depth': [15, 20],
        'reg_alpha': [1.1, 1.3],
        'reg_lambda': [1.1, 1.3],
        'subsample': [0.7, 0.9]
    },
    {  # Random Forest Classifier
        'n_estimators': [200, 500],
        'max_features': ['auto', 'sqrt', 'log2'],
        'max_depth': [4, 6, 8],
        'criterion': ['gini', 'entropy']
    }
]

models_preds_scores = []
with tqdm(total=len(models_to_train)) as pbar:
    for i, model in enumerate(models_to_train):
        pbar.update(1)
        params = grid_parameters[i]
        result = algorithm_pipeline(X_train, X_test, y_train, y_test,
                                    model, params, cv=5)
        models_preds_scores.append(result)
However, every time the first model finishes running, it fails with the following error:
joblib.externals.loky.process_executor._RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/externals/loky/backend/queues.py", line 159, in _feed
obj_ = dumps(obj, reducers=reducers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/externals/loky/backend/reduction.py", line 215, in dumps
dump(obj, buf, reducers=reducers, protocol=protocol)
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/externals/loky/backend/reduction.py", line 208, in dump
_LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 632, in dump
return Pickler.dump(self, obj)
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/_memmapping_reducer.py", line 446, in __call__
for dumped_filename in dump(a, filename):
^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/numpy_pickle.py", line 553, in dump
NumpyPickler(f, protocol=protocol).dump(value)
File "/usr/local/sas/grid/python3-prod/lib/python3.11/pickle.py", line 487, in dump
self.save(obj)
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/numpy_pickle.py", line 352, in save
wrapper.write_array(obj, self)
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/numpy_pickle.py", line 134, in write_array
pickler.file_handle.write(chunk.tobytes('C'))
OSError: [Errno 28] No space left on device
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/scratch/MAIN_DIR/prediction_tuning/crypto_prediction.py", line 78, in <module>
result = algorithm_pipeline(X_train, X_test, y_train, y_test,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/scratch/MAIN_DIR/prediction_tuning/crypto_prediction.py", line 38, in algorithm_pipeline
fitted_model = gs.fit(X_train_data, y_train_data)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/sklearn/base.py", line 1474, in wrapper
return fit_method(estimator, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/sklearn/model_selection/_search.py", line 970, in fit
self._run_search(evaluate_candidates)
File "/home/DIR/.local/lib/python3.11/site-packages/sklearn/model_selection/_search.py", line 1527, in _run_search
evaluate_candidates(ParameterGrid(self.param_grid))
File "/home/DIR/.local/lib/python3.11/site-packages/sklearn/model_selection/_search.py", line 916, in evaluate_candidates
out = parallel(
^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/sklearn/utils/parallel.py", line 67, in __call__
return super().__call__(iterable_with_config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/parallel.py", line 1952, in __call__
return output if self.return_generator else list(output)
^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/parallel.py", line 1595, in _get_outputs
yield from self._retrieve()
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/parallel.py", line 1699, in _retrieve
self._raise_error_fast()
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/parallel.py", line 1734, in _raise_error_fast
error_job.get_result(self.timeout)
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/parallel.py", line 736, in get_result
return self._return_or_raise()
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/DIR/.local/lib/python3.11/site-packages/joblib/parallel.py", line 754, in _return_or_raise
raise self._result
_pickle.PicklingError: Could not pickle the task to send it to the workers.
I thought this was an out-of-memory problem, so I allocated more RAM to the job (48 GB) and also tried allocating more RAM per core, but the result was the same. I don't know where the problem is.
Judging from
OSError: [Errno 28] No space left on device
it looks like a disk has run out of space.
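To sanity-check that theory, this is a minimal sketch of how I plan to inspect the free space in the locations where joblib might be dumping memmapped copies of the training data for the workers (assuming the default system temp directory and /dev/shm; if the JOBLIB_TEMP_FOLDER environment variable is set, it overrides them):

import os
import shutil
import tempfile

# Candidate locations where joblib may write memmapped arrays for worker processes.
candidates = [
    os.environ.get("JOBLIB_TEMP_FOLDER"),  # explicit override, if set
    "/dev/shm",                            # shared-memory tmpfs, used by joblib when available
    tempfile.gettempdir(),                 # system default temp directory
]

for path in candidates:
    if path and os.path.isdir(path):
        total, used, free = shutil.disk_usage(path)
        print(f"{path}: {free / 1e9:.1f} GB free of {total / 1e9:.1f} GB")

Is the memmapping done by n_jobs=-1 the likely culprit here, and if so, how should I configure where it writes (or how much it writes)?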