I am trying to create a parameter grid for a GPBoost model. At the moment I am using a grid-search tuning approach, but I have been reading about Bayesian optimization in the rBayesianOptimization package. However, there does not seem to be a straightforward way to connect a gpboost model to the Bayesian tuner. I can start the parameter search, but the score comes back as zero. I thought I would ask here in case anyone knows a better way to connect these workflows.
Here is the code.
library(caret)                  # createDataPartition()
library(gpboost)                # GPModel(), gpb.Dataset(), gpb.cv(), gpboost()
library(rBayesianOptimization)  # BayesianOptimization()

# Split the data into training (75%) and test (25%) sets
Index <- createDataPartition(y = mean_tl_scale$fb_mean_tl_mm, p = 0.75, list = FALSE)
LM_train_tl_mean <- mean_tl_scale[Index, ]
LM_test_tl_mean <- mean_tl_scale[-Index, ]
# Convert data frames to matrices
LM_matrix_train <- data.matrix(LM_train_tl_mean)
LM_matrix_test <- data.matrix(LM_test_tl_mean)
# Extract feature data
features_train <- LM_matrix_train[, !colnames(LM_matrix_train) %in% c("year")]
features_test <- LM_matrix_test[, !colnames(LM_matrix_test) %in% c("year")]
# Feature column names, excluding the response column
feature_cols <- colnames(features_test)[-3]
# Define the random effects model (grouped random intercepts for year)
gp_model <- GPModel(likelihood = "gaussian", cov_function = "exponential",
                    group_data = LM_matrix_train[, c("year")])
boost_data <- gpb.Dataset(data = features_train[, feature_cols],
                          label = features_train[, "fb_mean_tl_mm"])
gpb_boost <- gpb.Dataset.construct(boost_data)
# Define parameter bounds for Bayesian optimization
bounds <- list(
  learning_rate = c(0.05, 0.15),
  max_depth = c(5L, 7L),
  min_child_weight = c(5L, 7L),
  subsample = c(0.3, 0.5),
  colsample_bytree = c(0.5, 0.9),
  num_iterations = c(800L, 1000L),
  lambda_l2 = c(0, 5)
)
# Define the optimization function
opt_func <- function(learning_rate, max_depth, min_child_weight, subsample,
                     colsample_bytree, num_iterations, lambda_l2) {
  params <- list(
    objective = "regression",
    learning_rate = learning_rate,
    max_depth = max_depth,
    min_child_weight = min_child_weight,
    subsample = subsample,
    colsample_bytree = colsample_bytree,
    lambda_l2 = lambda_l2
  )
  cv_result <- gpb.cv(
    params = params,
    data = gpb_boost,
    gp_model = gp_model,
    nrounds = num_iterations,  # the tuned iteration count controls the rounds
    nfold = 5,
    verbose = 0,
    eval = "rmse"
  )
  # gpb.cv returns a gpb.CVBooster; the best cross-validated metric is stored
  # in $best_score (not in an xgboost-style evaluation_log). Negate the RMSE
  # because BayesianOptimization() maximizes Score.
  list(Score = -cv_result$best_score, Pred = NULL)
}
# Run Bayesian optimization
set.seed(68)
opt_result <- BayesianOptimization(
  FUN = opt_func,
  bounds = bounds,
  init_points = 10,
  n_iter = 20,
  acq = "ei",
  kappa = 2.576,
  eps = 0.0,
  verbose = TRUE
)
# Print the optimum set of parameters
opt_result
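# Besides Best_Par, the result also contains Best_Value (the best Score)
# and History (the full search trace)
opt_result$Best_Value
opt_result$History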
# Test the model with the output from Bayesian optimization
best_params <- list(
  objective = "regression",
  learning_rate = opt_result$Best_Par[["learning_rate"]],
  max_depth = opt_result$Best_Par[["max_depth"]],
  min_child_weight = opt_result$Best_Par[["min_child_weight"]],
  subsample = opt_result$Best_Par[["subsample"]],
  colsample_bytree = opt_result$Best_Par[["colsample_bytree"]],
  lambda_l2 = opt_result$Best_Par[["lambda_l2"]]
)
# Refit on the training data with a fresh random effects model and the
# tuned number of boosting rounds
gp_model <- GPModel(group_data = LM_matrix_train[, c("year")], likelihood = "gaussian",
                    cov_function = "exponential")
gpboost_model <- gpboost(
  data = boost_data,
  gp_model = gp_model,
  nrounds = opt_result$Best_Par[["num_iterations"]],
  params = best_params,
  verbose = 1
)
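# Sketch of evaluating on the held-out test set (assumes predict() for
# gpboost models takes group_data_pred for the random effects and, with
# pred_latent = FALSE, returns response-scale predictions in $response_mean)
pred <- predict(gpboost_model,
                data = features_test[, feature_cols],
                group_data_pred = LM_matrix_test[, c("year")],
                pred_latent = FALSE)
rmse_test <- sqrt(mean((features_test[, "fb_mean_tl_mm"] - pred$response_mean)^2))
rmse_test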
Thank you for your interest in GPBoost.
Here is an example of how to do Bayesian optimization in R with the mlrMBO package. I hope this helps.
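A minimal sketch of that approach, reusing the gp_model and boost_data objects from the question; the parameter ranges mirror the ones above, and the objective is the cross-validated RMSE that gpb.cv reports in $best_score:

library(mlrMBO)  # also loads smoof and ParamHelpers

# Objective: cross-validated RMSE of a GPBoost model for a given parameter set
obj_fun <- makeSingleObjectiveFunction(
  name = "gpboost_cv",
  fn = function(x) {
    params <- list(
      objective = "regression",
      learning_rate = x$learning_rate,
      max_depth = x$max_depth,
      min_child_weight = x$min_child_weight,
      subsample = x$subsample,
      colsample_bytree = x$colsample_bytree,
      lambda_l2 = x$lambda_l2
    )
    cv_result <- gpb.cv(
      params = params,
      data = boost_data,
      gp_model = gp_model,
      nrounds = x$num_iterations,
      nfold = 5,
      verbose = 0,
      eval = "rmse"
    )
    cv_result$best_score
  },
  par.set = makeParamSet(
    makeNumericParam("learning_rate", lower = 0.05, upper = 0.15),
    makeIntegerParam("max_depth", lower = 5L, upper = 7L),
    makeIntegerParam("min_child_weight", lower = 5L, upper = 7L),
    makeNumericParam("subsample", lower = 0.3, upper = 0.5),
    makeNumericParam("colsample_bytree", lower = 0.5, upper = 0.9),
    makeIntegerParam("num_iterations", lower = 800L, upper = 1000L),
    makeNumericParam("lambda_l2", lower = 0, upper = 5)
  ),
  has.simple.signature = FALSE,  # fn receives a named list of parameters
  minimize = TRUE                # mbo() minimizes the RMSE directly
)

ctrl <- makeMBOControl()
ctrl <- setMBOControlTermination(ctrl, iters = 20L)

set.seed(68)
res <- mbo(obj_fun, control = ctrl, show.info = TRUE)
res$x  # best parameter set
res$y  # best cross-validated RMSE

Note that mlrMBO minimizes by default, so no sign flip is needed, and integer parameters are handled natively via makeIntegerParam.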