以下是 xgboost 包中函数 xgb.train 的源码。
library(xgboost)
> xgb.train
# Train a booster on an xgb.DMatrix for `nrounds` iterations, invoking the
# registered callbacks around every iteration, and return the completed
# booster object.
#
# Arguments (as they are used in this body):
#   params                - list of booster parameters; validated by
#                           check.booster.params() and stored on the result.
#   data                  - training data; must inherit from "xgb.DMatrix".
#   nrounds               - number of iterations to run in this call.
#   watchlist             - named list of xgb.DMatrix objects that are
#                           evaluated after each iteration.
#   obj, feval            - forwarded to xgb.iter.update() and xgb.iter.eval()
#                           (presumably a custom objective / custom metric;
#                           confirm against those helpers).
#   verbose               - a truthy value adds the printing callback; values
#                           greater than 1 set `silent = 0` in `params`.
#   print_every_n         - coerced to integer and clamped to at least 1.
#   early_stopping_rounds - when non-NULL, a cb.early.stop callback is added
#   maximize                (together with `maximize` and `verbose`).
#   save_period,          - when save_period is non-NULL, a cb.save.model
#   save_name               callback is added.
#   xgb_model             - optional previous model; passed to
#                           xgb.Booster.handle() and used to work out the
#                           starting iteration.
#   callbacks             - user callbacks; the default ones above are only
#                           added when a callback of that name is absent.
#   ...                   - forwarded to check.deprecation() and
#                           check.booster.params().
#
# Returns the booster `bst` with `niter`, `call`, `params`, `callbacks` and
# `nfeatures` set, plus `evaluation_log` and `feature_names` when available.
function (params = list(), data, nrounds, watchlist = list(),
obj = NULL, feval = NULL, verbose = 1, print_every_n = 1L,
early_stopping_rounds = NULL, maximize = NULL, save_period = NULL,
save_name = "xgboost.model", xgb_model = NULL, callbacks = list(),
...)
{
check.deprecation(...)
params <- check.booster.params(params, ...)
# NOTE(review): both checks are called without arguments, so they presumably
# inspect this function's frame (params/obj/feval) -- confirm in their
# definitions before renaming any local here.
check.custom.obj()
check.custom.eval()
dtrain <- data
if (!inherits(dtrain, "xgb.DMatrix"))
stop("second argument dtrain must be xgb.DMatrix")
# A non-empty watchlist must be a list of xgb.DMatrix objects in which every
# element carries a non-empty name.
if (length(watchlist) > 0) {
if (typeof(watchlist) != "list" || !all(vapply(watchlist,
inherits, logical(1), what = "xgb.DMatrix")))
stop("watchlist must be a list of xgb.DMatrix elements")
evnames <- names(watchlist)
if (is.null(evnames) || any(evnames == ""))
stop("each element of the watchlist must have a name tag")
}
# `silent` is appended to params: 0 only when verbose > 1, otherwise 1.
params <- c(params, list(silent = ifelse(verbose > 1, 0,
1)))
print_every_n <- max(as.integer(print_every_n), 1L)
# Register the default callbacks, each only if the caller did not already
# supply a callback with the same name.
if (!has.callbacks(callbacks, "cb.print.evaluation") && verbose) {
callbacks <- add.cb(callbacks, cb.print.evaluation(print_every_n))
}
# Not reassigned anywhere in this body but read after the loop; presumably
# filled in by the cb.evaluation.log callback through this frame -- confirm.
evaluation_log <- list()
if (!has.callbacks(callbacks, "cb.evaluation.log") && length(watchlist) >
0) {
callbacks <- add.cb(callbacks, cb.evaluation.log())
}
if (!is.null(save_period) && !has.callbacks(callbacks, "cb.save.model")) {
callbacks <- add.cb(callbacks, cb.save.model(save_period,
save_name))
}
# Not reassigned anywhere in this body but tested inside the loop; presumably
# flipped by the early-stopping callback through this frame -- confirm.
stop_condition <- FALSE
if (!is.null(early_stopping_rounds) && !has.callbacks(callbacks,
"cb.early.stop")) {
callbacks <- add.cb(callbacks, cb.early.stop(early_stopping_rounds,
maximize = maximize, verbose = verbose))
}
# Split the callbacks into the pre_iter / post_iter / finalize groups that
# are used below.
cb <- categorize.callbacks(callbacks)
if (!is.null(params[["seed"]])) {
warning("xgb.train: `seed` is ignored in R package. Use `set.seed()` instead.")
}
is_update <- NVL(params[["process_type"]], ".") == "update"
# The handle is created from the watchlist matrices together with the
# training matrix, plus the optional previous model.
handle <- xgb.Booster.handle(params, append(watchlist, dtrain),
xgb_model)
bst <- xgb.handleToBooster(handle)
# Both counts default to 1 when absent from params and are clamped to >= 1.
num_class <- max(as.numeric(NVL(params[["num_class"]], 1)),
1)
num_parallel_tree <- max(as.numeric(NVL(params[["num_parallel_tree"]],
1)), 1)
# Iterations already present in the model being continued: the stored "niter"
# attribute plus one; when that yields a zero-length value, fall back to the
# tree count divided by (num_parallel_tree * num_class).
niter_init <- 0
if (!is.null(xgb_model)) {
niter_init <- as.numeric(xgb.attr(bst, "niter")) + 1
if (length(niter_init) == 0) {
niter_init <- xgb.ntree(bst)%/%(num_parallel_tree *
num_class)
}
}
if (is_update && nrounds > niter_init)
stop("nrounds cannot be larger than ", niter_init, " (nrounds of xgb_model)")
# NOTE(review): `rank` is never read in this body; presumably consumed by a
# callback through this frame -- confirm before removing.
rank <- 0
# In update mode iterations restart from 1; otherwise they continue after the
# iterations already in the model.
niter_skip <- ifelse(is_update, 0, niter_init)
begin_iteration <- niter_skip + 1
end_iteration <- niter_skip + nrounds
# `iteration` counts from 1 here, while the update/eval helpers and the
# "niter" attribute receive `iteration - 1`.
for (iteration in begin_iteration:end_iteration) {
for (f in cb$pre_iter) f()
xgb.iter.update(bst$handle, dtrain, iteration - 1, obj)
bst_evaluation <- numeric(0)
if (length(watchlist) > 0)
bst_evaluation <- xgb.iter.eval(bst$handle, watchlist,
iteration - 1, feval)
xgb.attr(bst$handle, "niter") <- iteration - 1
for (f in cb$post_iter) f()
if (stop_condition)
break
}
# Finalize callbacks run once after the loop, whether or not it broke early.
for (f in cb$finalize) f(finalize = TRUE)
bst <- xgb.Booster.complete(bst, saveraw = TRUE)
# Records `end_iteration`, not the value of the loop variable at exit.
bst$niter = end_iteration
if (length(evaluation_log) > 0 && nrow(evaluation_log) >
0) {
# When continuing (not updating) from a booster whose log has identical
# column names, the previous log is placed ahead of the new one.
if (inherits(xgb_model, "xgb.Booster") && !is_update &&
!is.null(xgb_model$evaluation_log) && isTRUE(all.equal(colnames(evaluation_log),
colnames(xgb_model$evaluation_log)))) {
evaluation_log <- rbindlist(list(xgb_model$evaluation_log,
evaluation_log))
}
bst$evaluation_log <- evaluation_log
}
bst$call <- match.call()
bst$params <- params
bst$callbacks <- callbacks
if (!is.null(colnames(dtrain)))
bst$feature_names <- colnames(dtrain)
bst$nfeatures <- ncol(dtrain)
return(bst)
}
具体来说,我想找到源代码中 xgboost 计算梯度的位置。我可以看到,对象 handle 定义为
handle <- xgb.Booster.handle(params, append(watchlist, dtrain), xgb_model)。
我发现 xgb.Booster.handle 定义在这里:https://github.com/dmlc/xgboost/blob/master/R-package/R/xgb.Booster.R 。这个文件调用了几个 C++ 函数,其中一个是 XGBoosterCreate_R,
它定义在这里:https://github.com/dmlc/xgboost/blob/master/R-package/src/xgboost_R.cc 。
但是,扫描C++代码后,我不清楚梯度步长到底是在哪里计算的。谁能指点一下,梯度在源码中定义在哪里?
有大量的文档可供参考,例如 https://github.com/dmlc/xgboost/blob/master/doc/c.rst ,我认为它很有帮助。
除此之外,你分享的链接指向的是实际源码的 R 封装层(如果可以这样称呼的话),该文件以下列内容开头:
#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <xgboost/c_api.h>
#include <vector>
#include <string>
#include <utility>
#include <cstring>
#include <cstdio>
#include <sstream>
#include "./xgboost_R.h"
第三行指向目录 xgboost/,在其中可以找到 c_api.h,
它是实际库的高层编程接口。这段代码的文档写得非常好。乍一看,实际的迭代步骤可能是 XGBoosterUpdateOneIter。