Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R-package/tests/testthat/test_lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,7 @@ test_that("all parameters are stored correctly with save_model_to_string()", {
, "[cegb_penalty_feature_lazy: ]"
, "[cegb_penalty_feature_coupled: ]"
, "[path_smooth: 0]"
, "[path_smooth_hessian: 0]"
, "[interaction_constraints: ]"
, sprintf("[verbosity: %i]", .LGB_VERBOSITY)
, "[saved_feature_importance_type: 0]"
Expand Down
14 changes: 14 additions & 0 deletions docs/Parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,20 @@ Learning Control Parameters

- note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth

- ``path_smooth_hessian`` :raw-html:`<a id="path_smooth_hessian" title="Permalink to this parameter" href="#path_smooth_hessian">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = double, constraints: ``path_smooth_hessian >= 0.0``

- controls smoothing applied to tree nodes using the sum of hessians instead of the number of samples

- works the same way as ``path_smooth`` but uses the sum of hessians as the weight, making it more appropriate when samples have different weights

- has the same dimension as ``min_sum_hessian_in_leaf``

- cannot be used simultaneously with ``path_smooth``; set one to ``0`` when using the other

- the weight of each node is ``w * (h / path_smooth_hessian) / (h / path_smooth_hessian + 1) + w_p / (h / path_smooth_hessian + 1)``, where ``h`` is the sum of hessians in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node

- note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth

- ``interaction_constraints`` :raw-html:`<a id="interaction_constraints" title="Permalink to this parameter" href="#interaction_constraints">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string

- controls which features can appear in the same branch
Expand Down
16 changes: 16 additions & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,15 @@ struct Config {
// descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
double path_smooth = 0;

// check = >= 0.0
// desc = controls smoothing applied to tree nodes using the sum of hessians instead of the number of samples
// desc = works the same way as ``path_smooth`` but uses the sum of hessians as the weight, making it more appropriate when samples have different weights
// desc = has the same dimension as ``min_sum_hessian_in_leaf``
// desc = cannot be used simultaneously with ``path_smooth``; set one to ``0`` when using the other
// descl2 = the weight of each node is ``w * (h / path_smooth_hessian) / (h / path_smooth_hessian + 1) + w_p / (h / path_smooth_hessian + 1)``, where ``h`` is the sum of hessians in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node
// descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
double path_smooth_hessian = 0;

// desc = controls which features can appear in the same branch
// desc = by default interaction constraints are disabled, to enable them you can specify
// descl2 = for CLI, lists separated by commas, e.g. ``[0,1,2],[2,3]``
Expand Down Expand Up @@ -1155,6 +1164,13 @@ struct Config {

size_t file_load_progress_interval_bytes = size_t(10) * 1024 * 1024 * 1024;

double effective_path_smooth() const {
return path_smooth_hessian > kEpsilon ? path_smooth_hessian : path_smooth;
}
// True when hessian-based path smoothing is enabled, i.e. when
// path_smooth_hessian has been set above the kEpsilon threshold.
// Callers use this to decide whether node weights for smoothing come
// from the sum of hessians rather than the sample count.
bool use_hessian_smoothing() const {
  const bool hessian_smoothing_enabled = path_smooth_hessian > kEpsilon;
  return hessian_smoothing_enabled;
}

bool is_parallel = false;
bool is_data_based_parallel = false;
LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params);
Expand Down
12 changes: 8 additions & 4 deletions src/io/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,10 +439,14 @@ void Config::CheckParamConflict(const std::unordered_map<std::string, std::strin
Log::Fatal("Cannot use regression_l1 objective when fitting linear trees.");
}
}
// min_data_in_leaf must be at least 2 if path smoothing is active. This is because when the split is calculated
// the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int, so it can
// be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path smoothing the
// calculated split gain can be positive even with zero gradient and hessian.
if (path_smooth > kEpsilon && path_smooth_hessian > kEpsilon) {
Log::Warning("Cannot use both path_smooth and path_smooth_hessian simultaneously. path_smooth will be ignored.");
path_smooth = 0;
}
// min_data_in_leaf must be at least 2 if count-based path smoothing is active. This is because when the split is
// calculated the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int,
// so it can be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path
// smoothing the calculated split gain can be positive even with zero gradient and hessian.
if (path_smooth > kEpsilon && min_data_in_leaf < 2) {
min_data_in_leaf = 2;
Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active.");
Expand Down
7 changes: 7 additions & 0 deletions src/io/config_auto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
"cegb_penalty_feature_lazy",
"cegb_penalty_feature_coupled",
"path_smooth",
"path_smooth_hessian",
"interaction_constraints",
"verbosity",
"input_model",
Expand Down Expand Up @@ -501,6 +502,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
GetDouble(params, "path_smooth", &path_smooth);
CHECK_GE(path_smooth, 0.0);

GetDouble(params, "path_smooth_hessian", &path_smooth_hessian);
CHECK_GE(path_smooth_hessian, 0.0);

GetString(params, "interaction_constraints", &interaction_constraints);

GetInt(params, "verbosity", &verbosity);
Expand Down Expand Up @@ -740,6 +744,7 @@ std::string Config::SaveMembersToString() const {
str_buf << "[cegb_penalty_feature_lazy: " << Common::Join(cegb_penalty_feature_lazy, ",") << "]\n";
str_buf << "[cegb_penalty_feature_coupled: " << Common::Join(cegb_penalty_feature_coupled, ",") << "]\n";
str_buf << "[path_smooth: " << path_smooth << "]\n";
str_buf << "[path_smooth_hessian: " << path_smooth_hessian << "]\n";
str_buf << "[interaction_constraints: " << interaction_constraints << "]\n";
str_buf << "[verbosity: " << verbosity << "]\n";
str_buf << "[saved_feature_importance_type: " << saved_feature_importance_type << "]\n";
Expand Down Expand Up @@ -867,6 +872,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
{"cegb_penalty_feature_lazy", {}},
{"cegb_penalty_feature_coupled", {}},
{"path_smooth", {}},
{"path_smooth_hessian", {}},
{"interaction_constraints", {}},
{"verbosity", {"verbose"}},
{"input_model", {"model_input", "model_in"}},
Expand Down Expand Up @@ -1013,6 +1019,7 @@ const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
{"cegb_penalty_feature_lazy", "vector<double>"},
{"cegb_penalty_feature_coupled", "vector<double>"},
{"path_smooth", "double"},
{"path_smooth_hessian", "double"},
{"interaction_constraints", "vector<vector<int>>"},
{"verbosity", "int"},
{"input_model", "string"},
Expand Down
10 changes: 6 additions & 4 deletions src/treelearner/cuda/cuda_best_split_finder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ CUDABestSplitFinder::CUDABestSplitFinder(
max_cat_to_onehot_(config->max_cat_to_onehot),
extra_trees_(config->extra_trees),
extra_seed_(config->extra_seed),
use_smoothing_(config->path_smooth > 0),
path_smooth_(config->path_smooth),
use_smoothing_(config->effective_path_smooth() > 0),
path_smooth_(config->effective_path_smooth()),
path_smooth_use_hessian_(config->use_hessian_smoothing()),
num_total_bin_(feature_hist_offsets.empty() ? 0 : static_cast<int>(feature_hist_offsets.back())),
select_features_by_node_(select_features_by_node),
cuda_hist_(cuda_hist) {
Expand Down Expand Up @@ -276,8 +277,9 @@ void CUDABestSplitFinder::ResetConfig(const Config* config, const hist_t* cuda_h
max_cat_to_onehot_ = config->max_cat_to_onehot;
extra_trees_ = config->extra_trees;
extra_seed_ = config->extra_seed;
use_smoothing_ = (config->path_smooth > 0.0f);
path_smooth_ = config->path_smooth;
use_smoothing_ = (config->effective_path_smooth() > 0.0f);
path_smooth_ = config->effective_path_smooth();
path_smooth_use_hessian_ = config->use_hessian_smoothing();
cuda_hist_ = cuda_hist;

const int num_task_blocks = (num_tasks_ + NUM_TASKS_PER_SYNC_BLOCK - 1) / NUM_TASKS_PER_SYNC_BLOCK;
Expand Down
Loading