Skip to content

Commit cf17946

Browse files
committed
[c++] add path_smooth_hessian parameter for hessian-based path smoothing
1 parent d14c4ba commit cf17946

16 files changed

Lines changed: 286 additions & 117 deletions

R-package/tests/testthat/test_lgb.Booster.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,6 +1052,7 @@ test_that("all parameters are stored correctly with save_model_to_string()", {
10521052
, "[cegb_penalty_feature_lazy: ]"
10531053
, "[cegb_penalty_feature_coupled: ]"
10541054
, "[path_smooth: 0]"
1055+
, "[path_smooth_hessian: 0]"
10551056
, "[interaction_constraints: ]"
10561057
, sprintf("[verbosity: %i]", .LGB_VERBOSITY)
10571058
, "[saved_feature_importance_type: 0]"

docs/Parameters.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,20 @@ Learning Control Parameters
675675

676676
- note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
677677

678+
- ``path_smooth_hessian`` :raw-html:`<a id="path_smooth_hessian" title="Permalink to this parameter" href="#path_smooth_hessian">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = double, constraints: ``path_smooth_hessian >= 0.0``
679+
680+
- controls smoothing applied to tree nodes using the sum of hessians instead of the number of samples
681+
682+
- works the same way as ``path_smooth`` but uses the sum of hessians as the weight, making it more appropriate when samples have different weights
683+
684+
- has the same dimension as ``min_sum_hessian_in_leaf``
685+
686+
- cannot be used simultaneously with ``path_smooth``; set one to ``0`` when using the other, otherwise ``path_smooth`` is ignored (reset to ``0``) and a warning is logged
687+
688+
- the weight of each node is ``w * (h / path_smooth_hessian) / (h / path_smooth_hessian + 1) + w_p / (h / path_smooth_hessian + 1)``, where ``h`` is the sum of hessians in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node
689+
690+
- note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
691+
678692
- ``interaction_constraints`` :raw-html:`<a id="interaction_constraints" title="Permalink to this parameter" href="#interaction_constraints">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string
679693

680694
- controls which features can appear in the same branch

include/LightGBM/config.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,15 @@ struct Config {
584584
// descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
585585
double path_smooth = 0;
586586

587+
// check = >= 0.0
588+
// desc = controls smoothing applied to tree nodes using the sum of hessians instead of the number of samples
589+
// desc = works the same way as ``path_smooth`` but uses the sum of hessians as the weight, making it more appropriate when samples have different weights
590+
// desc = has the same dimension as ``min_sum_hessian_in_leaf``
591+
// desc = cannot be used simultaneously with ``path_smooth``; set one to ``0`` when using the other, otherwise ``path_smooth`` is ignored (reset to ``0``) and a warning is logged
592+
// descl2 = the weight of each node is ``w * (h / path_smooth_hessian) / (h / path_smooth_hessian + 1) + w_p / (h / path_smooth_hessian + 1)``, where ``h`` is the sum of hessians in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node
593+
// descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
594+
double path_smooth_hessian = 0;
595+
587596
// desc = controls which features can appear in the same branch
588597
// desc = by default interaction constraints are disabled, to enable them you can specify
589598
// descl2 = for CLI, lists separated by commas, e.g. ``[0,1,2],[2,3]``
@@ -1155,6 +1164,13 @@ struct Config {
11551164

11561165
size_t file_load_progress_interval_bytes = size_t(10) * 1024 * 1024 * 1024;
11571166

1167+
// Returns the smoothing strength to use: path_smooth_hessian when it exceeds
// kEpsilon, otherwise path_smooth. The returned value alone does not say which
// of the two parameters it came from; use_hessian_smoothing() applies the same
// kEpsilon threshold to report that.
double effective_path_smooth() const {
1168+
return path_smooth_hessian > kEpsilon ? path_smooth_hessian : path_smooth;
1169+
}
1170+
// Returns true when path_smooth_hessian exceeds kEpsilon, i.e. when the
// hessian-weighted smoothing parameter has been set to a non-negligible value.
// Values at or below kEpsilon are treated as "not set".
bool use_hessian_smoothing() const {
1171+
return path_smooth_hessian > kEpsilon;
1172+
}
1173+
11581174
bool is_parallel = false;
11591175
bool is_data_based_parallel = false;
11601176
LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params);

src/io/config.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -439,10 +439,14 @@ void Config::CheckParamConflict(const std::unordered_map<std::string, std::strin
439439
Log::Fatal("Cannot use regression_l1 objective when fitting linear trees.");
440440
}
441441
}
442-
// min_data_in_leaf must be at least 2 if path smoothing is active. This is because when the split is calculated
443-
// the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int, so it can
444-
// be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path smoothing the
445-
// calculated split gain can be positive even with zero gradient and hessian.
442+
if (path_smooth > kEpsilon && path_smooth_hessian > kEpsilon) {
443+
Log::Warning("Cannot use both path_smooth and path_smooth_hessian simultaneously. path_smooth will be ignored.");
444+
path_smooth = 0;
445+
}
446+
// min_data_in_leaf must be at least 2 if count-based path smoothing is active. This is because when the split is
447+
// calculated the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int,
448+
// so it can be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path
449+
// smoothing the calculated split gain can be positive even with zero gradient and hessian.
446450
if (path_smooth > kEpsilon && min_data_in_leaf < 2) {
447451
min_data_in_leaf = 2;
448452
Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active.");

src/io/config_auto.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
254254
"cegb_penalty_feature_lazy",
255255
"cegb_penalty_feature_coupled",
256256
"path_smooth",
257+
"path_smooth_hessian",
257258
"interaction_constraints",
258259
"verbosity",
259260
"input_model",
@@ -501,6 +502,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
501502
GetDouble(params, "path_smooth", &path_smooth);
502503
CHECK_GE(path_smooth, 0.0);
503504

505+
GetDouble(params, "path_smooth_hessian", &path_smooth_hessian);
506+
CHECK_GE(path_smooth_hessian, 0.0);
507+
504508
GetString(params, "interaction_constraints", &interaction_constraints);
505509

506510
GetInt(params, "verbosity", &verbosity);
@@ -740,6 +744,7 @@ std::string Config::SaveMembersToString() const {
740744
str_buf << "[cegb_penalty_feature_lazy: " << Common::Join(cegb_penalty_feature_lazy, ",") << "]\n";
741745
str_buf << "[cegb_penalty_feature_coupled: " << Common::Join(cegb_penalty_feature_coupled, ",") << "]\n";
742746
str_buf << "[path_smooth: " << path_smooth << "]\n";
747+
str_buf << "[path_smooth_hessian: " << path_smooth_hessian << "]\n";
743748
str_buf << "[interaction_constraints: " << interaction_constraints << "]\n";
744749
str_buf << "[verbosity: " << verbosity << "]\n";
745750
str_buf << "[saved_feature_importance_type: " << saved_feature_importance_type << "]\n";
@@ -867,6 +872,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
867872
{"cegb_penalty_feature_lazy", {}},
868873
{"cegb_penalty_feature_coupled", {}},
869874
{"path_smooth", {}},
875+
{"path_smooth_hessian", {}},
870876
{"interaction_constraints", {}},
871877
{"verbosity", {"verbose"}},
872878
{"input_model", {"model_input", "model_in"}},
@@ -1013,6 +1019,7 @@ const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
10131019
{"cegb_penalty_feature_lazy", "vector<double>"},
10141020
{"cegb_penalty_feature_coupled", "vector<double>"},
10151021
{"path_smooth", "double"},
1022+
{"path_smooth_hessian", "double"},
10161023
{"interaction_constraints", "vector<vector<int>>"},
10171024
{"verbosity", "int"},
10181025
{"input_model", "string"},

src/treelearner/cuda/cuda_best_split_finder.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ CUDABestSplitFinder::CUDABestSplitFinder(
3535
max_cat_to_onehot_(config->max_cat_to_onehot),
3636
extra_trees_(config->extra_trees),
3737
extra_seed_(config->extra_seed),
38-
use_smoothing_(config->path_smooth > 0),
39-
path_smooth_(config->path_smooth),
38+
use_smoothing_(config->effective_path_smooth() > 0),
39+
path_smooth_(config->effective_path_smooth()),
40+
path_smooth_use_hessian_(config->use_hessian_smoothing()),
4041
num_total_bin_(feature_hist_offsets.empty() ? 0 : static_cast<int>(feature_hist_offsets.back())),
4142
select_features_by_node_(select_features_by_node),
4243
cuda_hist_(cuda_hist) {
@@ -276,8 +277,9 @@ void CUDABestSplitFinder::ResetConfig(const Config* config, const hist_t* cuda_h
276277
max_cat_to_onehot_ = config->max_cat_to_onehot;
277278
extra_trees_ = config->extra_trees;
278279
extra_seed_ = config->extra_seed;
279-
use_smoothing_ = (config->path_smooth > 0.0f);
280-
path_smooth_ = config->path_smooth;
280+
use_smoothing_ = (config->effective_path_smooth() > 0.0f);
281+
path_smooth_ = config->effective_path_smooth();
282+
path_smooth_use_hessian_ = config->use_hessian_smoothing();
281283
cuda_hist_ = cuda_hist;
282284

283285
const int num_task_blocks = (num_tasks_ + NUM_TASKS_PER_SYNC_BLOCK - 1) / NUM_TASKS_PER_SYNC_BLOCK;

0 commit comments

Comments
 (0)