Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions R-package/tests/testthat/test_lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,7 @@ test_that("all parameters are stored correctly with save_model_to_string()", {
, "[cegb_penalty_feature_lazy: ]"
, "[cegb_penalty_feature_coupled: ]"
, "[path_smooth: 0]"
, "[path_smooth_hessian: 0]"
, "[interaction_constraints: ]"
, sprintf("[verbosity: %i]", .LGB_VERBOSITY)
, "[saved_feature_importance_type: 0]"
Expand Down
14 changes: 14 additions & 0 deletions docs/Parameters.rst
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,20 @@ Learning Control Parameters

- note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth

- ``path_smooth_hessian`` :raw-html:`<a id="path_smooth_hessian" title="Permalink to this parameter" href="#path_smooth_hessian">&#x1F517;&#xFE0E;</a>`, default = ``0``, type = double, constraints: ``path_smooth_hessian >= 0.0``

- controls smoothing applied to tree nodes using the sum of hessians instead of the number of samples

- works the same way as ``path_smooth`` but uses the sum of hessians as the weight, making it more appropriate when samples have different weights

- has the same dimension as ``min_sum_hessian_in_leaf``

- cannot be used simultaneously with ``path_smooth``; set one to ``0`` when using the other

- the weight of each node is ``w * (h / path_smooth_hessian) / (h / path_smooth_hessian + 1) + w_p / (h / path_smooth_hessian + 1)``, where ``h`` is the sum of hessians in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node

- note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth

- ``interaction_constraints`` :raw-html:`<a id="interaction_constraints" title="Permalink to this parameter" href="#interaction_constraints">&#x1F517;&#xFE0E;</a>`, default = ``""``, type = string

- controls which features can appear in the same branch
Expand Down
16 changes: 16 additions & 0 deletions include/LightGBM/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,15 @@ struct Config {
// descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
double path_smooth = 0;

// check = >= 0.0
// desc = controls smoothing applied to tree nodes using the sum of hessians instead of the number of samples
// desc = works the same way as ``path_smooth`` but uses the sum of hessians as the weight, making it more appropriate when samples have different weights
// desc = has the same dimension as ``min_sum_hessian_in_leaf``
// desc = cannot be used simultaneously with ``path_smooth``; set one to ``0`` when using the other
// descl2 = the weight of each node is ``w * (h / path_smooth_hessian) / (h / path_smooth_hessian + 1) + w_p / (h / path_smooth_hessian + 1)``, where ``h`` is the sum of hessians in the node, ``w`` is the optimal node weight to minimise the loss (approximately ``-sum_gradients / sum_hessians``), and ``w_p`` is the weight of the parent node
// descl2 = note that the parent output ``w_p`` itself has smoothing applied, unless it is the root node, so that the smoothing effect accumulates with the tree depth
double path_smooth_hessian = 0;

// desc = controls which features can appear in the same branch
// desc = by default interaction constraints are disabled, to enable them you can specify
// descl2 = for CLI, lists separated by commas, e.g. ``[0,1,2],[2,3]``
Expand Down Expand Up @@ -1155,6 +1164,13 @@ struct Config {

size_t file_load_progress_interval_bytes = size_t(10) * 1024 * 1024 * 1024;

double effective_path_smooth() const {
return path_smooth_hessian > kEpsilon ? path_smooth_hessian : path_smooth;
}
// True when hessian-based path smoothing is enabled, i.e. when
// path_smooth_hessian has been set above the kEpsilon threshold.
// Callers use this to decide whether node weights for smoothing come
// from the sum of hessians rather than the sample count.
bool use_hessian_smoothing() const {
  const bool hessian_smoothing_enabled = path_smooth_hessian > kEpsilon;
  return hessian_smoothing_enabled;
}

bool is_parallel = false;
bool is_data_based_parallel = false;
LIGHTGBM_EXPORT void Set(const std::unordered_map<std::string, std::string>& params);
Expand Down
12 changes: 8 additions & 4 deletions src/io/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,10 +439,14 @@ void Config::CheckParamConflict(const std::unordered_map<std::string, std::strin
Log::Fatal("Cannot use regression_l1 objective when fitting linear trees.");
}
}
// min_data_in_leaf must be at least 2 if path smoothing is active. This is because when the split is calculated
// the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int, so it can
// be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path smoothing the
// calculated split gain can be positive even with zero gradient and hessian.
if (path_smooth > kEpsilon && path_smooth_hessian > kEpsilon) {
Log::Warning("Cannot use both path_smooth and path_smooth_hessian simultaneously. path_smooth will be ignored.");
path_smooth = 0;
}
// min_data_in_leaf must be at least 2 if count-based path smoothing is active. This is because when the split is
// calculated the count is calculated using the proportion of hessian in the leaf which is rounded up to nearest int,
// so it can be 1 when there is actually no data in the leaf. In rare cases this can cause a bug because with path
// smoothing the calculated split gain can be positive even with zero gradient and hessian.
if (path_smooth > kEpsilon && min_data_in_leaf < 2) {
min_data_in_leaf = 2;
Log::Warning("min_data_in_leaf has been increased to 2 because this is required when path smoothing is active.");
Expand Down
7 changes: 7 additions & 0 deletions src/io/config_auto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ const std::unordered_set<std::string>& Config::parameter_set() {
"cegb_penalty_feature_lazy",
"cegb_penalty_feature_coupled",
"path_smooth",
"path_smooth_hessian",
"interaction_constraints",
"verbosity",
"input_model",
Expand Down Expand Up @@ -501,6 +502,9 @@ void Config::GetMembersFromString(const std::unordered_map<std::string, std::str
GetDouble(params, "path_smooth", &path_smooth);
CHECK_GE(path_smooth, 0.0);

GetDouble(params, "path_smooth_hessian", &path_smooth_hessian);
CHECK_GE(path_smooth_hessian, 0.0);

GetString(params, "interaction_constraints", &interaction_constraints);

GetInt(params, "verbosity", &verbosity);
Expand Down Expand Up @@ -740,6 +744,7 @@ std::string Config::SaveMembersToString() const {
str_buf << "[cegb_penalty_feature_lazy: " << Common::Join(cegb_penalty_feature_lazy, ",") << "]\n";
str_buf << "[cegb_penalty_feature_coupled: " << Common::Join(cegb_penalty_feature_coupled, ",") << "]\n";
str_buf << "[path_smooth: " << path_smooth << "]\n";
str_buf << "[path_smooth_hessian: " << path_smooth_hessian << "]\n";
str_buf << "[interaction_constraints: " << interaction_constraints << "]\n";
str_buf << "[verbosity: " << verbosity << "]\n";
str_buf << "[saved_feature_importance_type: " << saved_feature_importance_type << "]\n";
Expand Down Expand Up @@ -867,6 +872,7 @@ const std::unordered_map<std::string, std::vector<std::string>>& Config::paramet
{"cegb_penalty_feature_lazy", {}},
{"cegb_penalty_feature_coupled", {}},
{"path_smooth", {}},
{"path_smooth_hessian", {}},
{"interaction_constraints", {}},
{"verbosity", {"verbose"}},
{"input_model", {"model_input", "model_in"}},
Expand Down Expand Up @@ -1013,6 +1019,7 @@ const std::unordered_map<std::string, std::string>& Config::ParameterTypes() {
{"cegb_penalty_feature_lazy", "vector<double>"},
{"cegb_penalty_feature_coupled", "vector<double>"},
{"path_smooth", "double"},
{"path_smooth_hessian", "double"},
{"interaction_constraints", "vector<vector<int>>"},
{"verbosity", "int"},
{"input_model", "string"},
Expand Down
10 changes: 6 additions & 4 deletions src/treelearner/cuda/cuda_best_split_finder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ CUDABestSplitFinder::CUDABestSplitFinder(
max_cat_to_onehot_(config->max_cat_to_onehot),
extra_trees_(config->extra_trees),
extra_seed_(config->extra_seed),
use_smoothing_(config->path_smooth > 0),
path_smooth_(config->path_smooth),
use_smoothing_(config->effective_path_smooth() > 0),
path_smooth_(config->effective_path_smooth()),
path_smooth_use_hessian_(config->use_hessian_smoothing()),
num_total_bin_(feature_hist_offsets.empty() ? 0 : static_cast<int>(feature_hist_offsets.back())),
select_features_by_node_(select_features_by_node),
cuda_hist_(cuda_hist) {
Expand Down Expand Up @@ -276,8 +277,9 @@ void CUDABestSplitFinder::ResetConfig(const Config* config, const hist_t* cuda_h
max_cat_to_onehot_ = config->max_cat_to_onehot;
extra_trees_ = config->extra_trees;
extra_seed_ = config->extra_seed;
use_smoothing_ = (config->path_smooth > 0.0f);
path_smooth_ = config->path_smooth;
use_smoothing_ = (config->effective_path_smooth() > 0.0f);
path_smooth_ = config->effective_path_smooth();
path_smooth_use_hessian_ = config->use_hessian_smoothing();
cuda_hist_ = cuda_hist;

const int num_task_blocks = (num_tasks_ + NUM_TASKS_PER_SYNC_BLOCK - 1) / NUM_TASKS_PER_SYNC_BLOCK;
Expand Down
Loading