Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3115,7 +3115,7 @@ def set_weight(
# Check if the weight contains values other than one
if weight is not None:
if _is_pyarrow_array(weight):
if pa_compute.all(pa_compute.equal(weight, 1)).as_py():
if pa_compute.all(pa_compute.equal(weight, 1)).as_py(): # type: ignore[attr-defined]
weight = None
elif np.all(weight == 1):
weight = None
Expand Down
22 changes: 14 additions & 8 deletions python-package/lightgbm/sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1013,6 +1013,8 @@ def fit(
params["metric"] = [e for e in eval_metrics_builtin if e not in params["metric"]] + params["metric"]
params["metric"] = [metric for metric in params["metric"] if metric is not None]

self._fitted_with_feature_names = isinstance(X, (pd_DataFrame, pa_Table))

if not isinstance(X, (pd_DataFrame, pa_Table)):
_X, _y = _LGBMValidateData(
self,
Expand Down Expand Up @@ -1359,25 +1361,29 @@ def feature_name_(self) -> List[str]:
def feature_names_in_(self) -> np.ndarray:
    """:obj:`array` of shape = [n_features]: scikit-learn compatible version of ``.feature_name_``.

    Only available when training data had feature names (e.g. a pandas DataFrame).
    When training was done with data without feature names (e.g. a numpy array),
    accessing this attribute raises ``AttributeError``.

    .. versionadded:: 4.5.0
    """
    # Raise the sklearn-style "not fitted" error first, before touching any
    # fit-time attributes.
    if not self.__sklearn_is_fitted__():
        raise LGBMNotFittedError("No feature_names_in_ found. Need to call fit beforehand.")
    # _fitted_with_feature_names is recorded in fit() (True only when X was a
    # pandas DataFrame or pyarrow Table).  Raising AttributeError here makes
    # hasattr(est, "feature_names_in_") mirror scikit-learn's behavior for
    # estimators trained on data without feature names.
    if not self._fitted_with_feature_names:
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute 'feature_names_in_'. "
            "The training data did not have feature names "
            "(e.g. was a numpy array rather than a pandas DataFrame)."
        )
    return np.array(self.feature_name_)

@feature_names_in_.deleter
def feature_names_in_(self) -> None:
"""Intercept calls to delete ``feature_names_in_``.

Some code paths in ``scikit-learn`` try to delete the ``feature_names_in_`` attribute
on estimators when a new training dataset that doesn't have features is passed.
LightGBM automatically assigns feature names to such datasets
(like ``Column_0``, ``Column_1``, etc.) and so does not want that behavior.

However, that behavior is coupled to ``scikit-learn`` automatically updating
``n_features_in_`` in those same code paths, which is necessary for compliance
with its API (via argument ``reset`` to functions like ``validate_data()`` and
``check_array()``).
on estimators when a new training dataset that doesn't have feature names is passed.
This is handled via ``_fitted_with_feature_names``, so deletion is a no-op here.

.. note::

Expand Down
24 changes: 21 additions & 3 deletions tests/python_package_test/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import itertools
import math
import re
import warnings
from functools import partial
from os import getenv
from pathlib import Path
Expand Down Expand Up @@ -1688,8 +1689,8 @@ def test_fit_only_raises_num_rounds_warning_when_expected(capsys):

@pytest.mark.parametrize("estimator_class", estimator_classes)
def test_getting_feature_names_in_np_input(estimator_class):
# input is a numpy array, which doesn't have feature names. LightGBM adds
# feature names to the fitted model, which is inconsistent with sklearn's behavior
# Input is a numpy array, which doesn't have feature names.
# feature_names_in_ should not be set (raises AttributeError), consistent with sklearn's behavior.
X, y = load_digits(n_class=2, return_X_y=True)
params = {"n_estimators": 2, "num_leaves": 7}
if estimator_class is lgb.LGBMModel:
Expand All @@ -1703,7 +1704,24 @@ def test_getting_feature_names_in_np_input(estimator_class):
model.fit(X, y, group=[X.shape[0]])
else:
model.fit(X, y)
np_assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]), strict=True)
assert not hasattr(model, "feature_names_in_"), (
"feature_names_in_ should not be set when training data had no feature names"
)
# auto-generated names should still be accessible via the LightGBM-specific feature_name_ property
assert model.feature_name_ == [f"Column_{i}" for i in range(X.shape[1])]


@pytest.mark.parametrize("estimator_class", [lgb.LGBMClassifier, lgb.LGBMRegressor])
def test_no_spurious_feature_name_warning_on_np_predict(estimator_class):
    # Regression test for https://github.com/microsoft/LightGBM/issues/6798
    # sklearn 1.6+ warns "X does not have valid feature names, but ... was fitted with feature names"
    # when predict() is called with a numpy array after fit() on a numpy array, because LightGBM
    # auto-generates feature names. This should not produce any warning.
    X, y = load_digits(n_class=2, return_X_y=True)
    model = estimator_class(n_estimators=2, num_leaves=7).fit(X, y)
    # Escalate every warning to an error so any spurious feature-name warning
    # emitted during predict() fails the test.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        model.predict(X[:5])


@pytest.mark.parametrize("estimator_class", estimator_classes)
Expand Down