Skip to content

Commit f283ed6

Browse files
NicolasHug authored and thomasjpfan committed
CLN Removed max_bins from splitter in GBDT (scikit-learn#13927)
1 parent 9f7e867 commit f283ed6

File tree

3 files changed

+6
-14
lines changed

3 files changed

+6
-14
lines changed

sklearn/ensemble/_hist_gradient_boosting/grower.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def __init__(self, X_binned, gradients, hessians, max_leaf_nodes=None,
177177
self.histogram_builder = HistogramBuilder(
178178
X_binned, max_bins, gradients, hessians, hessians_are_constant)
179179
self.splitter = Splitter(
180-
X_binned, max_bins, actual_n_bins, l2_regularization,
180+
X_binned, actual_n_bins, l2_regularization,
181181
min_hessian_to_split, min_samples_leaf, min_gain_to_split,
182182
hessians_are_constant)
183183
self.max_leaf_nodes = max_leaf_nodes

sklearn/ensemble/_hist_gradient_boosting/splitting.pyx

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,6 @@ cdef class Splitter:
9191
----------
9292
X_binned : ndarray of int, shape (n_samples, n_features)
9393
The binned input samples. Must be Fortran-aligned.
94-
max_bins : int
95-
The maximum number of bins. Used to define the shape of the
96-
histograms.
9794
actual_n_bins : ndarray, shape (n_features,)
9895
The actual number of bins needed for each feature, which is lower or
9996
equal to max_bins.
@@ -114,7 +111,6 @@ cdef class Splitter:
114111
cdef public:
115112
const X_BINNED_DTYPE_C [::1, :] X_binned
116113
unsigned int n_features
117-
unsigned int max_bins
118114
unsigned int [::1] actual_n_bins
119115
unsigned char hessians_are_constant
120116
Y_DTYPE_C l2_regularization
@@ -126,18 +122,15 @@ cdef class Splitter:
126122
unsigned int [::1] left_indices_buffer
127123
unsigned int [::1] right_indices_buffer
128124

129-
def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned, unsigned int
130-
max_bins, np.ndarray[np.uint32_t] actual_n_bins,
125+
def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned,
126+
np.ndarray[np.uint32_t] actual_n_bins,
131127
Y_DTYPE_C l2_regularization, Y_DTYPE_C
132128
min_hessian_to_split=1e-3, unsigned int
133129
min_samples_leaf=20, Y_DTYPE_C min_gain_to_split=0.,
134130
unsigned char hessians_are_constant=False):
135131

136132
self.X_binned = X_binned
137133
self.n_features = X_binned.shape[1]
138-
# Note: all histograms will have <max_bins> bins, but some of the
139-
# last bins may be unused if actual_n_bins[f] < max_bins
140-
self.max_bins = max_bins
141134
self.actual_n_bins = actual_n_bins
142135
self.l2_regularization = l2_regularization
143136
self.min_hessian_to_split = min_hessian_to_split

sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ def test_histogram_split(n_bins):
4141
all_hessians,
4242
hessians_are_constant)
4343
splitter = Splitter(X_binned,
44-
n_bins,
4544
actual_n_bins,
4645
l2_regularization,
4746
min_hessian_to_split,
@@ -99,7 +98,7 @@ def test_gradient_and_hessian_sanity(constant_hessian):
9998
dtype=np.uint32)
10099
builder = HistogramBuilder(X_binned, n_bins, all_gradients,
101100
all_hessians, constant_hessian)
102-
splitter = Splitter(X_binned, n_bins, actual_n_bins,
101+
splitter = Splitter(X_binned, actual_n_bins,
103102
l2_regularization, min_hessian_to_split,
104103
min_samples_leaf, min_gain_to_split, constant_hessian)
105104

@@ -196,7 +195,7 @@ def test_split_indices():
196195
builder = HistogramBuilder(X_binned, n_bins,
197196
all_gradients, all_hessians,
198197
hessians_are_constant)
199-
splitter = Splitter(X_binned, n_bins, actual_n_bins,
198+
splitter = Splitter(X_binned, actual_n_bins,
200199
l2_regularization, min_hessian_to_split,
201200
min_samples_leaf, min_gain_to_split,
202201
hessians_are_constant)
@@ -251,7 +250,7 @@ def test_min_gain_to_split():
251250
dtype=np.uint32)
252251
builder = HistogramBuilder(X_binned, n_bins, all_gradients,
253252
all_hessians, hessians_are_constant)
254-
splitter = Splitter(X_binned, n_bins, actual_n_bins,
253+
splitter = Splitter(X_binned, actual_n_bins,
255254
l2_regularization, min_hessian_to_split,
256255
min_samples_leaf, min_gain_to_split,
257256
hessians_are_constant)

0 commit comments

Comments
 (0)