@@ -68,7 +68,7 @@ def _validate_estimator(self):
6868 )
6969
7070 def _make_samples (
71- self , X , y_dtype , y_type , nn_data , nn_num , n_samples , step_size = 1.0
71+ self , X , y_dtype , y_type , nn_data , nn_num , n_samples , step_size = 1.0 , y = None
7272 ):
7373 """A support function that returns artificial samples constructed along
7474 the line connecting nearest neighbours.
@@ -98,6 +98,10 @@ def _make_samples(
9898 step_size : float, default=1.0
9999 The step size to create samples.
100100
101+ y : ndarray of shape (n_samples_all,), default=None
102+ The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
103+ weight the distances in the sample generation process.
104+
101105 Returns
102106 -------
103107 X_new : {ndarray, sparse matrix} of shape (n_samples_new, n_features)
@@ -114,11 +118,13 @@ def _make_samples(
114118 rows = np .floor_divide (samples_indices , nn_num .shape [1 ])
115119 cols = np .mod (samples_indices , nn_num .shape [1 ])
116120
117- X_new = self ._generate_samples (X , nn_data , nn_num , rows , cols , steps , y_type )
121+ X_new = self ._generate_samples (X , nn_data , nn_num , rows , cols , steps , y_type , y )
118122 y_new = np .full (n_samples , fill_value = y_type , dtype = y_dtype )
119123 return X_new , y_new
120124
121- def _generate_samples (self , X , nn_data , nn_num , rows , cols , steps , y_type = None ):
125+ def _generate_samples (
126+ self , X , nn_data , nn_num , rows , cols , steps , y_type = None , y = None
127+ ):
122128 r"""Generate a synthetic sample.
123129
124130 The rule for the generation is:
@@ -153,15 +159,26 @@ def _generate_samples(self, X, nn_data, nn_num, rows, cols, steps, y_type=None):
153159 steps : ndarray of shape (n_samples,), dtype=float
154160 Step sizes for new samples.
155161
156- y_type : None
157- Unused parameter. Only for compatibility reason with SMOTE-NC.
162+ y_type : str, int or None, default=None
163+ Class label of the current target class for which we want to generate
164+ samples.
165+
166+ y : ndarray of shape (n_samples_all,), default=None
167+ The true target associated with `nn_data`. Used by Borderline SMOTE-2 to
168+ weight the distances in the sample generation process.
158169
159170 Returns
160171 -------
161172 X_new : {ndarray, sparse matrix} of shape (n_samples, n_features)
162173 Synthetically generated samples.
163174 """
164175 diffs = nn_data [nn_num [rows , cols ]] - X [rows ]
176+ if y is not None : # only entering for BorderlineSMOTE-2
177+ random_state = check_random_state (self .random_state )
178+ mask_pair_samples = y [nn_num [rows , cols ]] != y_type
179+ diffs [mask_pair_samples ] *= random_state .uniform (
180+ low = 0.0 , high = 0.5 , size = (mask_pair_samples .sum (), 1 )
181+ )
165182
166183 if sparse .issparse (X ):
167184 sparse_func = type (X ).__name__
@@ -736,7 +753,7 @@ def _fit_resample(self, X, y):
736753
737754 return X_resampled , y_resampled
738755
739- def _generate_samples (self , X , nn_data , nn_num , rows , cols , steps , y_type ):
756+ def _generate_samples (self , X , nn_data , nn_num , rows , cols , steps , y_type , y = None ):
740757 """Generate a synthetic sample with additional steps for the
741758 categorical features.
742759
0 commit comments