Skip to content

Commit d121c68

Browse files
committed
Merge pull request #53 from glemaitre/bug_make_samples
[MRG] Address issue #52
2 parents 74e24de + eff68c2 commit d121c68

File tree

1 file changed

+14
-14
lines changed
  • unbalanced_dataset/over_sampling

1 file changed

+14
-14
lines changed

unbalanced_dataset/over_sampling/smote.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def fit(self, X, y):
243243

244244
return self
245245

246-
def in_danger_noise(self, samples, y, kind='danger'):
246+
def _in_danger_noise(self, samples, y, kind='danger'):
247247
"""Estimate whether a set of samples is in danger or not.
248248
249249
Parameters
@@ -288,7 +288,7 @@ def in_danger_noise(self, samples, y, kind='danger'):
288288
else:
289289
raise ValueError('Unknown string for parameter kind.')
290290

291-
def make_samples(self, X, y_type, nn_data, nn_num, n_samples,
291+
def _make_samples(self, X, y_type, nn_data, nn_num, n_samples,
292292
step_size=1.):
293293
"""A support function that returns artificial samples constructed along
294294
the line connecting nearest neighbours.
@@ -303,11 +303,11 @@ def make_samples(self, X, y_type, nn_data, nn_num, n_samples,
303303
target values for the synthetic variables with correct length in
304304
a clear format.
305305
306-
nn_data : ndarray, shape(n_samples_all, n_features)
306+
nn_data : ndarray, shape (n_samples_all, n_features)
307307
Data set carrying all the neighbours to be used
308308
309-
nn_num : int
310-
The number of nearest neighbours to be used.
309+
nn_num : ndarray, shape (n_samples_all, k_nearest_neighbours)
310+
The nearest neighbours of each sample in nn_data.
311311
312312
n_samples : int
313313
The number of samples to generate.
@@ -429,7 +429,7 @@ def transform(self, X, y):
429429

430430
# --- Generating synthetic samples
431431
# Use the private method _make_samples to generate minority samples
432-
X_new, y_new = self.make_samples(X_min,
432+
X_new, y_new = self._make_samples(X_min,
433433
self.min_c_,
434434
X_min,
435435
nns,
@@ -457,7 +457,7 @@ def transform(self, X, y):
457457
print("done!")
458458

459459
# Boolean array with True for minority samples in danger
460-
danger_index = self.in_danger_noise(X_min, y, kind='danger')
460+
danger_index = self._in_danger_noise(X_min, y, kind='danger')
461461

462462
# If all minority samples are safe, return the original data set.
463463
if not any(danger_index):
@@ -485,7 +485,7 @@ def transform(self, X, y):
485485
# B1 and B2 types diverge here!!!
486486
if self.kind == 'borderline1':
487487
# Create synthetic samples for borderline points.
488-
X_new, y_new = self.make_samples(X_min[danger_index],
488+
X_new, y_new = self._make_samples(X_min[danger_index],
489489
self.min_c_,
490490
X_min,
491491
nns,
@@ -512,7 +512,7 @@ def transform(self, X, y):
512512
fractions = betavariate(alpha=10, beta=10)
513513

514514
# Only minority
515-
X_new_1, y_new_1 = self.make_samples(X_min[danger_index],
515+
X_new_1, y_new_1 = self._make_samples(X_min[danger_index],
516516
self.min_c_,
517517
X_min,
518518
nns,
@@ -521,7 +521,7 @@ def transform(self, X, y):
521521
step_size=1.)
522522

523523
# Only majority with smaller step size
524-
X_new_2, y_new_2 = self.make_samples(X_min[danger_index],
524+
X_new_2, y_new_2 = self._make_samples(X_min[danger_index],
525525
self.min_c_,
526526
X[y != self.min_c_],
527527
nns,
@@ -567,11 +567,11 @@ def transform(self, X, y):
567567

568568
# Now, get rid of noisy support vectors
569569

570-
noise_bool = self.in_danger_noise(support_vector, y, kind='noise')
570+
noise_bool = self._in_danger_noise(support_vector, y, kind='noise')
571571

572572
# Remove noisy support vectors
573573
support_vector = support_vector[np.logical_not(noise_bool)]
574-
danger_bool = self.in_danger_noise(support_vector, y,
574+
danger_bool = self._in_danger_noise(support_vector, y,
575575
kind='danger')
576576
safety_bool = np.logical_not(danger_bool)
577577

@@ -608,7 +608,7 @@ def transform(self, X, y):
608608
support_vector[danger_bool],
609609
return_distance=False)[:, 1:]
610610

611-
X_new_1, y_new_1 = self.make_samples(
611+
X_new_1, y_new_1 = self._make_samples(
612612
support_vector[danger_bool],
613613
self.min_c_,
614614
X_min,
@@ -622,7 +622,7 @@ def transform(self, X, y):
622622
support_vector[safety_bool],
623623
return_distance=False)[:, 1:]
624624

625-
X_new_2, y_new_2 = self.make_samples(
625+
X_new_2, y_new_2 = self._make_samples(
626626
support_vector[safety_bool],
627627
self.min_c_,
628628
X_min,

0 commit comments

Comments
 (0)