10 changes: 5 additions & 5 deletions doc/under_sampling.rst
@@ -112,7 +112,7 @@ samples. :class:`NearMiss` implements 3 different types of heuristic which can
be selected with the parameter ``version``::

>>> from imblearn.under_sampling import NearMiss
>>> nm1 = NearMiss(random_state=0, version=1)
>>> nm1 = NearMiss(version=1)
>>> X_resampled_nm1, y_resampled = nm1.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 64), (2, 64)]
@@ -247,7 +247,7 @@ the sample inspected to keep it in the dataset::
>>> sorted(Counter(y).items())
[(0, 64), (1, 262), (2, 4674)]
>>> from imblearn.under_sampling import EditedNearestNeighbours
>>> enn = EditedNearestNeighbours(random_state=0)
>>> enn = EditedNearestNeighbours()
>>> X_resampled, y_resampled = enn.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 213), (2, 4568)]
@@ -261,7 +261,7 @@ the decision to keep a given sample or not.
Generally, repeating the algorithm will delete more data::

>>> from imblearn.under_sampling import RepeatedEditedNearestNeighbours
>>> renn = RepeatedEditedNearestNeighbours(random_state=0)
>>> renn = RepeatedEditedNearestNeighbours()
>>> X_resampled, y_resampled = renn.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 208), (2, 4551)]
@@ -271,7 +271,7 @@ Generally, repeating the algorithm will delete more data::
internal nearest neighbors algorithm is increased at each iteration::

>>> from imblearn.under_sampling import AllKNN
>>> allknn = AllKNN(random_state=0)
>>> allknn = AllKNN()
>>> X_resampled, y_resampled = allknn.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 220), (2, 4601)]
@@ -338,7 +338,7 @@ between the :class:`EditedNearestNeighbours` and the output a 3 nearest
neighbors classifier. The class can be used as::

>>> from imblearn.under_sampling import NeighbourhoodCleaningRule
>>> ncr = NeighbourhoodCleaningRule(random_state=0)
>>> ncr = NeighbourhoodCleaningRule()
>>> X_resampled, y_resampled = ncr.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 234), (2, 4666)]
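All of the samplers touched in this documentation hunk (NearMiss, EditedNearestNeighbours, RepeatedEditedNearestNeighbours, AllKNN, NeighbourhoodCleaningRule) are deterministic nearest-neighbour procedures, which is why ``random_state`` can be dropped from the examples without changing their output. A minimal sketch of that property, not part of the diff, assuming toy data generated with ``make_classification`` as in the package examples::

    import numpy as np
    from sklearn.datasets import make_classification
    from imblearn.under_sampling import EditedNearestNeighbours

    X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                               n_samples=500, random_state=10)

    # Two independent runs of a deterministic cleaning sampler return
    # identical resampled sets; a seed would be redundant here.
    X_a, y_a = EditedNearestNeighbours().fit_sample(X, y)
    X_b, y_b = EditedNearestNeighbours().fit_sample(X, y)
    assert np.array_equal(X_a, X_b) and np.array_equal(y_a, y_b)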
2 changes: 1 addition & 1 deletion examples/under-sampling/plot_illustration_tomek_links.py
@@ -67,7 +67,7 @@ def make_plot_despine(ax):
# samples. If ``ratio='auto'`` only the sample from the majority class will be
# removed. If ``ratio='all'`` both samples will be removed.

sampler = TomekLinks(random_state=0)
sampler = TomekLinks()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

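For reference, a short usage sketch of the ``ratio`` behaviour described in the comment above; it is not part of this example script and assumes the API of this changeset, i.e. ``fit_sample`` and the ``ratio`` keyword::

    from collections import Counter

    from sklearn.datasets import make_classification
    from imblearn.under_sampling import TomekLinks

    X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                               n_samples=200, random_state=10)

    # Default ratio='auto': only the majority-class member of each Tomek
    # link is removed, so the minority class count is unchanged.
    X_auto, y_auto = TomekLinks(ratio='auto').fit_sample(X, y)

    # ratio='all': both samples forming a Tomek link are removed.
    X_all, y_all = TomekLinks(ratio='all').fit_sample(X, y)

    # How many samples are dropped depends on how many Tomek links the
    # generated data happens to contain.
    print(sorted(Counter(y_auto).items()))
    print(sorted(Counter(y_all).items()))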
3 changes: 1 addition & 2 deletions imblearn/base.py
@@ -131,9 +131,8 @@ class BaseSampler(SamplerMixin):
instead.
"""

def __init__(self, ratio='auto', random_state=None):
def __init__(self, ratio='auto'):
self.ratio = ratio
self.random_state = random_state
self.logger = logging.getLogger(__name__)

def fit(self, X, y):
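With ``random_state`` removed from ``BaseSampler``, only samplers that actually consume randomness keep the parameter, and they now store it themselves, as the following hunks for ``BalanceCascade``, ``EasyEnsemble``, ``ADASYN``, ``RandomOverSampler`` and ``SMOTE`` show. A minimal sketch of the pattern, using hypothetical stand-in classes rather than the real imblearn ones::

    from sklearn.utils import check_random_state


    class _Base(object):
        """Stand-in for the simplified base class: handles ``ratio`` only."""

        def __init__(self, ratio='auto'):
            self.ratio = ratio


    class _RandomizedSampler(_Base):
        """Stand-in for a sampler that genuinely needs a seed."""

        def __init__(self, ratio='auto', random_state=None):
            super(_RandomizedSampler, self).__init__(ratio=ratio)
            self.random_state = random_state  # stored only where it is used

        def _sample(self, X, y):
            rng = check_random_state(self.random_state)
            # placeholder logic: keep a random half of the samples
            keep = rng.permutation(len(X))[:len(X) // 2]
            return X[keep], y[keep]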
3 changes: 1 addition & 2 deletions imblearn/combine/smote_enn.py
@@ -125,8 +125,7 @@ def _validate_estimator(self):
' Got {} instead.'.format(type(self.enn)))
# Otherwise create a default EditedNearestNeighbours
else:
self.enn_ = EditedNearestNeighbours(ratio='all',
random_state=self.random_state)
self.enn_ = EditedNearestNeighbours(ratio='all')

def fit(self, X, y):
"""Find the classes statistics before to perform sampling.
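The default cleaner is only constructed when no ``enn`` object is supplied, so dropping ``random_state`` here touches only that default path (ENN itself is deterministic; the SMOTE component keeps its seed). A hedged sketch of both paths, assuming the ``enn`` keyword of ``SMOTEENN``::

    from imblearn.combine import SMOTEENN
    from imblearn.under_sampling import EditedNearestNeighbours

    # Default path: _validate_estimator builds EditedNearestNeighbours(ratio='all')
    # internally; the random_state is only propagated to the SMOTE component.
    smote_enn = SMOTEENN(random_state=0)

    # Explicit path: a user-supplied cleaner is used as-is and the default
    # above is never created.
    smote_enn = SMOTEENN(random_state=0,
                         enn=EditedNearestNeighbours(ratio='all', n_neighbors=5))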
3 changes: 1 addition & 2 deletions imblearn/combine/smote_tomek.py
@@ -134,8 +134,7 @@ def _validate_estimator(self):
'Got {} instead.'.format(type(self.tomek)))
# Otherwise create a default TomekLinks
else:
self.tomek_ = TomekLinks(ratio='all',
random_state=self.random_state)
self.tomek_ = TomekLinks(ratio='all')

def fit(self, X, y):
"""Find the classes statistics before to perform sampling.
4 changes: 2 additions & 2 deletions imblearn/ensemble/balance_cascade.py
@@ -113,8 +113,8 @@ def __init__(self,
random_state=None,
n_max_subset=None,
estimator=None):
super(BalanceCascade, self).__init__(ratio=ratio,
random_state=random_state)
super(BalanceCascade, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.estimator = estimator
self.n_max_subset = n_max_subset
4 changes: 2 additions & 2 deletions imblearn/ensemble/easy_ensemble.py
@@ -101,8 +101,8 @@ def __init__(self,
random_state=None,
replacement=False,
n_subsets=10):
super(EasyEnsemble, self).__init__(ratio=ratio,
random_state=random_state)
super(EasyEnsemble, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.replacement = replacement
self.n_subsets = n_subsets
3 changes: 2 additions & 1 deletion imblearn/over_sampling/adasyn.py
@@ -104,7 +104,8 @@ def __init__(self,
random_state=None,
n_neighbors=5,
n_jobs=1):
super(ADASYN, self).__init__(ratio=ratio, random_state=random_state)
super(ADASYN, self).__init__(ratio=ratio)
self.random_state = random_state
self.n_neighbors = n_neighbors
self.n_jobs = n_jobs

2 changes: 0 additions & 2 deletions imblearn/over_sampling/base.py
@@ -5,8 +5,6 @@
# Christos Aridas
# License: MIT

from sklearn.utils import check_X_y

from ..base import BaseSampler


4 changes: 2 additions & 2 deletions imblearn/over_sampling/random_over_sampler.py
@@ -76,8 +76,8 @@ class RandomOverSampler(BaseOverSampler):
"""

def __init__(self, ratio='auto', random_state=None):
super(RandomOverSampler, self).__init__(
ratio=ratio, random_state=random_state)
super(RandomOverSampler, self).__init__(ratio=ratio)
self.random_state = random_state

def _sample(self, X, y):
"""Resample the dataset.
3 changes: 2 additions & 1 deletion imblearn/over_sampling/smote.py
@@ -143,7 +143,8 @@ def __init__(self,
kind='regular',
svm_estimator=None,
n_jobs=1):
super(SMOTE, self).__init__(ratio=ratio, random_state=random_state)
super(SMOTE, self).__init__(ratio=ratio)
self.random_state = random_state
self.kind = kind
self.k_neighbors = k_neighbors
self.m_neighbors = m_neighbors
@@ -109,7 +109,8 @@ def __init__(self,
voting='auto',
n_jobs=1):
super(ClusterCentroids, self).__init__(
ratio=ratio, random_state=random_state)
ratio=ratio)
self.random_state = random_state
self.estimator = estimator
self.voting = voting
self.n_jobs = n_jobs
@@ -119,7 +119,8 @@ def __init__(self,
n_seeds_S=1,
n_jobs=1):
super(CondensedNearestNeighbour, self).__init__(
ratio=ratio, random_state=random_state)
ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.n_seeds_S = n_seeds_S
@@ -18,6 +18,8 @@

from ..base import BaseCleaningSampler
from ...utils import check_neighbors_object
from ...utils.deprecation import deprecate_parameter


SEL_KIND = ('all', 'mode')

@@ -62,6 +64,9 @@ class EditedNearestNeighbours(BaseCleaningSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

n_neighbors : int or object, optional (default=3)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
@@ -112,7 +117,7 @@ class EditedNearestNeighbours(BaseCleaningSampler):
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> enn = EditedNearestNeighbours(random_state=42)
>>> enn = EditedNearestNeighbours()
>>> X_res, y_res = enn.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 887, 0: 100})
@@ -126,16 +131,20 @@ def __init__(self,
n_neighbors=3,
kind_sel='all',
n_jobs=1):
super(EditedNearestNeighbours, self).__init__(
ratio=ratio,
random_state=random_state)
super(EditedNearestNeighbours, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.kind_sel = kind_sel
self.n_jobs = n_jobs

def _validate_estimator(self):
"""Validate the estimator created in the ENN."""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

self.nn_ = check_neighbors_object('n_neighbors', self.n_neighbors,
additional_neighbor=1)
self.nn_.set_params(**{'n_jobs': self.n_jobs})
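The check added to ``_validate_estimator`` means the deprecation is reported when the sampler is actually used, not at construction time. A small sketch of the user-facing effect, not part of the diff, assuming ``deprecate_parameter`` emits a ``DeprecationWarning`` as imblearn's deprecation helper does::

    import warnings

    from sklearn.datasets import make_classification
    from imblearn.under_sampling import EditedNearestNeighbours

    X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                               n_samples=200, random_state=10)

    # Passing random_state still works in 0.4, but warns once fitting starts.
    enn = EditedNearestNeighbours(random_state=0)
    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter('always')
        enn.fit_sample(X, y)
    assert any(issubclass(r.category, DeprecationWarning) for r in records)

    # Forward-compatible spelling: simply omit the parameter.
    X_res, y_res = EditedNearestNeighbours().fit_sample(X, y)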
@@ -243,6 +252,9 @@ class RepeatedEditedNearestNeighbours(BaseCleaningSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

n_neighbors : int or object, optional (default=3)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
@@ -297,7 +309,7 @@ class RepeatedEditedNearestNeighbours(BaseCleaningSampler):
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> renn = RepeatedEditedNearestNeighbours(random_state=42)
>>> renn = RepeatedEditedNearestNeighbours()
>>> X_res, y_res = renn.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 887, 0: 100})
@@ -312,8 +324,8 @@ def __init__(self,
max_iter=100,
kind_sel='all',
n_jobs=1):
super(RepeatedEditedNearestNeighbours, self).__init__(
ratio=ratio, random_state=random_state)
super(RepeatedEditedNearestNeighbours, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.kind_sel = kind_sel
@@ -322,6 +334,11 @@ def __init__(self,

def _validate_estimator(self):
"""Private function to create the NN estimator"""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

if self.max_iter < 2:
raise ValueError('max_iter must be greater than 1.'
' Got {} instead.'.format(type(self.max_iter)))
@@ -331,7 +348,6 @@ def _validate_estimator(self):

self.enn_ = EditedNearestNeighbours(ratio=self.ratio,
return_indices=self.return_indices,
random_state=self.random_state,
n_neighbors=self.nn_,
kind_sel=self.kind_sel,
n_jobs=self.n_jobs)
@@ -459,6 +475,9 @@ class AllKNN(BaseCleaningSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

n_neighbors : int or object, optional (default=3)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
@@ -514,7 +533,7 @@ class without early stopping.
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> allknn = AllKNN(random_state=42)
>>> allknn = AllKNN()
>>> X_res, y_res = allknn.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 887, 0: 100})
@@ -529,7 +548,8 @@ def __init__(self,
kind_sel='all',
allow_minority=False,
n_jobs=1):
super(AllKNN, self).__init__(ratio=ratio, random_state=random_state)
super(AllKNN, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.kind_sel = kind_sel
@@ -538,6 +558,11 @@ def _validate_estimator(self):

def _validate_estimator(self):
"""Create objects required by AllKNN"""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

if self.kind_sel not in SEL_KIND:
raise NotImplementedError

@@ -546,7 +571,6 @@ def _validate_estimator(self):

self.enn_ = EditedNearestNeighbours(ratio=self.ratio,
return_indices=self.return_indices,
random_state=self.random_state,
n_neighbors=self.nn_,
kind_sel=self.kind_sel,
n_jobs=self.n_jobs)
@@ -116,8 +116,8 @@ def __init__(self,
random_state=None,
cv=5,
n_jobs=1):
super(InstanceHardnessThreshold, self).__init__(
ratio=ratio, random_state=random_state)
super(InstanceHardnessThreshold, self).__init__(ratio=ratio)
self.random_state = random_state
self.estimator = estimator
self.return_indices = return_indices
self.cv = cv
@@ -148,10 +148,6 @@ def _sample(self, X, y):
y : array-like, shape (n_samples,)
Corresponding label for each sample in X.

Returns
-------
X_resampled : {ndarray, sparse matrix}, shape \
(n_samples_new, n_features)
The array containing the resampled data.

y_resampled : ndarray, shape (n_samples_new,)
14 changes: 12 additions & 2 deletions imblearn/under_sampling/prototype_selection/nearmiss.py
@@ -15,6 +15,7 @@

from ..base import BaseUnderSampler
from ...utils import check_neighbors_object
from ...utils.deprecation import deprecate_parameter


class NearMiss(BaseUnderSampler):
@@ -51,6 +52,9 @@ class NearMiss(BaseUnderSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

version : int, optional (default=1)
Version of the NearMiss to use. Possible values are 1, 2 or 3.

@@ -101,7 +105,7 @@ class NearMiss(BaseUnderSampler):
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> nm = NearMiss(random_state=42)
>>> nm = NearMiss()
>>> X_res, y_res = nm.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({0: 100, 1: 100})
@@ -116,7 +120,8 @@ def __init__(self,
n_neighbors=3,
n_neighbors_ver3=3,
n_jobs=1):
super(NearMiss, self).__init__(ratio=ratio, random_state=random_state)
super(NearMiss, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.version = version
self.n_neighbors = n_neighbors
@@ -196,6 +201,11 @@ def _selection_dist_based(self,

def _validate_estimator(self):
"""Private function to create the NN estimator"""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

self.nn_ = check_neighbors_object('n_neighbors', self.n_neighbors)
self.nn_.set_params(**{'n_jobs': self.n_jobs})
