FIX deprecate ratio and ratio_

scikit-learn-contrib · glemaitre · May 8, 2018 · Mar 20, 2018 · Mar 20, 2018 · Mar 26, 2018
commit f3fef5cb0646f798afd6dbd83b0a57f1fcaeb22b
diff --git a/imblearn/base.py b/imblearn/base.py
@@ -7,6 +7,7 @@
 from __future__ import division
 
 import logging
+import warnings
 from abc import ABCMeta, abstractmethod
 
 import numpy as np
@@ -18,6 +19,7 @@
 from sklearn.utils.validation import check_is_fitted
 
 from .utils import check_ratio, check_target_type, hash_X_y
+from .utils.deprecation import deprecate_parameter
 
 
 class SamplerMixin(six.with_metaclass(ABCMeta, BaseEstimator)):
@@ -61,7 +63,7 @@ def sample(self, X, y):
  y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
  X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
 
- check_is_fitted(self, 'ratio_')
+ check_is_fitted(self, 'sampling_target_')
  self._check_X_y(X, y)
 
  output = self._sample(X, y)
@@ -143,10 +145,26 @@ class BaseSampler(SamplerMixin):
  instead.
  """
 
- def __init__(self, ratio='auto'):
+ def __init__(self, sampling_target='auto', ratio=None):
+ self.sampling_target = sampling_target
+ # FIXME: remove in 0.6
  self.ratio = ratio
  self.logger = logging.getLogger(self.__module__)
 
+ @property
+ def ratio_(self):
+ # FIXME: remove in 0.6
+ warnings.warn("'ratio' and 'ratio_' are deprecated. "
+ "Use 'sampling_target' and 'sampling_target_' instead.",
+ DeprecationWarning)
+ return self.sampling_target_
+
+ def _deprecate_ratio(self):
+ # both ratio and sampling_target should not be set
+ if self.ratio is not None:
+ deprecate_parameter(self, '0.4', 'ratio', 'sampling_target')
+ self.sampling_target = self.ratio
+
  def fit(self, X, y):
  """Find the classes statistics before to perform sampling.
 
@@ -164,11 +182,13 @@ def fit(self, X, y):
  Return self.
 
  """
+ self._deprecate_ratio()
  y = check_target_type(y)
  X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
  self.X_hash_, self.y_hash_ = hash_X_y(X, y)
  # self.sampling_type is already checked in check_ratio
- self.ratio_ = check_ratio(self.ratio, y, self._sampling_type)
+ self.sampling_target_ = check_ratio(self.sampling_target, y,
+ self._sampling_type)
 
  return self
 
@@ -250,15 +270,23 @@ def fit(self, X, y):
  if self.accept_sparse else False)
  self.X_hash_, self.y_hash_ = hash_X_y(X, y)
  # when using a sampler, ratio_ is supposed to exist after fit
- self.ratio_ = 'is_fitted'
+ self.sampling_target_ = 'is_fitted'
 
  return self
 
+ @property
+ def ratio_(self):
+ # FIXME: remove in 0.6
+ warnings.warn("'ratio' and 'ratio_' are deprecated. "
+ "Use 'sampling_target' and 'sampling_target_' instead.",
+ DeprecationWarning)
+ return self.sampling_target_
+
  def _sample(self, X, y, func=None, kw_args=None):
  y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
  X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']
  if self.accept_sparse else False)
- check_is_fitted(self, 'ratio_')
+ check_is_fitted(self, 'sampling_target_')
  X_hash, y_hash = hash_X_y(X, y)
  if self.X_hash_ != X_hash or self.y_hash_ != y_hash:
  raise RuntimeError("X and y need to be same array earlier fitted.")

diff --git a/imblearn/combine/smote_enn.py b/imblearn/combine/smote_enn.py
@@ -7,6 +7,7 @@
 from __future__ import division
 
 import logging
+import warnings
 
 from sklearn.utils import check_X_y
 
@@ -93,15 +94,17 @@ class SMOTEENN(SamplerMixin):
  """
 
  def __init__(self,
- ratio='auto',
+ sampling_target='auto',
  random_state=None,
  smote=None,
- enn=None):
+ enn=None,
+ ratio=None):
  super(SMOTEENN, self).__init__()
- self.ratio = ratio
+ self.sampling_target = sampling_target
  self.random_state = random_state
  self.smote = smote
  self.enn = enn
+ self.ratio = ratio
  self.logger = logging.getLogger(__name__)
 
  def _validate_estimator(self):
@@ -115,7 +118,9 @@ def _validate_estimator(self):
  # Otherwise create a default SMOTE
  else:
  self.smote_ = SMOTE(
- ratio=self.ratio, random_state=self.random_state)
+ sampling_target=self.sampling_target,
+ random_state=self.random_state,
+ ratio=self.ratio)
 
  if self.enn is not None:
  if isinstance(self.enn, EditedNearestNeighbours):
@@ -125,7 +130,15 @@ def _validate_estimator(self):
  ' Got {} instead.'.format(type(self.enn)))
  # Otherwise create a default EditedNearestNeighbours
  else:
- self.enn_ = EditedNearestNeighbours(ratio='all')
+ self.enn_ = EditedNearestNeighbours(sampling_target='all')
+
+ @property
+ def ratio_(self):
+ # FIXME: remove in 0.6
+ warnings.warn("'ratio' and 'ratio_' are deprecated. "
+ "Use 'sampling_target' and 'sampling_target_' instead.",
+ DeprecationWarning)
+ return self.sampling_target_
 
  def fit(self, X, y):
  """Find the classes statistics before to perform sampling.
@@ -146,7 +159,7 @@ def fit(self, X, y):
  """
  y = check_target_type(y)
  X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
- self.ratio_ = self.ratio
+ self.sampling_target_ = self.sampling_target
  self.X_hash_, self.y_hash_ = hash_X_y(X, y)
 
  return self

diff --git a/imblearn/combine/smote_tomek.py b/imblearn/combine/smote_tomek.py
@@ -8,6 +8,7 @@
 from __future__ import division
 
 import logging
+import warnings
 
 from sklearn.utils import check_X_y
 
@@ -100,15 +101,17 @@ class SMOTETomek(SamplerMixin):
  """
 
  def __init__(self,
- ratio='auto',
+ sampling_target='auto',
  random_state=None,
  smote=None,
- tomek=None):
+ tomek=None,
+ ratio=None):
  super(SMOTETomek, self).__init__()
- self.ratio = ratio
+ self.sampling_target = sampling_target
  self.random_state = random_state
  self.smote = smote
  self.tomek = tomek
+ self.ratio = ratio
  self.logger = logging.getLogger(__name__)
 
  def _validate_estimator(self):
@@ -123,7 +126,9 @@ def _validate_estimator(self):
  # Otherwise create a default SMOTE
  else:
  self.smote_ = SMOTE(
- ratio=self.ratio, random_state=self.random_state)
+ sampling_target=self.sampling_target,
+ random_state=self.random_state,
+ ratio=self.ratio)
 
  if self.tomek is not None:
  if isinstance(self.tomek, TomekLinks):
@@ -133,7 +138,15 @@ def _validate_estimator(self):
  'Got {} instead.'.format(type(self.tomek)))
  # Otherwise create a default TomekLinks
  else:
- self.tomek_ = TomekLinks(ratio='all')
+ self.tomek_ = TomekLinks(sampling_target='all')
+
+ @property
+ def ratio_(self):
+ # FIXME: remove in 0.6
+ warnings.warn("'ratio' and 'ratio_' are deprecated. "
+ "Use 'sampling_target' and 'sampling_target_' instead.",
+ DeprecationWarning)
+ return self.sampling_target_
 
  def fit(self, X, y):
  """Find the classes statistics before to perform sampling.
@@ -154,7 +167,7 @@ def fit(self, X, y):
  """
  y = check_target_type(y)
  X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
- self.ratio_ = self.ratio
+ self.sampling_target_ = self.sampling_target
  self.X_hash_, self.y_hash_ = hash_X_y(X, y)
 
  return self

diff --git a/imblearn/ensemble/balance_cascade.py b/imblearn/ensemble/balance_cascade.py
@@ -108,12 +108,14 @@ class BalanceCascade(BaseEnsembleSampler):
  """
 
  def __init__(self,
- ratio='auto',
+ sampling_target='auto',
  return_indices=False,
  random_state=None,
  n_max_subset=None,
- estimator=None):
- super(BalanceCascade, self).__init__(ratio=ratio)
+ estimator=None,
+ ratio=None):
+ super(BalanceCascade, self).__init__(sampling_target=sampling_target,
+ ratio=ratio)
  self.random_state = random_state
  self.return_indices = return_indices
  self.estimator = estimator
@@ -138,7 +140,8 @@ def fit(self, X, y):
  """
  super(BalanceCascade, self).fit(X, y)
  y = check_target_type(y)
- self.ratio_ = check_ratio(self.ratio, y, 'under-sampling')
+ self.sampling_target_ = check_ratio(self.sampling_target, y,
+ 'under-sampling')
  return self
 
  def _validate_estimator(self):
@@ -201,8 +204,8 @@ def _sample(self, X, y):
  # value which will be picked at each round
  index_constant = np.empty((0, ), dtype=y.dtype)
  for target_class in target_stats.keys():
- if target_class in self.ratio_.keys():
- n_samples = self.ratio_[target_class]
+ if target_class in self.sampling_target_.keys():
+ n_samples = self.sampling_target_[target_class]
  # extract the data of interest for this round from the
  # current class
  index_class = np.flatnonzero(y == target_class)
@@ -246,8 +249,9 @@ def _sample(self, X, y):
  # check that there is enough samples for another round
  target_stats = Counter(safe_indexing(
  y, np.flatnonzero(samples_mask)))
- for target_class in self.ratio_.keys():
- if target_stats[target_class] < self.ratio_[target_class]:
+ for target_class in self.sampling_target_.keys():
+ if (target_stats[target_class] <
+ self.sampling_target_[target_class]):
  b_subset_search = False
 
  X_resampled, y_resampled = [], []

diff --git a/imblearn/ensemble/base.py b/imblearn/ensemble/base.py
@@ -4,6 +4,8 @@
 # Authors: Guillaume Lemaitre <g.lemaitre58@gmail.com>
 # License: MIT
 
+import warnings
+
 import numpy as np
 
 from sklearn.preprocessing import label_binarize
@@ -23,6 +25,13 @@ class BaseEnsembleSampler(BaseSampler):
 
  _sampling_type = 'ensemble'
 
+ @property
+ def ratio_(self):
+ warnings.warn("'ratio' and 'ratio_' are deprecated. "
+ "Use 'sampling_target' and 'sampling_target_' instead.",
+ DeprecationWarning)
+ return self.sampling_target_
+
  def sample(self, X, y):
  """Resample the dataset.
 
@@ -49,7 +58,7 @@ def sample(self, X, y):
  y, binarize_y = check_target_type(y, indicate_one_vs_all=True)
  X, y = check_X_y(X, y, accept_sparse=['csr', 'csc'])
 
- check_is_fitted(self, 'ratio_')
+ check_is_fitted(self, 'sampling_target_')
  self._check_X_y(X, y)
 
  output = self._sample(X, y)

diff --git a/imblearn/ensemble/classifier.py b/imblearn/ensemble/classifier.py
@@ -192,11 +192,12 @@ def __init__(self,
  bootstrap_features=False,
  oob_score=False,
  warm_start=False,
- ratio='auto',
+ sampling_target='auto',
  replacement=False,
  n_jobs=1,
  random_state=None,
- verbose=0):
+ verbose=0,
+ ratio=None):
 
  super(BaggingClassifier, self).__init__(
  base_estimator,
@@ -210,6 +211,7 @@ def __init__(self,
  n_jobs=n_jobs,
  random_state=random_state,
  verbose=verbose)
+ self.sampling_target = sampling_target
  self.ratio = ratio
  self.replacement = replacement
 
@@ -230,8 +232,10 @@ def _validate_estimator(self, default=DecisionTreeClassifier()):
  base_estimator = clone(default)
 
  self.base_estimator_ = Pipeline(
- [('sampler', RandomUnderSampler(ratio=self.ratio,
- replacement=self.replacement)),
+ [('sampler', RandomUnderSampler(
+ sampling_target=self.sampling_target,
+ replacement=self.replacement,
+ ratio=self.ratio)),
  ('classifier', base_estimator)])
 
  def fit(self, X, y):

diff --git a/imblearn/ensemble/easy_ensemble.py b/imblearn/ensemble/easy_ensemble.py
@@ -96,12 +96,14 @@ class EasyEnsemble(BaseEnsembleSampler):
  """
 
  def __init__(self,
- ratio='auto',
+ sampling_target='auto',
  return_indices=False,
  random_state=None,
  replacement=False,
- n_subsets=10):
- super(EasyEnsemble, self).__init__(ratio=ratio)
+ n_subsets=10,
+ ratio=None):
+ super(EasyEnsemble, self).__init__(sampling_target=sampling_target,
+ ratio=ratio)
  self.random_state = random_state
  self.return_indices = return_indices
  self.replacement = replacement
@@ -142,7 +144,7 @@ def _sample(self, X, y):
 
  for _ in range(self.n_subsets):
  rus = RandomUnderSampler(
- ratio=self.ratio_, return_indices=True,
+ sampling_target=self.sampling_target_, return_indices=True,
  random_state=random_state.randint(MAX_INT),
  replacement=self.replacement)
  sel_x, sel_y, sel_idx = rus.fit_sample(X, y)

diff --git a/imblearn/over_sampling/adasyn.py b/imblearn/over_sampling/adasyn.py
@@ -98,11 +98,13 @@ class ADASYN(BaseOverSampler):
  """
 
  def __init__(self,
- ratio='auto',
+ sampling_target='auto',
  random_state=None,
  n_neighbors=5,
- n_jobs=1):
- super(ADASYN, self).__init__(ratio=ratio)
+ n_jobs=1,
+ ratio=None):
+ super(ADASYN, self).__init__(sampling_target=sampling_target,
+ ratio=ratio)
  self.random_state = random_state
  self.n_neighbors = n_neighbors
  self.n_jobs = n_jobs
@@ -141,7 +143,7 @@ def _sample(self, X, y):
  X_resampled = X.copy()
  y_resampled = y.copy()
 
- for class_sample, n_samples in self.ratio_.items():
+ for class_sample, n_samples in self.sampling_target_.items():
  if n_samples == 0:
  continue
  target_class_indices = np.flatnonzero(y == class_sample)