scikit-learn-contrib · glemaitre · May 8, 2018 · Mar 20, 2018 · Mar 20, 2018 · Mar 26, 2018
diff --git a/doc/api.rst b/doc/api.rst
@@ -205,4 +205,5 @@ Imbalance-learn provides some fast-prototyping tools.
  utils.estimator_checks.check_estimator
  utils.check_neighbors_object
  utils.check_ratio
+ utils.check_sampling_strategy
  utils.hash_X_y
diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst
@@ -94,29 +94,31 @@ Imbalanced generator
 ====================
 
 :func:`make_imbalance` turns an original dataset into an imbalanced
-dataset. This behaviour is driven by the parameter ``ratio`` which behave
-similarly to other resampling algorithm. ``ratio`` can be given as a dictionary
-where the key corresponds to the class and the value is the the number of
-samples in the class::
+dataset. This behaviour is driven by the parameter ``sampling_strategy`` which
+behaves similarly to other resampling algorithms. ``sampling_strategy`` can be
+given as a dictionary where the key corresponds to the class and the value is
+the number of samples in the class::
 
  >>> from sklearn.datasets import load_iris
  >>> from imblearn.datasets import make_imbalance
  >>> iris = load_iris()
- >>> ratio = {0: 20, 1: 30, 2: 40}
- >>> X_imb, y_imb = make_imbalance(iris.data, iris.target, ratio=ratio)
+ >>> sampling_strategy = {0: 20, 1: 30, 2: 40}
+ >>> X_imb, y_imb = make_imbalance(iris.data, iris.target,
+ ... sampling_strategy=sampling_strategy)
  >>> sorted(Counter(y_imb).items())
  [(0, 20), (1, 30), (2, 40)]
 
 Note that all samples of a class are passed-through if the class is not mentioned
 in the dictionary::
 
- >>> ratio = {0: 10}
- >>> X_imb, y_imb = make_imbalance(iris.data, iris.target, ratio=ratio)
+ >>> sampling_strategy = {0: 10}
+ >>> X_imb, y_imb = make_imbalance(iris.data, iris.target,
+ ... sampling_strategy=sampling_strategy)
  >>> sorted(Counter(y_imb).items())
  [(0, 10), (1, 50), (2, 50)]
 
 Instead of a dictionary, a function can be defined and directly pass to
-``ratio``::
+``sampling_strategy``::
 
  >>> def ratio_multiplier(y):
  ... multiplier = {0: 0.5, 1: 0.7, 2: 0.95}
@@ -125,9 +127,9 @@ Instead of a dictionary, a function can be defined and directly pass to
  ... target_stats[key] = int(value * multiplier[key])
  ... return target_stats
  >>> X_imb, y_imb = make_imbalance(iris.data, iris.target,
- ... ratio=ratio_multiplier)
+ ... sampling_strategy=ratio_multiplier)
  >>> sorted(Counter(y_imb).items())
  [(0, 25), (1, 35), (2, 47)]
 
 See :ref:`sphx_glr_auto_examples_datasets_plot_make_imbalance.py` and
-:ref:`sphx_glr_auto_examples_plot_ratio_usage.py`.
+:ref:`sphx_glr_auto_examples_plot_sampling_strategy_usage.py`.
diff --git a/doc/developers_utils.rst b/doc/developers_utils.rst
@@ -26,9 +26,10 @@ which accepts arrays, matrices, or sparse matrices as arguments, the following
 should be used when applicable.
 
 - :func:`check_neighbors_object`: Check the objects is consistent to be a NN.
-- :func:`check_target_type`: Check the target types to be conform to the current samplers.
-- :func:`check_ratio`: Checks ratio for consistent type and return a dictionary
- containing each targeted class with its corresponding number of pixel.
+- :func:`check_target_type`: Check the target types to be conform to the current samplers.
+- :func:`check_sampling_strategy`: Checks that sampling target is consistent with
+ the type and returns a dictionary containing each targeted class with its
+ corresponding number of samples.
 
 
 Deprecation

diff --git a/doc/ensemble.rst b/doc/ensemble.rst
@@ -92,12 +92,13 @@ output of an :class:`EasyEnsemble` sampler with an ensemble of classifiers
 (i.e. ``BaggingClassifier``). Therefore, :class:`BalancedBaggingClassifier`
 takes the same parameters than the scikit-learn
 ``BaggingClassifier``. Additionally, there is two additional parameters,
-``ratio`` and ``replacement``, as in the :class:`EasyEnsemble` sampler::
+``sampling_strategy`` and ``replacement``, as in the :class:`EasyEnsemble`
+sampler::
 
 
  >>> from imblearn.ensemble import BalancedBaggingClassifier
  >>> bbc = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),
- ... ratio='auto',
+ ... sampling_strategy='auto',
  ... replacement=False,
  ... random_state=0)
  >>> bbc.fit(X_train, y_train) # doctest: +ELLIPSIS

diff --git a/doc/under_sampling.rst b/doc/under_sampling.rst
@@ -103,7 +103,7 @@ by considering independently each targeted class::
  >>> print(np.vstack({tuple(row) for row in X_resampled}).shape)
  (181, 2)
 
-See :ref:`sphx_glr_auto_examples_plot_ratio_usage.py`,
+See :ref:`sphx_glr_auto_examples_plot_sampling_strategy_usage.py`,
 :ref:`sphx_glr_auto_examples_under-sampling_plot_comparison_under_sampling.py`,
 and :ref:`sphx_glr_auto_examples_under-sampling_plot_random_under_sampler.py`.
 
@@ -214,11 +214,11 @@ the samples of interest in green.
  :scale: 60
  :align: center
 
-The parameter ``ratio`` control which sample of the link will be removed. For
-instance, the default (i.e., ``ratio='auto'``) will remove the sample from the
-majority class. Both samples from the majority and minority class can be
-removed by setting ``ratio`` to ``'all'``. The figure illustrates this
-behaviour.
+The parameter ``sampling_strategy`` controls which sample of the link will be
+removed. For instance, the default (i.e., ``sampling_strategy='auto'``) will
+remove the sample from the majority class. Both samples from the majority and
+minority class can be removed by setting ``sampling_strategy`` to ``'all'``. The
+figure illustrates this behaviour.
 
 .. image:: ./auto_examples/under-sampling/images/sphx_glr_plot_illustration_tomek_links_002.png
  :target: ./auto_examples/under-sampling/plot_illustration_tomek_links.html

diff --git a/doc/whats_new/v0.0.4.rst b/doc/whats_new/v0.0.4.rst
@@ -6,6 +6,18 @@ Version 0.4 (under development)
 Changelog
 ---------
 
+API
+...
+
+- Replace the parameter ``ratio`` by ``sampling_strategy``. :issue:`411` by
+ :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Allow the use of a ``float`` with binary classification for
+ ``sampling_strategy``. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Allow the use of a ``list`` for the cleaning methods to specify the class to
+ sample. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Enhancement
 ...........
 
@@ -34,3 +46,20 @@ Maintenance
 
 - Remove deprecated parameters in 0.2 - :issue:`331` by :user:`Guillaume
  Lemaitre <glemaitre>`.
+
+Deprecation
+...........
+
+- Deprecate ``ratio`` in favor of ``sampling_strategy``. :issue:`411` by
+ :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Deprecate the use of a ``dict`` for cleaning methods. A ``list`` should be
+ used. :issue:`411` by :user:`Guillaume Lemaitre <glemaitre>`.
+
+- Deprecate ``random_state`` in :class:`imblearn.under_sampling.NearMiss`,
+ :class:`imblearn.under_sampling.EditedNearestNeighbors`,
+ :class:`imblearn.under_sampling.RepeatedEditedNearestNeighbors`,
+ :class:`imblearn.under_sampling.AllKNN`,
+ :class:`imblearn.under_sampling.NeighbourhoodCleaningRule`,
+ :class:`imblearn.under_sampling.InstanceHardnessThreshold`,
+ :class:`imblearn.under_sampling.CondensedNearestNeighbours`.
diff --git a/examples/applications/plot_multi_class_under_sampling.py b/examples/applications/plot_multi_class_under_sampling.py
@@ -29,8 +29,9 @@
 
 # Create a folder to fetch the dataset
 iris = load_iris()
-X, y = make_imbalance(iris.data, iris.target, ratio={0: 25, 1: 50, 2: 50},
- random_state=0)
+X, y = make_imbalance(iris.data, iris.target,
+ sampling_strategy={0: 25, 1: 50, 2: 50},
+ random_state=RANDOM_STATE)
 
 X_train, X_test, y_train, y_test = train_test_split(
  X, y, random_state=RANDOM_STATE)
@@ -39,7 +40,7 @@
 print('Testing target statistics: {}'.format(Counter(y_test)))
 
 # Create a pipeline
-pipeline = make_pipeline(NearMiss(version=2, random_state=RANDOM_STATE),
+pipeline = make_pipeline(NearMiss(version=2),
  LinearSVC(random_state=RANDOM_STATE))
 pipeline.fit(X_train, y_train)
 

diff --git a/examples/datasets/plot_make_imbalance.py b/examples/datasets/plot_make_imbalance.py
@@ -55,12 +55,12 @@ def ratio_func(y, multiplier, minority_class):
 for i, multiplier in enumerate(multipliers, start=1):
  ax = axs[i]
 
- X_, y_ = make_imbalance(X, y, ratio=ratio_func,
+ X_, y_ = make_imbalance(X, y, sampling_strategy=ratio_func,
  **{"multiplier": multiplier,
  "minority_class": 1})
  ax.scatter(X_[y_ == 0, 0], X_[y_ == 0, 1], label="Class #0", alpha=0.5)
  ax.scatter(X_[y_ == 1, 0], X_[y_ == 1, 1], label="Class #1", alpha=0.5)
- ax.set_title('ratio = {}'.format(multiplier))
+ ax.set_title('sampling_strategy = {}'.format(multiplier))
  plot_decoration(ax)
 
 plt.tight_layout()

diff --git a/examples/plot_ratio_usage.py b/examples/plot_ratio_usage.py