10 changes: 5 additions & 5 deletions doc/under_sampling.rst
@@ -112,7 +112,7 @@ samples. :class:`NearMiss` implements 3 different types of heuristic which can
be selected with the parameter ``version``::

>>> from imblearn.under_sampling import NearMiss
>>> nm1 = NearMiss(random_state=0, version=1)
>>> nm1 = NearMiss(version=1)
>>> X_resampled_nm1, y_resampled = nm1.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 64), (2, 64)]
@@ -247,7 +247,7 @@ the sample inspected to keep it in the dataset::
>>> sorted(Counter(y).items())
[(0, 64), (1, 262), (2, 4674)]
>>> from imblearn.under_sampling import EditedNearestNeighbours
>>> enn = EditedNearestNeighbours(random_state=0)
>>> enn = EditedNearestNeighbours()
>>> X_resampled, y_resampled = enn.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 213), (2, 4568)]
@@ -261,7 +261,7 @@ the decision to keep a given sample or not.
Generally, repeating the algorithm will delete more data::

>>> from imblearn.under_sampling import RepeatedEditedNearestNeighbours
>>> renn = RepeatedEditedNearestNeighbours(random_state=0)
>>> renn = RepeatedEditedNearestNeighbours()
>>> X_resampled, y_resampled = renn.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 208), (2, 4551)]
@@ -271,7 +271,7 @@ Generally, repeating the algorithm will delete more data::
internal nearest neighbors algorithm is increased at each iteration::

>>> from imblearn.under_sampling import AllKNN
>>> allknn = AllKNN(random_state=0)
>>> allknn = AllKNN()
>>> X_resampled, y_resampled = allknn.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 220), (2, 4601)]
@@ -338,7 +338,7 @@ between the :class:`EditedNearestNeighbours` and the output a 3 nearest
neighbors classifier. The class can be used as::

>>> from imblearn.under_sampling import NeighbourhoodCleaningRule
>>> ncr = NeighbourhoodCleaningRule(random_state=0)
>>> ncr = NeighbourhoodCleaningRule()
>>> X_resampled, y_resampled = ncr.fit_sample(X, y)
>>> print(sorted(Counter(y_resampled).items()))
[(0, 64), (1, 234), (2, 4666)]
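All of the samplers touched in this documentation hunk (NearMiss, EditedNearestNeighbours, RepeatedEditedNearestNeighbours, AllKNN, NeighbourhoodCleaningRule) are deterministic nearest-neighbour procedures, which is why ``random_state`` can be dropped from the examples without changing their output. A minimal sketch of that property, not part of the diff, assuming toy data generated with ``make_classification`` as in the package examples::

    import numpy as np
    from sklearn.datasets import make_classification
    from imblearn.under_sampling import EditedNearestNeighbours

    X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                               n_samples=500, random_state=10)

    # Two independent runs of a deterministic cleaning sampler return
    # identical resampled sets; a seed would be redundant here.
    X_a, y_a = EditedNearestNeighbours().fit_sample(X, y)
    X_b, y_b = EditedNearestNeighbours().fit_sample(X, y)
    assert np.array_equal(X_a, X_b) and np.array_equal(y_a, y_b)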
2 changes: 1 addition & 1 deletion examples/under-sampling/plot_illustration_tomek_links.py
@@ -67,7 +67,7 @@ def make_plot_despine(ax):
# samples. If ``ratio='auto'`` only the sample from the majority class will be
# removed. If ``ratio='all'`` both samples will be removed.

sampler = TomekLinks(random_state=0)
sampler = TomekLinks()

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

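For reference, a short usage sketch of the ``ratio`` behaviour described in the comment above; it is not part of this example script and assumes the API of this changeset, i.e. ``fit_sample`` and the ``ratio`` keyword::

    from collections import Counter

    from sklearn.datasets import make_classification
    from imblearn.under_sampling import TomekLinks

    X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                               n_samples=200, random_state=10)

    # Default ratio='auto': only the majority-class member of each Tomek
    # link is removed, so the minority class count is unchanged.
    X_auto, y_auto = TomekLinks(ratio='auto').fit_sample(X, y)

    # ratio='all': both samples forming a Tomek link are removed.
    X_all, y_all = TomekLinks(ratio='all').fit_sample(X, y)

    # How many samples are dropped depends on how many Tomek links the
    # generated data happens to contain.
    print(sorted(Counter(y_auto).items()))
    print(sorted(Counter(y_all).items()))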
3 changes: 1 addition & 2 deletions imblearn/base.py
@@ -131,9 +131,8 @@ class BaseSampler(SamplerMixin):
instead.
"""

def __init__(self, ratio='auto', random_state=None):
def __init__(self, ratio='auto'):
self.ratio = ratio
self.random_state = random_state
self.logger = logging.getLogger(__name__)

def fit(self, X, y):
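With ``random_state`` removed from ``BaseSampler``, only samplers that actually consume randomness keep the parameter, and they now store it themselves, as the following hunks for ``BalanceCascade``, ``EasyEnsemble``, ``ADASYN``, ``RandomOverSampler`` and ``SMOTE`` show. A minimal sketch of the pattern, using hypothetical stand-in classes rather than the real imblearn ones::

    from sklearn.utils import check_random_state


    class _Base(object):
        """Stand-in for the simplified base class: handles ``ratio`` only."""

        def __init__(self, ratio='auto'):
            self.ratio = ratio


    class _RandomizedSampler(_Base):
        """Stand-in for a sampler that genuinely needs a seed."""

        def __init__(self, ratio='auto', random_state=None):
            super(_RandomizedSampler, self).__init__(ratio=ratio)
            self.random_state = random_state  # stored only where it is used

        def _sample(self, X, y):
            rng = check_random_state(self.random_state)
            # placeholder logic: keep a random half of the samples
            keep = rng.permutation(len(X))[:len(X) // 2]
            return X[keep], y[keep]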
3 changes: 1 addition & 2 deletions imblearn/combine/smote_enn.py
@@ -125,8 +125,7 @@ def _validate_estimator(self):
' Got {} instead.'.format(type(self.enn)))
# Otherwise create a default EditedNearestNeighbours
else:
self.enn_ = EditedNearestNeighbours(ratio='all',
random_state=self.random_state)
self.enn_ = EditedNearestNeighbours(ratio='all')

def fit(self, X, y):
"""Find the classes statistics before to perform sampling.
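The default cleaner is only constructed when no ``enn`` object is supplied, so dropping ``random_state`` here touches only that default path (ENN itself is deterministic; the SMOTE component keeps its seed). A hedged sketch of both paths, assuming the ``enn`` keyword of ``SMOTEENN``::

    from imblearn.combine import SMOTEENN
    from imblearn.under_sampling import EditedNearestNeighbours

    # Default path: _validate_estimator builds EditedNearestNeighbours(ratio='all')
    # internally; the random_state is only propagated to the SMOTE component.
    smote_enn = SMOTEENN(random_state=0)

    # Explicit path: a user-supplied cleaner is used as-is and the default
    # above is never created.
    smote_enn = SMOTEENN(random_state=0,
                         enn=EditedNearestNeighbours(ratio='all', n_neighbors=5))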
3 changes: 1 addition & 2 deletions imblearn/combine/smote_tomek.py
@@ -134,8 +134,7 @@ def _validate_estimator(self):
'Got {} instead.'.format(type(self.tomek)))
# Otherwise create a default TomekLinks
else:
self.tomek_ = TomekLinks(ratio='all',
random_state=self.random_state)
self.tomek_ = TomekLinks(ratio='all')

def fit(self, X, y):
"""Find the classes statistics before to perform sampling.
4 changes: 2 additions & 2 deletions imblearn/ensemble/balance_cascade.py
@@ -113,8 +113,8 @@ def __init__(self,
random_state=None,
n_max_subset=None,
estimator=None):
super(BalanceCascade, self).__init__(ratio=ratio,
random_state=random_state)
super(BalanceCascade, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.estimator = estimator
self.n_max_subset = n_max_subset
4 changes: 2 additions & 2 deletions imblearn/ensemble/easy_ensemble.py
@@ -101,8 +101,8 @@ def __init__(self,
random_state=None,
replacement=False,
n_subsets=10):
super(EasyEnsemble, self).__init__(ratio=ratio,
random_state=random_state)
super(EasyEnsemble, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.replacement = replacement
self.n_subsets = n_subsets
3 changes: 2 additions & 1 deletion imblearn/over_sampling/adasyn.py
@@ -104,7 +104,8 @@ def __init__(self,
random_state=None,
n_neighbors=5,
n_jobs=1):
super(ADASYN, self).__init__(ratio=ratio, random_state=random_state)
super(ADASYN, self).__init__(ratio=ratio)
self.random_state = random_state
self.n_neighbors = n_neighbors
self.n_jobs = n_jobs

2 changes: 0 additions & 2 deletions imblearn/over_sampling/base.py
@@ -5,8 +5,6 @@
# Christos Aridas
# License: MIT

from sklearn.utils import check_X_y

from ..base import BaseSampler


4 changes: 2 additions & 2 deletions imblearn/over_sampling/random_over_sampler.py
@@ -76,8 +76,8 @@ class RandomOverSampler(BaseOverSampler):
"""

def __init__(self, ratio='auto', random_state=None):
super(RandomOverSampler, self).__init__(
ratio=ratio, random_state=random_state)
super(RandomOverSampler, self).__init__(ratio=ratio)
self.random_state = random_state

def _sample(self, X, y):
"""Resample the dataset.
3 changes: 2 additions & 1 deletion imblearn/over_sampling/smote.py
@@ -143,7 +143,8 @@ def __init__(self,
kind='regular',
svm_estimator=None,
n_jobs=1):
super(SMOTE, self).__init__(ratio=ratio, random_state=random_state)
super(SMOTE, self).__init__(ratio=ratio)
self.random_state = random_state
self.kind = kind
self.k_neighbors = k_neighbors
self.m_neighbors = m_neighbors
@@ -109,7 +109,8 @@ def __init__(self,
voting='auto',
n_jobs=1):
super(ClusterCentroids, self).__init__(
ratio=ratio, random_state=random_state)
ratio=ratio)
self.random_state = random_state
self.estimator = estimator
self.voting = voting
self.n_jobs = n_jobs
@@ -119,7 +119,8 @@ def __init__(self,
n_seeds_S=1,
n_jobs=1):
super(CondensedNearestNeighbour, self).__init__(
ratio=ratio, random_state=random_state)
ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.n_seeds_S = n_seeds_S
@@ -18,6 +18,8 @@

from ..base import BaseCleaningSampler
from ...utils import check_neighbors_object
from ...utils.deprecation import deprecate_parameter


SEL_KIND = ('all', 'mode')

@@ -62,6 +64,9 @@ class EditedNearestNeighbours(BaseCleaningSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

n_neighbors : int or object, optional (default=3)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
@@ -112,7 +117,7 @@ class EditedNearestNeighbours(BaseCleaningSampler):
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> enn = EditedNearestNeighbours(random_state=42)
>>> enn = EditedNearestNeighbours()
>>> X_res, y_res = enn.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 887, 0: 100})
@@ -126,16 +131,20 @@ def __init__(self,
n_neighbors=3,
kind_sel='all',
n_jobs=1):
super(EditedNearestNeighbours, self).__init__(
ratio=ratio,
random_state=random_state)
super(EditedNearestNeighbours, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.kind_sel = kind_sel
self.n_jobs = n_jobs

def _validate_estimator(self):
"""Validate the estimator created in the ENN."""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

self.nn_ = check_neighbors_object('n_neighbors', self.n_neighbors,
additional_neighbor=1)
self.nn_.set_params(**{'n_jobs': self.n_jobs})
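The check added to ``_validate_estimator`` means the deprecation is reported when the sampler is actually used, not at construction time. A small sketch of the user-facing effect, not part of the diff, assuming ``deprecate_parameter`` emits a ``DeprecationWarning`` as imblearn's deprecation helper does::

    import warnings

    from sklearn.datasets import make_classification
    from imblearn.under_sampling import EditedNearestNeighbours

    X, y = make_classification(n_classes=2, weights=[0.1, 0.9],
                               n_samples=200, random_state=10)

    # Passing random_state still works in 0.4, but warns once fitting starts.
    enn = EditedNearestNeighbours(random_state=0)
    with warnings.catch_warnings(record=True) as records:
        warnings.simplefilter('always')
        enn.fit_sample(X, y)
    assert any(issubclass(r.category, DeprecationWarning) for r in records)

    # Forward-compatible spelling: simply omit the parameter.
    X_res, y_res = EditedNearestNeighbours().fit_sample(X, y)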
@@ -243,6 +252,9 @@ class RepeatedEditedNearestNeighbours(BaseCleaningSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

n_neighbors : int or object, optional (default=3)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
@@ -297,7 +309,7 @@ class RepeatedEditedNearestNeighbours(BaseCleaningSampler):
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> renn = RepeatedEditedNearestNeighbours(random_state=42)
>>> renn = RepeatedEditedNearestNeighbours()
>>> X_res, y_res = renn.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 887, 0: 100})
@@ -312,8 +324,8 @@ def __init__(self,
max_iter=100,
kind_sel='all',
n_jobs=1):
super(RepeatedEditedNearestNeighbours, self).__init__(
ratio=ratio, random_state=random_state)
super(RepeatedEditedNearestNeighbours, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.kind_sel = kind_sel
@@ -322,6 +334,11 @@ def __init__(self,

def _validate_estimator(self):
"""Private function to create the NN estimator"""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

if self.max_iter < 2:
raise ValueError('max_iter must be greater than 1.'
' Got {} instead.'.format(type(self.max_iter)))
@@ -331,7 +348,6 @@ def _validate_estimator(self):

self.enn_ = EditedNearestNeighbours(ratio=self.ratio,
return_indices=self.return_indices,
random_state=self.random_state,
n_neighbors=self.nn_,
kind_sel=self.kind_sel,
n_jobs=self.n_jobs)
@@ -459,6 +475,9 @@ class AllKNN(BaseCleaningSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

n_neighbors : int or object, optional (default=3)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
@@ -514,7 +533,7 @@ class without early stopping.
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> allknn = AllKNN(random_state=42)
>>> allknn = AllKNN()
>>> X_res, y_res = allknn.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({1: 887, 0: 100})
@@ -529,7 +548,8 @@ def __init__(self,
kind_sel='all',
allow_minority=False,
n_jobs=1):
super(AllKNN, self).__init__(ratio=ratio, random_state=random_state)
super(AllKNN, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.n_neighbors = n_neighbors
self.kind_sel = kind_sel
@@ -538,6 +558,11 @@ def _validate_estimator(self):

def _validate_estimator(self):
"""Create objects required by AllKNN"""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

if self.kind_sel not in SEL_KIND:
raise NotImplementedError

@@ -546,7 +571,6 @@ def _validate_estimator(self):

self.enn_ = EditedNearestNeighbours(ratio=self.ratio,
return_indices=self.return_indices,
random_state=self.random_state,
n_neighbors=self.nn_,
kind_sel=self.kind_sel,
n_jobs=self.n_jobs)
@@ -116,8 +116,8 @@ def __init__(self,
random_state=None,
cv=5,
n_jobs=1):
super(InstanceHardnessThreshold, self).__init__(
ratio=ratio, random_state=random_state)
super(InstanceHardnessThreshold, self).__init__(ratio=ratio)
self.random_state = random_state
self.estimator = estimator
self.return_indices = return_indices
self.cv = cv
@@ -148,10 +148,6 @@ def _sample(self, X, y):
y : array-like, shape (n_samples,)
Corresponding label for each sample in X.

Returns
-------
X_resampled : {ndarray, sparse matrix}, shape \
(n_samples_new, n_features)
The array containing the resampled data.

y_resampled : ndarray, shape (n_samples_new,)
14 changes: 12 additions & 2 deletions imblearn/under_sampling/prototype_selection/nearmiss.py
@@ -15,6 +15,7 @@

from ..base import BaseUnderSampler
from ...utils import check_neighbors_object
from ...utils.deprecation import deprecate_parameter


class NearMiss(BaseUnderSampler):
@@ -51,6 +52,9 @@ class NearMiss(BaseUnderSampler):
number generator; If ``None``, the random number generator is the
``RandomState`` instance used by ``np.random``.

.. deprecated:: 0.4
``random_state`` is deprecated in 0.4 and will be removed in 0.6.

version : int, optional (default=1)
Version of the NearMiss to use. Possible values are 1, 2 or 3.

@@ -101,7 +105,7 @@ class NearMiss(BaseUnderSampler):
... n_features=20, n_clusters_per_class=1, n_samples=1000, random_state=10)
>>> print('Original dataset shape {}'.format(Counter(y)))
Original dataset shape Counter({1: 900, 0: 100})
>>> nm = NearMiss(random_state=42)
>>> nm = NearMiss()
>>> X_res, y_res = nm.fit_sample(X, y)
>>> print('Resampled dataset shape {}'.format(Counter(y_res)))
Resampled dataset shape Counter({0: 100, 1: 100})
@@ -116,7 +120,8 @@ def __init__(self,
n_neighbors=3,
n_neighbors_ver3=3,
n_jobs=1):
super(NearMiss, self).__init__(ratio=ratio, random_state=random_state)
super(NearMiss, self).__init__(ratio=ratio)
self.random_state = random_state
self.return_indices = return_indices
self.version = version
self.n_neighbors = n_neighbors
@@ -196,6 +201,11 @@ def _selection_dist_based(self,

def _validate_estimator(self):
"""Private function to create the NN estimator"""

# check for deprecated random_state
if self.random_state is not None:
deprecate_parameter(self, '0.4', 'random_state')

self.nn_ = check_neighbors_object('n_neighbors', self.n_neighbors)
self.nn_.set_params(**{'n_jobs': self.n_jobs})
