Skip to content

Commit f42522c

Browse files
glemaitre authored and qinhanmin2014 committed
MNT Change warning type and validation location for contamination in LOF (scikit-learn#11634)
1 parent 5592a2e commit f42522c

File tree

2 files changed

+55
-37
lines changed

2 files changed

+55
-37
lines changed

sklearn/neighbors/lof.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ class LocalOutlierFactor(NeighborsBase, KNeighborsMixin, UnsupervisedMixin,
100100
threshold on the decision function. If "auto", the decision function
101101
threshold is determined as in the original paper.
102102
103+
.. versionchanged:: 0.20
104+
The default value of ``contamination`` will change from 0.1 in 0.20
105+
to ``'auto'`` in 0.22.
106+
103107
novelty : boolean, default False
104108
By default, LocalOutlierFactor is only meant to be used for outlier
105109
detection (novelty=False). Set novelty to True if you want to use
@@ -150,12 +154,6 @@ def __init__(self, n_neighbors=20, algorithm='auto', leaf_size=30,
150154
algorithm=algorithm,
151155
leaf_size=leaf_size, metric=metric, p=p,
152156
metric_params=metric_params, n_jobs=n_jobs)
153-
154-
if contamination == "legacy":
155-
warnings.warn('default contamination parameter 0.1 will change '
156-
'in version 0.22 to "auto". This will change the '
157-
'predict method behavior.',
158-
DeprecationWarning)
159157
self.contamination = contamination
160158
self.novelty = novelty
161159

@@ -224,10 +222,19 @@ def fit(self, X, y=None):
224222
-------
225223
self : object
226224
"""
227-
if self.contamination not in ["auto", "legacy"]: # rm legacy in 0.22
228-
if not(0. < self.contamination <= .5):
225+
if self.contamination == "legacy":
226+
warnings.warn('default contamination parameter 0.1 will change '
227+
'in version 0.22 to "auto". This will change the '
228+
'predict method behavior.',
229+
FutureWarning)
230+
self._contamination = 0.1
231+
else:
232+
self._contamination = self.contamination
233+
234+
if self._contamination != 'auto':
235+
if not(0. < self._contamination <= .5):
229236
raise ValueError("contamination must be in (0, 0.5], "
230-
"got: %f" % self.contamination)
237+
"got: %f" % self._contamination)
231238

232239
super(LocalOutlierFactor, self).fit(X)
233240

@@ -251,15 +258,12 @@ def fit(self, X, y=None):
251258

252259
self.negative_outlier_factor_ = -np.mean(lrd_ratios_array, axis=1)
253260

254-
if self.contamination == "auto":
261+
if self._contamination == "auto":
255262
# inliers score around -1 (the higher, the less abnormal).
256263
self.offset_ = -1.5
257-
elif self.contamination == "legacy": # to rm in 0.22
258-
self.offset_ = scoreatpercentile(
259-
self.negative_outlier_factor_, 100. * 0.1)
260264
else:
261265
self.offset_ = scoreatpercentile(
262-
self.negative_outlier_factor_, 100. * self.contamination)
266+
self.negative_outlier_factor_, 100. * self._contamination)
263267

264268
return self
265269

sklearn/neighbors/tests/test_lof.py

Lines changed: 37 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
# License: BSD 3 clause
44

55
from math import sqrt
6+
7+
import pytest
68
import numpy as np
79
from sklearn import neighbors
810

@@ -32,8 +34,9 @@
3234
iris.target = iris.target[perm]
3335

3436

35-
@ignore_warnings(category=DeprecationWarning)
36-
# contamination changed to 'auto' 0.22
37+
@pytest.mark.filterwarnings(
38+
'ignore:default contamination parameter 0.1:FutureWarning')
39+
# XXX: Remove in 0.22
3740
def test_lof():
3841
# Toy sample (the last two samples are outliers):
3942
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [5, 3], [-4, 2]]
@@ -53,8 +56,9 @@ def test_lof():
5356
assert_array_equal(clf.fit_predict(X), 6 * [1] + 2 * [-1])
5457

5558

56-
@ignore_warnings(category=DeprecationWarning)
57-
# contamination changed to 'auto' 0.22
59+
@pytest.mark.filterwarnings(
60+
'ignore:default contamination parameter 0.1:FutureWarning')
61+
# XXX: Remove in 0.22
5862
def test_lof_performance():
5963
# Generate train/test data
6064
rng = check_random_state(2)
@@ -76,8 +80,9 @@ def test_lof_performance():
7680
assert_greater(roc_auc_score(y_test, y_pred), .99)
7781

7882

79-
@ignore_warnings(category=DeprecationWarning)
80-
# contamination changed to 'auto' 0.22
83+
@pytest.mark.filterwarnings(
84+
'ignore:default contamination parameter 0.1:FutureWarning')
85+
# XXX: Remove in 0.22
8186
def test_lof_values():
8287
# toy samples:
8388
X_train = [[1, 1], [1, 2], [2, 1]]
@@ -99,8 +104,9 @@ def test_lof_values():
99104
assert_array_almost_equal(-clf2.score_samples([[1., 1.]]), [s_1])
100105

101106

102-
@ignore_warnings(category=DeprecationWarning)
103-
# contamination changed to 'auto' 0.22
107+
@pytest.mark.filterwarnings(
108+
'ignore:default contamination parameter 0.1:FutureWarning')
109+
# XXX: Remove in 0.22
104110
def test_lof_precomputed(random_state=42):
105111
"""Tests LOF with a distance matrix."""
106112
# Note: smaller samples may result in spurious test success
@@ -126,8 +132,9 @@ def test_lof_precomputed(random_state=42):
126132
assert_array_almost_equal(pred_X_Y, pred_D_Y)
127133

128134

129-
@ignore_warnings(category=DeprecationWarning)
130-
# contamination changed to 'auto' 0.22
135+
@pytest.mark.filterwarnings(
136+
'ignore:default contamination parameter 0.1:FutureWarning')
137+
# XXX: Remove in 0.22
131138
def test_n_neighbors_attribute():
132139
X = iris.data
133140
clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X)
@@ -140,8 +147,9 @@ def test_n_neighbors_attribute():
140147
assert_equal(clf.n_neighbors_, X.shape[0] - 1)
141148

142149

143-
@ignore_warnings(category=DeprecationWarning)
144-
# contamination changed to 'auto' 0.22
150+
@pytest.mark.filterwarnings(
151+
'ignore:default contamination parameter 0.1:FutureWarning')
152+
# XXX: Remove in 0.22
145153
def test_score_samples():
146154
X_train = [[1, 1], [1, 2], [2, 1]]
147155
clf1 = neighbors.LocalOutlierFactor(n_neighbors=2,
@@ -163,8 +171,9 @@ def test_contamination():
163171
assert_raises(ValueError, clf.fit, X)
164172

165173

166-
@ignore_warnings(category=DeprecationWarning)
167-
# contamination changed to 'auto' 0.22
174+
@pytest.mark.filterwarnings(
175+
'ignore:default contamination parameter 0.1:FutureWarning')
176+
# XXX: Remove in 0.22
168177
def test_novelty_errors():
169178
X = iris.data
170179

@@ -182,8 +191,9 @@ def test_novelty_errors():
182191
assert_raises_regex(AttributeError, msg, getattr, clf, 'fit_predict')
183192

184193

185-
@ignore_warnings(category=DeprecationWarning)
186-
# contamination changed to 'auto' 0.22
194+
@pytest.mark.filterwarnings(
195+
'ignore:default contamination parameter 0.1:FutureWarning')
196+
# XXX: Remove in 0.22
187197
def test_novelty_training_scores():
188198
# check that the scores of the training samples are still accessible
189199
# when novelty=True through the negative_outlier_factor_ attribute
@@ -202,8 +212,9 @@ def test_novelty_training_scores():
202212
assert_array_almost_equal(scores_1, scores_2)
203213

204214

205-
@ignore_warnings(category=DeprecationWarning)
206-
# contamination changed to 'auto' 0.22
215+
@pytest.mark.filterwarnings(
216+
'ignore:default contamination parameter 0.1:FutureWarning')
217+
# XXX: Remove in 0.22
207218
def test_hasattr_prediction():
208219
# check availability of prediction methods depending on novelty value.
209220
X = [[1, 1], [1, 2], [2, 1]]
@@ -225,16 +236,19 @@ def test_hasattr_prediction():
225236
assert not hasattr(clf, 'score_samples')
226237

227238

228-
@ignore_warnings(category=DeprecationWarning)
229-
# contamination changed to 'auto' 0.22
239+
@pytest.mark.filterwarnings(
240+
'ignore:default contamination parameter 0.1:FutureWarning')
241+
# XXX: Remove in 0.22
230242
def test_novelty_true_common_tests():
243+
231244
# the common tests are run for the default LOF (novelty=False).
232245
# here we run these common tests for LOF when novelty=True
233246
check_estimator(neighbors.LocalOutlierFactor(novelty=True))
234247

235248

236-
def test_deprecation():
237-
assert_warns_message(DeprecationWarning,
249+
def test_contamination_future_warning():
250+
X = [[1, 1], [1, 2], [2, 1]]
251+
assert_warns_message(FutureWarning,
238252
'default contamination parameter 0.1 will change '
239253
'in version 0.22 to "auto"',
240-
neighbors.LocalOutlierFactor, )
254+
neighbors.LocalOutlierFactor().fit, X)

0 commit comments

Comments
 (0)