
Commit 79406f4

indianajensen authored and nelson-liu committed
fixed error message when y_pred and y_test labels don't match
fixes as per existing pull request scikit-learn#6714
fixed log_loss bug
enhance log_loss labels option
feature log_loss
changed test log_loss case u
add ValueError in log_loss
fixes as per existing pull request scikit-learn#6714
fixed error message when y_pred and y_test labels don't match
fixed error message when y_pred and y_test labels don't match
corrected doc/whats_new.rst for syntax and with correct formatting of credits
additional formatting fixes for doc/whats_new.rst
fixed versionadded comment
removed superfluous line
removed superfluous line
1 parent 6028e42 commit 79406f4

File tree

3 files changed: +35 -28 lines changed

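In miniature, the behavior this commit addresses looks like the sketch below. This is not part of the commit itself; it assumes a scikit-learn build that already includes the change (0.18 or later), and the probabilities are illustrative, mirroring the values used in the updated test.

import numpy as np
from sklearn.metrics import log_loss

y_true = [2, 2]                               # only one class appears in y_true
y_score = np.array([[0.1, 0.9], [0.1, 0.9]])  # but two columns of probabilities

# Without labels, log_loss infers the classes from y_true, finds only one,
# and (after this commit) raises a ValueError asking for explicit labels.
try:
    log_loss(y_true, y_score)
except ValueError as exc:
    print(exc)

# Passing the full label set resolves the ambiguity: column 1 is class 2.
print(log_loss(y_true, y_score, labels=[1, 2]))  # -log(0.9), about 0.105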

doc/whats_new.rst

Lines changed: 9 additions & 0 deletions
@@ -270,6 +270,11 @@ Enhancements
      (`#6913 <https://github.com/scikit-learn/scikit-learn/pull/6913>`_)
      By `YenChen Lin`_.

+   - Added ``labels`` flag to :func:`metrics.log_loss` to correct the metric
+     when only one class is present in the test data set
+     (`#7166 <https://github.com/scikit-learn/scikit-learn/pull/7166/>`_)
+     by `Hong Guangguo`_ with support of `Mads Jensen`_.
+
 Bug fixes
 .........

@@ -4376,3 +4381,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Konstantin Podshumok: https://github.com/podshumok

 .. _David Staub: https://github.com/staubda
+
+.. _Hong Guangguo: https://github.com/hongguangguo
+
+.. _Mads Jensen: https://github.com/indianajensen

sklearn/metrics/classification.py

Lines changed: 11 additions & 9 deletions
@@ -1544,8 +1544,8 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
         raise ValueError("{0} is not supported".format(y_type))


-def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
-             sample_weight=None):
+def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
+             labels=None):
     """Log loss, aka logistic loss or cross-entropy loss.

     This is the loss function used in (multinomial) logistic regression

@@ -1567,10 +1567,6 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
         Predicted probabilities, as returned by a classifier's
         predict_proba method.

-
-    labels : array-like, optional (default=None)
-        If not provided, labels will be inferred from y_true
-
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
         clipped to max(eps, min(1 - eps, p)).

@@ -1582,6 +1578,10 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.

+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from y_true
+        .. versionadded:: 0.18
+
     Returns
     -------
     loss : float

@@ -1604,8 +1604,8 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     lb = LabelBinarizer()
     lb.fit(labels) if labels is not None else lb.fit(y_true)
     if labels is None and len(lb.classes_) == 1:
-        raise ValueError('y_true has only one label,'
-                         'maybe get error log loss, should use labels option')
+        raise ValueError('y_true has only one label. Please provide '
+                         'the true labels explicitly through the labels argument.')

     T = lb.transform(y_true)

@@ -1633,7 +1633,9 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     Y = check_array(Y)
     if T.shape[1] != Y.shape[1]:
         raise ValueError("y_true and y_pred have different number of classes "
-                         "%d, %d" % (T.shape[1], Y.shape[1]))
+                         "%d, %d.\nPlease provide the true labels explicitly "
+                         "through the labels argument" %
+                         (T.shape[1], Y.shape[1]))

     # Renormalize
     Y /= Y.sum(axis=1)[:, np.newaxis]
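The reworded message at the bottom of this file covers the second failure mode: y_true and y_pred disagreeing on the number of classes. A hedged sketch of that path, again assuming scikit-learn 0.18+ and illustrative values:

import numpy as np
from sklearn.metrics import log_loss

# y_true contains two distinct classes, but the probability matrix has three
# columns, e.g. because the classifier was trained on a third class as well.
y_true = [1, 2, 2]
y_pred = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.8, 0.1],
                   [0.2, 0.7, 0.1]])

try:
    log_loss(y_true, y_pred)  # infers 2 classes from y_true, sees 3 columns
except ValueError as exc:
    print(exc)  # "...different number of classes 2, 3. Please provide ..."

# Declaring the full label set tells log_loss what each column means.
print(log_loss(y_true, y_pred, labels=[1, 2, 3]))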

sklearn/metrics/tests/test_classification.py

Lines changed: 15 additions & 19 deletions
@@ -1383,32 +1383,28 @@ def test_log_loss():
     loss = log_loss(y_true, y_pred)
     assert_almost_equal(loss, 1.0383217, decimal=6)

-    #test labels option
+    # test labels option

-    X = [[1,1], [1,1], [2,2], [2,2]]
-    y_label = [1,1,2,2]
+    y_true = [2, 2]
+    y_score = np.array([[0.1, 0.9], [0.1, 0.9]])

-    X_test = [[2,2], [2,2]]
-    y_true = [2,2]
-    y_score = np.array([[0.1,0.9], [0.1, 0.9]])
-
-    # because y_true label are the same, if not use labels option, will get error
-    #error_logloss = log_loss(y_true, y_score)
-    #label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
-    #assert_almost_equal(error_logloss, label_not_of_2_loss)
-    #assert_raises(log_loss(y_true, y_score))
+    # because y_true label are the same, there should be an error if the
+    # labels option has not been used

-    error_str = ('y_true has only one label,'
-                 'maybe get error log loss, should use labels option')
+    # error_logloss = log_loss(y_true, y_score)
+    # label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
+    # assert_almost_equal(error_logloss, label_not_of_2_loss)
+    # assert_raises(log_loss(y_true, y_score))
+
+    error_str = ('y_true has only one label. Please provide '
+                 'the true labels explicitly through the labels argument.')

     assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)

-    # use labels, it works
-    ture_log_loss = -np.mean(np.log(y_score[:, 1]))
+    # when the labels argument is used, it works
+    true_log_loss = -np.mean(np.log(y_score[:, 1]))
     calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
-    assert_almost_equal(calculated_log_loss, ture_log_loss)
-
-
+    assert_almost_equal(calculated_log_loss, true_log_loss)


 def test_log_loss_pandas_input():
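For reference, the expected value asserted in the rewritten test can be checked by hand. With labels=[1, 2], the binarized truth for y_true = [2, 2] is [[0, 1], [0, 1]], so only the second column of y_score contributes and the loss reduces to -mean(log(0.9)) ≈ 0.10536. A small sketch (not part of the commit, assuming scikit-learn 0.18+):

import numpy as np
from sklearn.metrics import log_loss

y_true = [2, 2]
y_score = np.array([[0.1, 0.9], [0.1, 0.9]])

# Each sample is class 2 with predicted probability 0.9, so the per-sample
# loss is -log(0.9) and the mean equals the closed-form value below.
manual = -np.mean(np.log(y_score[:, 1]))
assert np.isclose(log_loss(y_true, y_score, labels=[1, 2]), manual)
print(manual)  # 0.10536...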
