
Commit 79406f4

indianajensen authored and nelson-liu committed
fixed error message when y_pred and y_test labels don't match
fixes as per existing pull request scikit-learn#6714
fixed log_loss bug
enhance log_loss labels option
feature log_loss
changed test log_loss case u
add ValueError in log_loss
fixes as per existing pull request scikit-learn#6714
fixed error message when y_pred and y_test labels don't match
fixed error message when y_pred and y_test labels don't match
corrected doc/whats_new.rst for syntax and with correct formatting of credits
additional formatting fixes for doc/whats_new.rst
fixed versionadded comment
removed superfluous line
removed superfluous line
1 parent 6028e42 commit 79406f4

File tree

3 files changed: +35 -28 lines changed

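In miniature, the behavior this commit addresses looks like the sketch below. This is not part of the commit itself; it assumes a scikit-learn build that already includes the change (0.18 or later), and the probabilities are illustrative, mirroring the values used in the updated test.

import numpy as np
from sklearn.metrics import log_loss

y_true = [2, 2]                               # only one class appears in y_true
y_score = np.array([[0.1, 0.9], [0.1, 0.9]])  # but two columns of probabilities

# Without labels, log_loss infers the classes from y_true, finds only one,
# and (after this commit) raises a ValueError asking for explicit labels.
try:
    log_loss(y_true, y_score)
except ValueError as exc:
    print(exc)

# Passing the full label set resolves the ambiguity: column 1 is class 2.
print(log_loss(y_true, y_score, labels=[1, 2]))  # -log(0.9), about 0.105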

doc/whats_new.rst

Lines changed: 9 additions & 0 deletions
@@ -270,6 +270,11 @@ Enhancements
      (`#6913 <https://github.com/scikit-learn/scikit-learn/pull/6913>`_)
      By `YenChen Lin`_.

+   - Added ``labels`` flag to :func:`metrics.log_loss` to correct the metric
+     when only one class is present in the test data set
+     (`#7166 <https://github.com/scikit-learn/scikit-learn/pull/7166/>`_)
+     by `Hong Guangguo`_ with support of `Mads Jensen`_.
+
 Bug fixes
 .........

@@ -4376,3 +4381,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Konstantin Podshumok: https://github.com/podshumok

 .. _David Staub: https://github.com/staubda
+
+.. _Hong Guangguo: https://github.com/hongguangguo
+
+.. _Mads Jensen: https://github.com/indianajensen

sklearn/metrics/classification.py

Lines changed: 11 additions & 9 deletions
@@ -1544,8 +1544,8 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
         raise ValueError("{0} is not supported".format(y_type))


-def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
-             sample_weight=None):
+def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
+             labels=None):
     """Log loss, aka logistic loss or cross-entropy loss.

     This is the loss function used in (multinomial) logistic regression

@@ -1567,10 +1567,6 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
         Predicted probabilities, as returned by a classifier's
         predict_proba method.

-
-    labels : array-like, optional (default=None)
-        If not provided, labels will be inferred from y_true
-
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
         clipped to max(eps, min(1 - eps, p)).

@@ -1582,6 +1578,10 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.

+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from y_true
+        .. versionadded:: 0.18
+
     Returns
     -------
     loss : float

@@ -1604,8 +1604,8 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     lb = LabelBinarizer()
     lb.fit(labels) if labels is not None else lb.fit(y_true)
     if labels is None and len(lb.classes_) == 1:
-        raise ValueError('y_true has only one label,'
-                         'maybe get error log loss, should use labels option')
+        raise ValueError('y_true has only one label. Please provide '
+                         'the true labels explicitly through the labels argument.')

     T = lb.transform(y_true)

@@ -1633,7 +1633,9 @@ def log_loss(y_true, y_pred, labels=None, eps=1e-15, normalize=True,
     Y = check_array(Y)
     if T.shape[1] != Y.shape[1]:
         raise ValueError("y_true and y_pred have different number of classes "
-                         "%d, %d" % (T.shape[1], Y.shape[1]))
+                         "%d, %d.\nPlease provide the true labels explicitly "
+                         "through the labels argument" %
+                         (T.shape[1], Y.shape[1]))

     # Renormalize
     Y /= Y.sum(axis=1)[:, np.newaxis]
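The reworded message at the bottom of this file covers the second failure mode: y_true and y_pred disagreeing on the number of classes. A hedged sketch of that path, again assuming scikit-learn 0.18+ and illustrative values:

import numpy as np
from sklearn.metrics import log_loss

# y_true contains two distinct classes, but the probability matrix has three
# columns, e.g. because the classifier was trained on a third class as well.
y_true = [1, 2, 2]
y_pred = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.8, 0.1],
                   [0.2, 0.7, 0.1]])

try:
    log_loss(y_true, y_pred)  # infers 2 classes from y_true, sees 3 columns
except ValueError as exc:
    print(exc)  # "...different number of classes 2, 3. Please provide ..."

# Declaring the full label set tells log_loss what each column means.
print(log_loss(y_true, y_pred, labels=[1, 2, 3]))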

sklearn/metrics/tests/test_classification.py

Lines changed: 15 additions & 19 deletions
@@ -1383,32 +1383,28 @@ def test_log_loss():
     loss = log_loss(y_true, y_pred)
     assert_almost_equal(loss, 1.0383217, decimal=6)

-    #test labels option
+    # test labels option

-    X = [[1,1], [1,1], [2,2], [2,2]]
-    y_label = [1,1,2,2]
+    y_true = [2, 2]
+    y_score = np.array([[0.1, 0.9], [0.1, 0.9]])

-    X_test = [[2,2], [2,2]]
-    y_true = [2,2]
-    y_score = np.array([[0.1,0.9], [0.1, 0.9]])
-
-    # because y_true label are the same, if not use labels option, will get error
-    #error_logloss = log_loss(y_true, y_score)
-    #label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
-    #assert_almost_equal(error_logloss, label_not_of_2_loss)
-    #assert_raises(log_loss(y_true, y_score))
+    # because y_true label are the same, there should be an error if the
+    # labels option has not been used

-    error_str = ('y_true has only one label,'
-                 'maybe get error log loss, should use labels option')
+    # error_logloss = log_loss(y_true, y_score)
+    # label_not_of_2_loss = -np.mean(np.log(y_score[:,0]))
+    # assert_almost_equal(error_logloss, label_not_of_2_loss)
+    # assert_raises(log_loss(y_true, y_score))
+
+    error_str = ('y_true has only one label. Please provide '
+                 'the true labels explicitly through the labels argument.')

     assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)

-    # use labels, it works
-    ture_log_loss = -np.mean(np.log(y_score[:, 1]))
+    # when the labels argument is used, it works
+    true_log_loss = -np.mean(np.log(y_score[:, 1]))
     calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
-    assert_almost_equal(calculated_log_loss, ture_log_loss)
-
-
+    assert_almost_equal(calculated_log_loss, true_log_loss)


 def test_log_loss_pandas_input():
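For reference, the expected value asserted in the rewritten test can be checked by hand. With labels=[1, 2], the binarized truth for y_true = [2, 2] is [[0, 1], [0, 1]], so only the second column of y_score contributes and the loss reduces to -mean(log(0.9)) ≈ 0.10536. A small sketch (not part of the commit, assuming scikit-learn 0.18+):

import numpy as np
from sklearn.metrics import log_loss

y_true = [2, 2]
y_score = np.array([[0.1, 0.9], [0.1, 0.9]])

# Each sample is class 2 with predicted probability 0.9, so the per-sample
# loss is -log(0.9) and the mean equals the closed-form value below.
manual = -np.mean(np.log(y_score[:, 1]))
assert np.isclose(log_loss(y_true, y_score, labels=[1, 2]), manual)
print(manual)  # 0.10536...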
