
Commit 104e09a

Add labels argument to log_loss to provide labels explicitly when number of classes in y_true and y_pred differ
Fixes scikit-learn#4033, scikit-learn#4546, scikit-learn#6703

* fixed log_loss bug; enhance log_loss labels option feature; log_loss changed test log_loss case u; add ValueError in log_loss
* fixed error message when y_pred and y_test labels don't match; fixes as per existing pull request scikit-learn#6714; fixed log_loss bug; enhance log_loss labels option feature; log_loss changed test log_loss case u; add ValueError in log_loss; fixes as per existing pull request scikit-learn#6714; fixed error message when y_pred and y_test labels don't match; fixed error message when y_pred and y_test labels don't match; corrected doc/whats_new.rst for syntax and with correct formatting of credits; additional formatting fixes for doc/whats_new.rst; fixed versionadded comment; removed superfluous line; removed superflous line
* Wrap up changes to fix log_loss bug and clean up log_loss; fix a typo in whatsnew; refactor conditional and move dtype check before np.clip; general cleanup of log_loss; remove dtype checks; edit non-regression test and wordings; fix non-regression test; misc doc fixes / clarifications + final touches; fix naming of y_score2 variable; specify log loss is only valid for 2 labels or more
2 parents 084ef97 + d97a25f commit 104e09a
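
For context, a minimal usage sketch of the new labels argument (not part of the commit itself; it restates what the description above and the new test below exercise, and assumes scikit-learn 0.18, where this change landed):

    import numpy as np
    from sklearn.metrics import log_loss

    y_true = [2, 2]                       # only one distinct label observed
    y_score = np.array([[0.1, 0.9],
                        [0.1, 0.9]])      # columns follow the sorted labels [1, 2]

    # With labels omitted, log_loss now raises a ValueError because the full
    # label set cannot be inferred from y_true alone.  Passing it explicitly
    # resolves the ambiguity:
    loss = log_loss(y_true, y_score, labels=[1, 2])
    # equals -np.mean(np.log(y_score[:, 1])), roughly 0.105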

3 files changed: +93 −26 lines

doc/whats_new.rst

Lines changed: 9 additions & 0 deletions

@@ -270,6 +270,11 @@ Enhancements
     (`#6913 <https://github.com/scikit-learn/scikit-learn/pull/6913>`_)
     By `YenChen Lin`_.
 
+   - Added ``labels`` flag to :class:`metrics.log_loss` to explicitly provide
+     the labels when the number of classes in ``y_true`` and ``y_pred`` differ.
+     (`#7239 <https://github.com/scikit-learn/scikit-learn/pull/7239/>`_)
+     by `Hong Guangguo`_ with help from `Mads Jensen`_ and `Nelson Liu`_.
+
 Bug fixes
 .........
 
@@ -4376,3 +4381,7 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson.
 .. _Konstantin Podshumok: https://github.com/podshumok
 
 .. _David Staub: https://github.com/staubda
+
+.. _Hong Guangguo: https://github.com/hongguangguo
+
+.. _Mads Jensen: https://github.com/indianajensen
sklearn/metrics/classification.py

Lines changed: 58 additions & 25 deletions

@@ -1544,13 +1544,15 @@ def hamming_loss(y_true, y_pred, classes=None, sample_weight=None):
         raise ValueError("{0} is not supported".format(y_type))
 
 
-def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
+def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None,
+             labels=None):
     """Log loss, aka logistic loss or cross-entropy loss.
 
     This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
     log-likelihood of the true labels given a probabilistic classifier's
-    predictions. For a single sample with true label yt in {0,1} and
+    predictions. The log loss is only defined for two or more labels.
+    For a single sample with true label yt in {0,1} and
     estimated probability yp that yt = 1, the log loss is
 
         -log P(yt|yp) = -(yt log(yp) + (1 - yt) log(1 - yp))
@@ -1562,9 +1564,13 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     y_true : array-like or label indicator matrix
         Ground truth (correct) labels for n_samples samples.
 
-    y_pred : array-like of float, shape = (n_samples, n_classes)
+    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
         Predicted probabilities, as returned by a classifier's
-        predict_proba method.
+        predict_proba method. If ``y_pred.shape = (n_samples,)``
+        the probabilities provided are assumed to be that of the
+        positive class. The labels in ``y_pred`` are assumed to be
+        ordered alphabetically, as done by
+        :class:`preprocessing.LabelBinarizer`.
 
     eps : float
         Log loss is undefined for p=0 or p=1, so probabilities are
@@ -1577,6 +1583,12 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     sample_weight : array-like of shape = [n_samples], optional
         Sample weights.
 
+    labels : array-like, optional (default=None)
+        If not provided, labels will be inferred from y_true. If ``labels``
+        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
+        assumed to be binary and are inferred from ``y_true``.
+        .. versionadded:: 0.18
+
     Returns
     -------
     loss : float
@@ -1596,37 +1608,58 @@ def log_loss(y_true, y_pred, eps=1e-15, normalize=True, sample_weight=None):
     -----
     The logarithm used is the natural logarithm (base-e).
     """
+    y_pred = check_array(y_pred, ensure_2d=False)
+    check_consistent_length(y_pred, y_true)
+
     lb = LabelBinarizer()
-    T = lb.fit_transform(y_true)
-    if T.shape[1] == 1:
-        T = np.append(1 - T, T, axis=1)
 
-    y_pred = check_array(y_pred, ensure_2d=False)
-    # Clipping
-    Y = np.clip(y_pred, eps, 1 - eps)
+    if labels is not None:
+        lb.fit(labels)
+    else:
+        lb.fit(y_true)
+
+    if len(lb.classes_) == 1:
+        if labels is None:
+            raise ValueError('y_true contains only one label ({0}). Please provide '
+                             'the true labels explicitly through the labels '
+                             'argument.'.format(lb.classes_[0]))
+        else:
+            raise ValueError('The labels array needs to contain at least two labels '
+                             'for log_loss, got {0}.'.format(lb.classes_))
 
-    # This happens in cases when elements in y_pred have type "str".
-    if not isinstance(Y, np.ndarray):
-        raise ValueError("y_pred should be an array of floats.")
+    transformed_labels = lb.transform(y_true)
+
+    if transformed_labels.shape[1] == 1:
+        transformed_labels = np.append(1 - transformed_labels,
+                                       transformed_labels, axis=1)
+
+    # Clipping
+    y_pred = np.clip(y_pred, eps, 1 - eps)
 
     # If y_pred is of single dimension, assume y_true to be binary
     # and then check.
-    if Y.ndim == 1:
-        Y = Y[:, np.newaxis]
-    if Y.shape[1] == 1:
-        Y = np.append(1 - Y, Y, axis=1)
+    if y_pred.ndim == 1:
+        y_pred = y_pred[:, np.newaxis]
+    if y_pred.shape[1] == 1:
+        y_pred = np.append(1 - y_pred, y_pred, axis=1)
 
     # Check if dimensions are consistent.
-    check_consistent_length(T, Y)
-    T = check_array(T)
-    Y = check_array(Y)
-    if T.shape[1] != Y.shape[1]:
-        raise ValueError("y_true and y_pred have different number of classes "
-                         "%d, %d" % (T.shape[1], Y.shape[1]))
+    transformed_labels = check_array(transformed_labels)
+    if len(lb.classes_) != y_pred.shape[1]:
+        if labels is None:
+            raise ValueError("y_true and y_pred contain different number of classes "
+                             "{0}, {1}. Please provide the true labels explicitly "
+                             "through the labels argument. Classes found in "
+                             "y_true: {2}".format(transformed_labels.shape[1],
+                                                  y_pred.shape[1], lb.classes_))
+        else:
+            raise ValueError('The number of classes in labels is different '
+                             'from that in y_pred. Classes found in '
+                             'labels: {0}'.format(lb.classes_))
 
     # Renormalize
-    Y /= Y.sum(axis=1)[:, np.newaxis]
-    loss = -(T * np.log(Y)).sum(axis=1)
+    y_pred /= y_pred.sum(axis=1)[:, np.newaxis]
+    loss = -(transformed_labels * np.log(y_pred)).sum(axis=1)
 
     return _weighted_sum(loss, sample_weight, normalize)
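To illustrate the two y_pred shapes the updated docstring describes, a hedged sketch (not from the diff; the numeric comments are approximate):

    from sklearn.metrics import log_loss

    # 1-D y_pred: interpreted as the probability of the positive class,
    # with the binary labels inferred from y_true.
    print(log_loss([0, 1, 1], [0.1, 0.8, 0.9]))          # ~0.145

    # 2-D y_pred with a column for a class (3) that never occurs in y_true:
    # the explicit labels list supplies the column-to-class mapping.
    print(log_loss([1, 2, 2],
                   [[0.8, 0.1, 0.1], [0.1, 0.8, 0.1], [0.2, 0.7, 0.1]],
                   labels=[1, 2, 3]))                    # ~0.268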
sklearn/metrics/tests/test_classification.py

Lines changed: 26 additions & 1 deletion

@@ -45,7 +45,6 @@
 from sklearn.metrics import zero_one_loss
 from sklearn.metrics import brier_score_loss
 
-
 from sklearn.metrics.classification import _check_targets
 from sklearn.exceptions import UndefinedMetricWarning
 
@@ -1384,6 +1383,32 @@ def test_log_loss():
     loss = log_loss(y_true, y_pred)
     assert_almost_equal(loss, 1.0383217, decimal=6)
 
+    # test labels option
+
+    y_true = [2, 2]
+    y_pred = [[0.2, 0.7], [0.6, 0.5]]
+    y_score = np.array([[0.1, 0.9], [0.1, 0.9]])
+    error_str = ('y_true contains only one label (2). Please provide '
+                 'the true labels explicitly through the labels argument.')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
+
+    y_pred = [[0.2, 0.7], [0.6, 0.5], [0.2, 0.3]]
+    error_str = ('Found arrays with inconsistent numbers of '
+                 'samples: [2 3]')
+    assert_raise_message(ValueError, error_str, log_loss, y_true, y_pred)
+
+    # works when the labels argument is used
+
+    true_log_loss = -np.mean(np.log(y_score[:, 1]))
+    calculated_log_loss = log_loss(y_true, y_score, labels=[1, 2])
+    assert_almost_equal(calculated_log_loss, true_log_loss)
+
+    # ensure labels work when len(np.unique(y_true)) != y_pred.shape[1]
+    y_true = [1, 2, 2]
+    y_score2 = [[0.2, 0.7, 0.3], [0.6, 0.5, 0.3], [0.3, 0.9, 0.1]]
+    loss = log_loss(y_true, y_score2, labels=[1, 2, 3])
+    assert_almost_equal(loss, 1.0630345, decimal=6)
+
 
 def test_log_loss_pandas_input():
     # case when input is a pandas series and dataframe gh-5715
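
The expected value 1.0630345 asserted in the new test can be reproduced by hand; a short sketch of that arithmetic (not part of the diff):

    import numpy as np

    y_score2 = np.array([[0.2, 0.7, 0.3],
                         [0.6, 0.5, 0.3],
                         [0.3, 0.9, 0.1]])
    # log_loss renormalizes each row to sum to 1, then averages -log of the
    # probability assigned to the true class; with labels=[1, 2, 3],
    # y_true = [1, 2, 2] maps to columns 0, 1, 1.
    probs = y_score2 / y_score2.sum(axis=1, keepdims=True)
    expected = -np.mean(np.log(probs[[0, 1, 2], [0, 1, 1]]))   # ~1.0630346
    # agrees with the asserted 1.0630345 to the 6 decimal places checked above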
