pandas-dev · jreback · Jun 27, 2019 · May 31, 2019 · Jun 24, 2019 · Jun 24, 2019
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -568,6 +568,29 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
  'mean': 'bar',
  'std': 'baz'}))
 
+.. note::
+
+ In general, the output column names should be unique. You can't apply
+ the same function (or two functions with the same name) to the same
+ column.
+
+ .. ipython:: python
+ :okexcept:
+
+ grouped['C'].agg(['sum', 'sum'])
+
+
+ Pandas *does* allow you to provide multiple lambdas. In this case, pandas
+ will mangle the name of the (nameless) lambda functions, appending ``_<i>``
+ to each subsequent lambda.
+
+ .. ipython:: python
+
+ grouped['C'].agg([lambda x: x.max() - x.min(),
+ lambda x: x.median() - x.mean()])
+
+
+
 .. _groupby.aggregate.named:
 
 Named Aggregation

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -95,6 +95,7 @@ Other Enhancements
 - :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
 - :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
 - :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
+- Support for multiple lambdas in the same aggregation for :meth:`GroupBy.aggregate` (:issue:`26430`).
 - :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
 - Error message for missing required imports now includes the original import error's text (:issue:`23868`)
 - :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -25,6 +25,7 @@
 from pandas.core.dtypes.common import (
  ensure_int64, ensure_platform_int, is_bool, is_datetimelike,
  is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar)
+from pandas.core.dtypes.inference import is_dict_like, is_list_like
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas._typing import FrameOrSeries
@@ -208,6 +209,8 @@ def aggregate(self, func, *args, **kwargs):
  raise TypeError("Must provide 'func' or tuples of "
  "'(column, aggfunc).")
 
+ func = _maybe_mangle_lambdas(func)
+
  result, how = self._aggregate(func, _level=_level, *args, **kwargs)
  if how is None:
  return result
@@ -830,6 +833,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs):
  if isinstance(func_or_funcs, abc.Iterable):
  # Catch instances of lists / tuples
  # but not the class list / tuple itself.
+ func_or_funcs = _maybe_mangle_lambdas(func_or_funcs)
  ret = self._aggregate_multiple_funcs(func_or_funcs,
  (_level or 0) + 1)
  if relabeling:
@@ -1710,3 +1714,69 @@ def _normalize_keyword_aggregation(kwargs):
  order.append((column,
  com.get_callable_name(aggfunc) or aggfunc))
  return aggspec, columns, order
+
+
+def _make_lambda(func, i):
+ """
+ Wrap ``func`` in a new function whose ``__name__`` is ``<lambda_{i}>``.
+
+ The wrapper forwards all positional and keyword arguments to ``func``
+ and returns whatever ``func`` returns.
+ """
+ def f(*args, **kwargs):
+ return func(*args, **kwargs)
+ f.__name__ = "<lambda_{}>".format(i)
+ return f
+
+
+def _managle_lambda_list(aggfuncs):
+ """
+ Build a new list of aggfuncs in which repeated lambdas get unique names.
+
+ Entries whose callable name is "<lambda>" are counted with ``i``; while
+ ``i`` is still 0 such an entry is kept as is, afterwards it is replaced
+ by a ``_make_lambda(aggfunc, i)`` wrapper. Every entry (wrapped or not)
+ is appended to the returned list; the input iterable is not modified.
+ """
+ i = 0
+ aggfuncs2 = []
+ for aggfunc in aggfuncs:
+ if com.get_callable_name(aggfunc) == "<lambda>":
+ if i > 0:
+ aggfunc = _make_lambda(aggfunc, i)
+ # NOTE(review): indentation is collapsed in this patch view; the
+ # counter is presumably bumped once per lambda (not once per entry)
+ # - confirm against the original file.
+ i += 1
+ aggfuncs2.append(aggfunc)
+
+ return aggfuncs2
+
+
+def _maybe_mangle_lambdas(agg_spec):
+ """
+ Make new lambdas with unique names.
+
+ Parameters
+ ----------
+ agg_spec : Any
+ An argument to NDFrameGroupBy.agg.
+ An `agg_spec` that is neither dict-like nor list-like is passed
+ through as is.
+ For a list-like (non-dict) `agg_spec` a new list with
+ name-mangled lambdas is returned.
+ For a dict-like `agg_spec` a new spec is returned in which every
+ list-like (non-dict) value has its lambdas name-mangled; other
+ values are copied over unchanged.
+
+ Returns
+ -------
+ mangled : Any
+ The input object itself when it is passed through, a new
+ ``type(agg_spec)`` instance for dict-like input, or a list for
+ any other list-like input.
+
+ Examples
+ --------
+ >>> _maybe_mangle_lambdas('sum')
+ 'sum'
+
+ >>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP
+ [<function __main__.<lambda>()>,
+ <function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
+ """
+ is_dict = is_dict_like(agg_spec)
+ if not (is_dict or is_list_like(agg_spec)):
+ return agg_spec
+ # Only used as the output container on the dict-like path; on the
+ # list-like path this name is rebound to a plain list below.
+ agg_spec2 = type(agg_spec)() # dict or OrderedDict
+
+ if is_dict:
+ for key in agg_spec:
+ aggfuncs = agg_spec[key]
+ if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
+ aggfuncs2 = _managle_lambda_list(aggfuncs)
+ else:
+ aggfuncs2 = aggfuncs
+
+ # Falls back to the original value whenever the mangled one is falsy
+ # (e.g. an empty list).
+ agg_spec2[key] = aggfuncs2 or aggfuncs
+ else:
+ agg_spec2 = _managle_lambda_list(agg_spec)
+
+ return agg_spec2
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -10,6 +10,7 @@
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
 from pandas.core.base import SpecificationError
+from pandas.core.groupby.generic import _maybe_mangle_lambdas
 from pandas.core.groupby.grouper import Grouping
 import pandas.util.testing as tm
 
@@ -210,15 +211,6 @@ def test_multiple_functions_tuples_and_non_tuples(df):
  tm.assert_frame_equal(result, expected)
 
 
-def test_agg_multiple_functions_too_many_lambdas(df):
- grouped = df.groupby('A')
- funcs = ['mean', lambda x: x.mean(), lambda x: x.std()]
-
- msg = 'Function names must be unique, found multiple named <lambda>'
- with pytest.raises(SpecificationError, match=msg):
- grouped.agg(funcs)
-
-
 def test_more_flexible_frame_multi_function(df):
  grouped = df.groupby('A')
 
@@ -458,3 +450,74 @@ def test_agg_namedtuple(self):
  expected = df.groupby("A").agg(b=("B", "sum"),
  c=("B", "count"))
  tm.assert_frame_equal(result, expected)
+
+
+class TestLambdaMangling:
+ """Tests for ``_maybe_mangle_lambdas`` and for groupby ``agg`` with several lambdas."""
+
+ def test_maybe_mangle_lambdas_passthrough(self):
+ # A string, a bare lambda and a one-lambda list all keep their names.
+ assert _maybe_mangle_lambdas('mean') == 'mean'
+ assert _maybe_mangle_lambdas(lambda x: x).__name__ == '<lambda>'
+ assert [x.__name__ for x in _maybe_mangle_lambdas([lambda x: x])
+ ] == ['<lambda>']
+
+ def test_maybe_mangle_lambdas_listlike(self):
+ # The second lambda is renamed; both still return the same values.
+ aggfuncs = [lambda x: 1, lambda x: 2]
+ result = _maybe_mangle_lambdas(aggfuncs)
+ assert result[0].__name__ == '<lambda>'
+ assert result[1].__name__ == '<lambda_1>'
+ assert aggfuncs[0](None) == result[0](None)
+ assert aggfuncs[1](None) == result[1](None)
+
+ def test_maybe_mangle_lambdas(self):
+ # Dict spec: lambdas inside a list value are mangled per key.
+ func = {
+ 'A': [lambda x: 0, lambda x: 1]
+ }
+ result = _maybe_mangle_lambdas(func)
+ assert result['A'][0].__name__ == '<lambda>'
+ assert result['A'][1].__name__ == '<lambda_1>'
+
+ def test_maybe_mangle_lambdas_args(self):
+ func = {
+ 'A': [lambda x, a, b=1: (0, a, b), lambda x: 1]
+ }
+ result = _maybe_mangle_lambdas(func)
+ assert result['A'][0].__name__ == '<lambda>'
+ assert result['A'][1].__name__ == '<lambda_1>'
+
+ # NOTE(review): the calls below go through the original ``func`` spec,
+ # not ``result``, so argument forwarding of the mangled spec is not
+ # exercised here - confirm whether ``result`` was intended.
+ assert func['A'][0](0, 1) == (0, 1, 1)
+ assert func['A'][0](0, 1, 2) == (0, 1, 2)
+ assert func['A'][0](0, 2, b=3) == (0, 2, 3)
+
+ def test_maybe_mangle_lambdas_named(self):
+ # A spec without lambdas (including a nested dict value) compares
+ # equal to the input after mangling.
+ func = OrderedDict([('C', np.mean),
+ ('D', OrderedDict([('foo', np.mean),
+ ('bar', np.mean)]))])
+ result = _maybe_mangle_lambdas(func)
+ assert result == func
+
+ def test_basic(self):
+ # End-to-end: DataFrame groupby with two lambdas on one column.
+ df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
+ result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]})
+
+ expected = pd.DataFrame({("B", "<lambda>"): [0, 0],
+ ("B", "<lambda_1>"): [1, 1]},
+ index=pd.Index([0, 1], name='A'))
+ tm.assert_frame_equal(result, expected)
+
+ def test_mangle_series_groupby(self):
+ # End-to-end: Series groupby with a list of two lambdas.
+ gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1])
+ result = gr.agg([lambda x: 0, lambda x: 1])
+ expected = pd.DataFrame({'<lambda>': [0, 0], '<lambda_1>': [1, 1]})
+ tm.assert_frame_equal(result, expected)
+
+ @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
+ def test_with_kwargs(self):
+ # NOTE(review): assuming the trailing ``0`` is forwarded as ``y``,
+ # ``f2`` evaluates ``x.sum() + y * b`` = 3 for both calls, while the
+ # expected values (6 and 30) match ``(x.sum() + y) * b`` - verify the
+ # expectations before removing the xfail.
+ f1 = lambda x, y, b=1: x.sum() + y + b
+ f2 = lambda x, y, b=2: x.sum() + y * b
+ result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0)
+ expected = pd.DataFrame({'<lambda>': [4], '<lambda_1>': [6]})
+ tm.assert_frame_equal(result, expected)
+
+ result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
+ expected = pd.DataFrame({'<lambda>': [13], '<lambda_1>': [30]})
+ tm.assert_frame_equal(result, expected)