Skip to content
23 changes: 23 additions & 0 deletions doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,29 @@ For a grouped ``DataFrame``, you can rename in a similar manner:
'mean': 'bar',
'std': 'baz'}))

.. note::

In general, the output column names should be unique. You can't apply
the same function (or two functions with the same name) to the same
column.

.. ipython:: python
:okexcept:

grouped['C'].agg(['sum', 'sum'])


Pandas *does* allow you to provide multiple lambdas. In this case, pandas
will mangle the name of the (nameless) lambda functions, appending ``_<i>``
to each subsequent lambda.

.. ipython:: python

grouped['C'].agg([lambda x: x.max() - x.min(),
lambda x: x.median() - x.mean()])



.. _groupby.aggregate.named:

Named Aggregation
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ Other Enhancements
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
- :meth:`DataFrame.query` and :meth:`DataFrame.eval` now supports quoting column names with backticks to refer to names with spaces (:issue:`6508`)
- :func:`merge_asof` now gives a more clear error message when merge keys are categoricals that are not equal (:issue:`26136`)
- Support for multiple lambdas in the same aggregation for :meth:`GroupBy.aggregate` (:issue:`26430`).
- :meth:`pandas.core.window.Rolling` supports exponential (or Poisson) window type (:issue:`21303`)
- Error message for missing required imports now includes the original import error's text (:issue:`23868`)
- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)
Expand Down
70 changes: 70 additions & 0 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pandas.core.dtypes.common import (
ensure_int64, ensure_platform_int, is_bool, is_datetimelike,
is_integer_dtype, is_interval_dtype, is_numeric_dtype, is_scalar)
from pandas.core.dtypes.inference import is_dict_like, is_list_like
from pandas.core.dtypes.missing import isna, notna

from pandas._typing import FrameOrSeries
Expand Down Expand Up @@ -208,6 +209,8 @@ def aggregate(self, func, *args, **kwargs):
raise TypeError("Must provide 'func' or tuples of "
"'(column, aggfunc).")

func = _maybe_mangle_lambdas(func)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, the one on L810 is SeriesGroupBy.aggregate. I think it's entirely separate from NDFrameGroupBy.aggregate.


result, how = self._aggregate(func, _level=_level, *args, **kwargs)
if how is None:
return result
Expand Down Expand Up @@ -830,6 +833,7 @@ def aggregate(self, func_or_funcs=None, *args, **kwargs):
if isinstance(func_or_funcs, abc.Iterable):
# Catch instances of lists / tuples
# but not the class list / tuple itself.
func_or_funcs = _maybe_mangle_lambdas(func_or_funcs)
ret = self._aggregate_multiple_funcs(func_or_funcs,
(_level or 0) + 1)
if relabeling:
Expand Down Expand Up @@ -1710,3 +1714,69 @@ def _normalize_keyword_aggregation(kwargs):
order.append((column,
com.get_callable_name(aggfunc) or aggfunc))
return aggspec, columns, order


def _make_lambda(func, i):
    """
    Wrap ``func`` in a pass-through function with a numbered lambda name.

    Parameters
    ----------
    func : callable
        The callable every call is delegated to.
    i : int
        Number formatted into the wrapper's ``__name__``.

    Returns
    -------
    callable
        A new function whose ``__name__`` is ``"<lambda_{i}>"`` and which
        forwards all positional and keyword arguments to ``func``,
        returning whatever ``func`` returns.
    """
    mangled_name = "<lambda_{}>".format(i)

    def f(*args, **kwargs):
        # Pure delegation: the wrapper exists only to carry a distinct name.
        return func(*args, **kwargs)

    f.__name__ = mangled_name
    return f


def _managle_lambda_list(aggfuncs):
    """
    Give every lambda after the first one in ``aggfuncs`` a unique name.

    Parameters
    ----------
    aggfuncs : iterable
        Aggregation functions. Entries whose callable name is not
        ``"<lambda>"`` are kept untouched.

    Returns
    -------
    list
        A new list in the same order as ``aggfuncs``. The first lambda is
        kept as is; the k-th subsequent lambda (k >= 1) is replaced by a
        wrapper named ``"<lambda_k>"``.
    """
    mangled = []
    lambdas_seen = 0
    for candidate in aggfuncs:
        if com.get_callable_name(candidate) != "<lambda>":
            # Named functions, strings, etc. pass straight through.
            mangled.append(candidate)
            continue

        # Only the second and later lambdas need a new name; the first
        # one keeps the plain "<lambda>" label.
        mangled.append(
            candidate if lambdas_seen == 0
            else _make_lambda(candidate, lambdas_seen)
        )
        lambdas_seen += 1

    return mangled


def _maybe_mangle_lambdas(agg_spec):
"""
Make new lambdas with unique names.

Parameters
----------
agg_spec : Any
An argument to NDFrameGroupBy.agg.
Non-dict-like `agg_spec` are pass through as is.
For dict-like `agg_spec` a new spec is returned
with name-mangled lambdas.

Returns
-------
mangled : Any
Same type as the input.

Examples
--------
>>> _maybe_mangle_lambdas('sum')
'sum'

>>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP
[<function __main__.<lambda>()>,
<function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
"""
is_dict = is_dict_like(agg_spec)
if not (is_dict or is_list_like(agg_spec)):
return agg_spec
agg_spec2 = type(agg_spec)() # dict or OrderdDict

if is_dict:
for key in agg_spec:
aggfuncs = agg_spec[key]
if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
aggfuncs2 = _managle_lambda_list(aggfuncs)
else:
aggfuncs2 = aggfuncs

agg_spec2[key] = aggfuncs2 or aggfuncs
else:
agg_spec2 = _managle_lambda_list(agg_spec)

return agg_spec2
81 changes: 72 additions & 9 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series, compat, concat
from pandas.core.base import SpecificationError
from pandas.core.groupby.generic import _maybe_mangle_lambdas
from pandas.core.groupby.grouper import Grouping
import pandas.util.testing as tm

Expand Down Expand Up @@ -210,15 +211,6 @@ def test_multiple_functions_tuples_and_non_tuples(df):
tm.assert_frame_equal(result, expected)


def test_agg_multiple_functions_too_many_lambdas(df):
    # Two anonymous functions in one aggregation list are expected to be
    # rejected because both would be labelled "<lambda>".
    # NOTE(review): `_maybe_mangle_lambdas` in pandas/core/groupby/generic.py
    # appears to rename repeated lambdas instead of rejecting them --
    # confirm whether this expectation still holds.
    expected_msg = ('Function names must be unique, '
                    'found multiple named <lambda>')
    aggfuncs = ['mean', lambda x: x.mean(), lambda x: x.std()]

    with pytest.raises(SpecificationError, match=expected_msg):
        df.groupby('A').agg(aggfuncs)


def test_more_flexible_frame_multi_function(df):
grouped = df.groupby('A')

Expand Down Expand Up @@ -458,3 +450,74 @@ def test_agg_namedtuple(self):
expected = df.groupby("A").agg(b=("B", "sum"),
c=("B", "count"))
tm.assert_frame_equal(result, expected)


class TestLambdaMangling:
    """Tests for renaming repeated lambdas passed to groupby aggregation."""

    def test_maybe_mangle_lambdas_passthrough(self):
        # Scalars, a bare callable and a single-lambda list need no renaming.
        assert _maybe_mangle_lambdas('mean') == 'mean'
        assert _maybe_mangle_lambdas(lambda x: x).__name__ == '<lambda>'
        assert [x.__name__ for x in _maybe_mangle_lambdas([lambda x: x])
                ] == ['<lambda>']

    def test_maybe_mangle_lambdas_listlike(self):
        aggfuncs = [lambda x: 1, lambda x: 2]
        result = _maybe_mangle_lambdas(aggfuncs)
        assert result[0].__name__ == '<lambda>'
        assert result[1].__name__ == '<lambda_1>'
        # Renaming must not change what the functions compute.
        assert aggfuncs[0](None) == result[0](None)
        assert aggfuncs[1](None) == result[1](None)

    def test_maybe_mangle_lambdas(self):
        func = {
            'A': [lambda x: 0, lambda x: 1]
        }
        result = _maybe_mangle_lambdas(func)
        assert result['A'][0].__name__ == '<lambda>'
        assert result['A'][1].__name__ == '<lambda_1>'

    def test_maybe_mangle_lambdas_args(self):
        # Both lambdas take extra positional / keyword arguments so that the
        # re-wrapped one (position 1) is exercised, not only the untouched
        # first lambda.
        func = {
            'A': [lambda x, a, b=1: (0, a, b),
                  lambda x, a, b=1: (1, a, b)]
        }
        result = _maybe_mangle_lambdas(func)
        assert result['A'][0].__name__ == '<lambda>'
        assert result['A'][1].__name__ == '<lambda_1>'

        # Call the *returned* callables: the wrapper has to forward
        # positional and keyword arguments unchanged.
        assert result['A'][0](0, 1) == (0, 1, 1)
        assert result['A'][0](0, 1, 2) == (0, 1, 2)
        assert result['A'][0](0, 2, b=3) == (0, 2, 3)

        assert result['A'][1](0, 1) == (1, 1, 1)
        assert result['A'][1](0, 1, 2) == (1, 1, 2)
        assert result['A'][1](0, 2, b=3) == (1, 2, 3)

    def test_maybe_mangle_lambdas_named(self):
        # Named functions and nested (dict-like) renamers are left alone.
        func = OrderedDict([('C', np.mean),
                            ('D', OrderedDict([('foo', np.mean),
                                               ('bar', np.mean)]))])
        result = _maybe_mangle_lambdas(func)
        assert result == func

    def test_basic(self):
        df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
        result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]})

        expected = pd.DataFrame({("B", "<lambda>"): [0, 0],
                                 ("B", "<lambda_1>"): [1, 1]},
                                index=pd.Index([0, 1], name='A'))
        tm.assert_frame_equal(result, expected)

    def test_mangle_series_groupby(self):
        gr = pd.Series([1, 2, 3, 4]).groupby([0, 0, 1, 1])
        result = gr.agg([lambda x: 0, lambda x: 1])
        expected = pd.DataFrame({'<lambda>': [0, 0], '<lambda_1>': [1, 1]})
        tm.assert_frame_equal(result, expected)

    @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.")
    def test_with_kwargs(self):
        # Extra positional / keyword arguments given to ``agg`` should reach
        # every function in the list.
        f1 = lambda x, y, b=1: x.sum() + y + b
        f2 = lambda x, y, b=2: x.sum() + y * b
        result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0)
        expected = pd.DataFrame({'<lambda>': [4], '<lambda_1>': [6]})
        tm.assert_frame_equal(result, expected)

        result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10)
        expected = pd.DataFrame({'<lambda>': [13], '<lambda_1>': [30]})
        tm.assert_frame_equal(result, expected)