-
- Notifications
You must be signed in to change notification settings - Fork 19.3k
REF: Decouple Series.apply from Series.agg #53400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
75ce829 52db878 fc26828 4d0db30 c521691 9353f06 e7e3433 755ec07 92e7a9f e68c46e 9af24b2 af0417d 8564968 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -99,6 +99,7 @@ Other enhancements | |
| - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`) | ||
| - :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) | ||
| - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`) | ||
| - Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`). | ||
| ||
| - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) | ||
| - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) | ||
| - | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -16,6 +16,7 @@ | |
| Iterable, | ||
| Iterator, | ||
| List, | ||
| Literal, | ||
| Sequence, | ||
| cast, | ||
| ) | ||
| | @@ -288,6 +289,11 @@ def agg_list_like(self) -> DataFrame | Series: | |
| ------- | ||
| Result of aggregation. | ||
| """ | ||
| return self.agg_or_apply_list_like(op_name="agg") | ||
| | ||
| def agg_or_apply_list_like( | ||
| self, op_name: Literal["agg", "apply"] | ||
| ) -> DataFrame | Series: | ||
| from pandas.core.groupby.generic import ( | ||
| DataFrameGroupBy, | ||
| SeriesGroupBy, | ||
| | @@ -296,6 +302,9 @@ def agg_list_like(self) -> DataFrame | Series: | |
| | ||
| obj = self.obj | ||
| func = cast(List[AggFuncTypeBase], self.func) | ||
| kwargs = self.kwargs | ||
| if op_name == "apply": | ||
| kwargs = {**kwargs, "by_row": False} | ||
Comment on lines +306 to +307 Member There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @topper-123: shouldn't by_row here be True for backwards compatibility? Member There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. On second thought, I'm thinking this should now be [inline code missing from the extracted page]. Contributor Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you are right. I'll make a new PR on that. | ||
| | ||
| if getattr(obj, "axis", 0) == 1: | ||
| raise NotImplementedError("axis other than 0 is not supported") | ||
| | @@ -313,8 +322,6 @@ def agg_list_like(self) -> DataFrame | Series: | |
| keys = [] | ||
| | ||
| is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) | ||
| is_ser_or_df = isinstance(obj, (ABCDataFrame, ABCSeries)) | ||
| this_args = [self.axis, *self.args] if is_ser_or_df else self.args | ||
| | ||
| context_manager: ContextManager | ||
| if is_groupby: | ||
| | @@ -323,12 +330,19 @@ def agg_list_like(self) -> DataFrame | Series: | |
| context_manager = com.temp_setattr(obj, "as_index", True) | ||
| else: | ||
| context_manager = nullcontext() | ||
| | ||
| def include_axis(colg) -> bool: | ||
| return isinstance(colg, ABCDataFrame) or ( | ||
| isinstance(colg, ABCSeries) and op_name == "agg" | ||
| ) | ||
| | ||
| with context_manager: | ||
| # degenerate case | ||
| if selected_obj.ndim == 1: | ||
| for a in func: | ||
| colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) | ||
| new_res = colg.aggregate(a, *this_args, **self.kwargs) | ||
| args = [self.axis, *self.args] if include_axis(colg) else self.args | ||
| new_res = getattr(colg, op_name)(a, *args, **kwargs) | ||
| results.append(new_res) | ||
| | ||
| # make sure we find a good name | ||
| | @@ -339,7 +353,8 @@ def agg_list_like(self) -> DataFrame | Series: | |
| indices = [] | ||
| for index, col in enumerate(selected_obj): | ||
| colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) | ||
| new_res = colg.aggregate(func, *this_args, **self.kwargs) | ||
| args = [self.axis, *self.args] if include_axis(colg) else self.args | ||
| new_res = getattr(colg, op_name)(func, *args, **kwargs) | ||
| results.append(new_res) | ||
| indices.append(index) | ||
| keys = selected_obj.columns.take(indices) | ||
| | @@ -366,15 +381,21 @@ def agg_dict_like(self) -> DataFrame | Series: | |
| ------- | ||
| Result of aggregation. | ||
| """ | ||
| return self._apply_dict_like(op_name="agg") | ||
| | ||
| def _apply_dict_like(self, op_name: Literal["agg", "apply"]) -> DataFrame | Series: | ||
| from pandas import Index | ||
| from pandas.core.groupby.generic import ( | ||
| DataFrameGroupBy, | ||
| SeriesGroupBy, | ||
| ) | ||
| from pandas.core.reshape.concat import concat | ||
| | ||
| assert op_name in ["agg", "apply"] | ||
| | ||
| obj = self.obj | ||
| func = cast(AggFuncTypeDict, self.func) | ||
| kwds = {"by_row": False} if op_name == "apply" else {} | ||
| | ||
| if getattr(obj, "axis", 0) == 1: | ||
| raise NotImplementedError("axis other than 0 is not supported") | ||
| | @@ -387,7 +408,7 @@ def agg_dict_like(self) -> DataFrame | Series: | |
| selected_obj = obj._selected_obj | ||
| selection = obj._selection | ||
| | ||
| func = self.normalize_dictlike_arg("agg", selected_obj, func) | ||
| func = self.normalize_dictlike_arg(op_name, selected_obj, func) | ||
| | ||
| is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) | ||
| context_manager: ContextManager | ||
| | @@ -407,7 +428,9 @@ def agg_dict_like(self) -> DataFrame | Series: | |
| if selected_obj.ndim == 1: | ||
| # key only used for output | ||
| colg = obj._gotitem(selection, ndim=1) | ||
| result_data = [colg.agg(how) for _, how in func.items()] | ||
| result_data = [ | ||
| getattr(colg, op_name)(how, **kwds) for _, how in func.items() | ||
| ] | ||
| result_index = list(func.keys()) | ||
| elif is_non_unique_col: | ||
| # key used for column selection and output | ||
| | @@ -422,7 +445,7 @@ def agg_dict_like(self) -> DataFrame | Series: | |
| label_to_indices[label].append(index) | ||
| | ||
| key_data = [ | ||
| selected_obj._ixs(indice, axis=1).agg(how) | ||
| getattr(selected_obj._ixs(indice, axis=1), op_name)(how, **kwds) | ||
| for label, indices in label_to_indices.items() | ||
| for indice in indices | ||
| ] | ||
| | @@ -432,7 +455,8 @@ def agg_dict_like(self) -> DataFrame | Series: | |
| else: | ||
| # key used for column selection and output | ||
| result_data = [ | ||
| obj._gotitem(key, ndim=1).agg(how) for key, how in func.items() | ||
| getattr(obj._gotitem(key, ndim=1), op_name)(how) | ||
| for key, how in func.items() | ||
| ] | ||
| result_index = list(func.keys()) | ||
| | ||
| | @@ -527,7 +551,7 @@ def apply_str(self) -> DataFrame | Series: | |
| self.kwargs["axis"] = self.axis | ||
| return self._apply_str(obj, func, *self.args, **self.kwargs) | ||
| | ||
| def apply_multiple(self) -> DataFrame | Series: | ||
| def apply_list_or_dict_like(self) -> DataFrame | Series: | ||
| """ | ||
| Compute apply in case of a list-like or dict-like. | ||
| | ||
| | @@ -543,9 +567,9 @@ def apply_multiple(self) -> DataFrame | Series: | |
| kwargs = self.kwargs | ||
| | ||
| if is_dict_like(func): | ||
| result = self.agg_dict_like() | ||
| result = self._apply_dict_like(op_name="apply") | ||
| else: | ||
| result = self.agg_list_like() | ||
| result = self.agg_or_apply_list_like(op_name="apply") | ||
| | ||
| result = reconstruct_and_relabel_result(result, func, **kwargs) | ||
| | ||
| | @@ -685,8 +709,8 @@ def values(self): | |
| def apply(self) -> DataFrame | Series: | ||
| """compute the results""" | ||
| # dispatch to agg | ||
| if is_list_like(self.func): | ||
| return self.apply_multiple() | ||
| if is_list_like(self.func) or is_dict_like(self.func): | ||
| ||
| return self.apply_list_or_dict_like() | ||
| | ||
| # all empty | ||
| if len(self.columns) == 0 and len(self.index) == 0: | ||
| | @@ -1033,13 +1057,15 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: | |
| class SeriesApply(NDFrameApply): | ||
| obj: Series | ||
| axis: AxisInt = 0 | ||
| by_row: bool # only relevant for apply() | ||
| | ||
| def __init__( | ||
| self, | ||
| obj: Series, | ||
| func: AggFuncType, | ||
| *, | ||
| convert_dtype: bool | lib.NoDefault = lib.no_default, | ||
| by_row: bool = True, | ||
| args, | ||
| kwargs, | ||
| ) -> None: | ||
| | @@ -1054,6 +1080,7 @@ def __init__( | |
| stacklevel=find_stack_level(), | ||
| ) | ||
| self.convert_dtype = convert_dtype | ||
| self.by_row = by_row | ||
| | ||
| super().__init__( | ||
| obj, | ||
| | @@ -1071,8 +1098,8 @@ def apply(self) -> DataFrame | Series: | |
| return self.apply_empty_result() | ||
| | ||
| # dispatch to agg | ||
| if is_list_like(self.func): | ||
| return self.apply_multiple() | ||
| if is_list_like(self.func) or is_dict_like(self.func): | ||
| ||
| return self.apply_list_or_dict_like() | ||
| | ||
| if isinstance(self.func, str): | ||
| # if we are a string, try to dispatch | ||
| | @@ -1118,6 +1145,8 @@ def apply_standard(self) -> DataFrame | Series: | |
| if isinstance(func, np.ufunc): | ||
| with np.errstate(all="ignore"): | ||
| return func(obj, *self.args, **self.kwargs) | ||
| elif not self.by_row: | ||
| return func(obj, *self.args, **self.kwargs) | ||
| | ||
| if self.args or self.kwargs: | ||
| # _map_values does not support args/kwargs | ||
| | ||
Uh oh!
There was an error while loading. Please reload this page.