5858 validate_func_kwargs ,
5959)
6060import pandas .core .algorithms as algorithms
61- from pandas .core .arrays import ExtensionArray
61+ from pandas .core .arrays import Categorical , ExtensionArray
6262from pandas .core .base import DataError , SpecificationError
6363import pandas .core .common as com
6464from pandas .core .construction import create_series_with_explicit_dtype
@@ -1026,38 +1026,64 @@ def _cython_agg_blocks(
10261026 if numeric_only :
10271027 data = data .get_numeric_data (copy = False )
10281028
1029- no_result = object ()
1030-
10311029 def cast_agg_result (result , values : ArrayLike , how : str ) -> ArrayLike :
10321030 # see if we can cast the values to the desired dtype
10331031 # this may not be the original dtype
10341032 assert not isinstance (result , DataFrame )
1035- assert result is not no_result
10361033
10371034 dtype = maybe_cast_result_dtype (values .dtype , how )
10381035 result = maybe_downcast_numeric (result , dtype )
10391036
1040- if isinstance (values , ExtensionArray ) and isinstance (result , np .ndarray ):
1041- # e.g. values was an IntegerArray
1042- # (1, N) case can occur if values was Categorical
1043- # and result is ndarray[object]
1044- # TODO(EA2D): special casing not needed with 2D EAs
1045- assert result .ndim == 1 or result .shape [0 ] == 1
1046- try :
1047- # Cast back if feasible
1048- result = type (values )._from_sequence (
1049- result .ravel (), dtype = values .dtype
1050- )
1051- except (ValueError , TypeError ):
1052- # reshape to be valid for non-Extension Block
1053- result = result .reshape (1 , - 1 )
1037+ if isinstance (values , Categorical ) and isinstance (result , np .ndarray ):
1038+ # If the Categorical op didn't raise, it is dtype-preserving
1039+ result = type (values )._from_sequence (result .ravel (), dtype = values .dtype )
1040+ # Note this will have result.dtype == dtype from above
10541041
10551042 elif isinstance (result , np .ndarray ) and result .ndim == 1 :
10561043 # We went through a SeriesGroupByPath and need to reshape
1044+ # GH#32223 includes case with IntegerArray values
10571045 result = result .reshape (1 , - 1 )
1046+ # test_groupby_duplicate_columns gets here with
1047+ # result.dtype == int64, values.dtype=object, how="min"
10581048
10591049 return result
10601050
1051+ def py_fallback (bvalues : ArrayLike ) -> ArrayLike :
1052+ # if self.grouper.aggregate fails, we fall back to a pure-python
1053+ # solution
1054+
1055+ # We get here with a) EADtypes and b) object dtype
1056+ obj : FrameOrSeriesUnion
1057+
1058+ # call our grouper again with only this block
1059+ if isinstance (bvalues , ExtensionArray ):
1060+ # TODO(EA2D): special case not needed with 2D EAs
1061+ obj = Series (bvalues )
1062+ else :
1063+ obj = DataFrame (bvalues .T )
1064+ if obj .shape [1 ] == 1 :
1065+ # Avoid call to self.values that can occur in DataFrame
1066+ # reductions; see GH#28949
1067+ obj = obj .iloc [:, 0 ]
1068+
1069+ # Create SeriesGroupBy with observed=True so that it does
1070+ # not try to add missing categories if grouping over multiple
1071+ # Categoricals. This will be done later by self._reindex_output()
1072+ # Doing it here creates an error. See GH#34951
1073+ sgb = get_groupby (obj , self .grouper , observed = True )
1074+ result = sgb .aggregate (lambda x : alt (x , axis = self .axis ))
1075+
1076+ assert isinstance (result , (Series , DataFrame )) # for mypy
1077+ # In the case of object dtype block, it may have been split
1078+ # in the operation. We un-split here.
1079+ result = result ._consolidate ()
1080+ assert isinstance (result , (Series , DataFrame )) # for mypy
1081+ assert len (result ._mgr .blocks ) == 1
1082+
1083+ # unwrap DataFrame to get array
1084+ result = result ._mgr .blocks [0 ].values
1085+ return result
1086+
10611087 def blk_func (bvalues : ArrayLike ) -> ArrayLike :
10621088
10631089 try :
@@ -1075,35 +1101,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
10751101 assert how == "ohlc"
10761102 raise
10771103
1078- # We get here with a) EADtypes and b) object dtype
1079- obj : FrameOrSeriesUnion
1080- # call our grouper again with only this block
1081- if isinstance (bvalues , ExtensionArray ):
1082- # TODO(EA2D): special case not needed with 2D EAs
1083- obj = Series (bvalues )
1084- else :
1085- obj = DataFrame (bvalues .T )
1086- if obj .shape [1 ] == 1 :
1087- # Avoid call to self.values that can occur in DataFrame
1088- # reductions; see GH#28949
1089- obj = obj .iloc [:, 0 ]
1090-
1091- # Create SeriesGroupBy with observed=True so that it does
1092- # not try to add missing categories if grouping over multiple
1093- # Categoricals. This will be done later by self._reindex_output()
1094- # Doing it here creates an error. See GH#34951
1095- sgb = get_groupby (obj , self .grouper , observed = True )
1096- result = sgb .aggregate (lambda x : alt (x , axis = self .axis ))
1097-
1098- assert isinstance (result , (Series , DataFrame )) # for mypy
1099- # In the case of object dtype block, it may have been split
1100- # in the operation. We un-split here.
1101- result = result ._consolidate ()
1102- assert isinstance (result , (Series , DataFrame )) # for mypy
1103- assert len (result ._mgr .blocks ) == 1
1104-
1105- # unwrap DataFrame to get array
1106- result = result ._mgr .blocks [0 ].values
1104+ result = py_fallback (bvalues )
11071105
11081106 return cast_agg_result (result , bvalues , how )
11091107
0 commit comments