Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -734,6 +734,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`)
- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, :meth:`SeriesGroupBy.head`, and :meth:`SeriesGroupBy.tail` would raise when used with ``axis=1`` (:issue:`9772`)
- Bug in :meth:`.DataFrameGroupBy.transform` would raise when used with ``axis=1`` and a transformation kernel (e.g. "shift") (:issue:`36308`)
- Bug in :meth:`.DataFrameGroupBy.quantile` couldn't handle array-like ``q`` when grouping by columns (:issue:`35269`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extra period snuck in


Reshaping
^^^^^^^^^
Expand Down
17 changes: 17 additions & 0 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2227,6 +2227,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
)
for qi in q
]
if self.axis == 0:
result = concat(results, axis=0, keys=q)
# fix levels to place quantiles on the inside
# TODO(GH-10710): Ideally, we could write this as
Expand All @@ -2250,6 +2251,22 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray:
# reorder rows to keep things sorted
indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten()
return result.take(indices)
else:
result = concat(results, axis=1, keys=q)

order = list(range(1, result.columns.nlevels)) + [0]
index_names = np.array(result.columns.names)
result.columns.names = np.arange(len(index_names))
result = result.reorder_levels(order, axis=1)
result.columns.names = index_names[order]
indices = (
np.arange(result.shape[1])
.reshape(
[len(q), self.ngroups],
)
.T.flatten()
)
return result.take(indices, axis=1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could a lot of this be shared with the axis=0 version by replacing result.columns with result.axes[1] and above replacing result.index with result.axes[0]? (which in both cases would then become result.axes[axis])

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sounds good, I will change this.


@Substitution(name="groupby")
def ngroup(self, ascending: bool = True):
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/groupby/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,3 +254,26 @@ def test_groupby_timedelta_quantile():
index=Index([1, 2], name="group"),
)
tm.assert_frame_equal(result, expected)


def test_columns_groupby_quantile():
# GH 33795
df = DataFrame(
np.arange(12).reshape(3, -1),
index=list("XYZ"),
columns=pd.Series(list("ABAB"), name="col"),
)
result = df.groupby("col", axis=1).quantile(q=[0.8, 0.2])
expected = DataFrame(
[
[1.6, 0.4, 2.6, 1.4],
[5.6, 4.4, 6.6, 5.4],
[9.6, 8.4, 10.6, 9.4],
],
index=list("XYZ"),
columns=Index(
[("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None]
),
)

tm.assert_frame_equal(result, expected)