Skip to content
Prev Previous commit
Next Next commit
BinGrouper holds the sorted binners, give the indexer to reorder axis…
… if needed, eg. build grouping labels
  • Loading branch information
ruiann authored and jreback committed Oct 1, 2017
commit 66febe4e54260f0875526fa193423360715e2421
9 changes: 7 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1730,13 +1730,14 @@ class BaseGrouper(object):
"""

def __init__(self, axis, groupings, sort=True, group_keys=True,
mutated=False):
mutated=False, indexer=None):
self._filter_empty_groups = self.compressed = len(groupings) != 1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a docstring explaining the params? (I know you only added one, but this is a good time to document them all.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sorry, I don't really understand all of the parameters; I've documented the ones I know 😂

self.axis = axis
self.groupings = groupings
self.sort = sort
self.group_keys = group_keys
self.mutated = mutated
self.indexer = indexer

@property
def shape(self):
Expand Down Expand Up @@ -2282,11 +2283,12 @@ def generate_bins_generic(values, binner, closed):

class BinGrouper(BaseGrouper):

def __init__(self, bins, binlabels, filter_empty=False, mutated=False):
def __init__(self, bins, binlabels, filter_empty=False, mutated=False, indexer=None):
self.bins = _ensure_int64(bins)
self.binlabels = _ensure_index(binlabels)
self._filter_empty_groups = filter_empty
self.mutated = mutated
self.indexer = indexer

@cache_readonly
def groups(self):
Expand Down Expand Up @@ -2554,6 +2556,9 @@ def _make_labels(self):
if isinstance(self.grouper, BaseGrouper):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same

labels, _, _ = self.grouper.group_info
uniques = self.grouper.result_index
if self.grouper.indexer is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Refactor this into a method on BaseGrouper itself and override it in BinGrouper; do the same for indices.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm afraid it can't be done this way. Grouping uses group_info for the unsorted axis, while some other scenarios call group_info to group a sorted axis. I think it's better to keep group_info sorted and reorder it to get the unsorted label sequence.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine, but it's not my point. I don't want these if/else branches in the properties; rather, they should simply be overridden methods on the grouper type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK I get your point.

sorter = np.lexsort((labels, self.grouper.indexer))
labels = labels[sorter]
else:
labels, uniques = algorithms.factorize(
self.grouper, sort=self.sort)
Expand Down
11 changes: 9 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def _get_binner(self):
"""

binner, bins, binlabels = self._get_binner_for_time()
bin_grouper = BinGrouper(bins, binlabels)
bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer)
return binner, bin_grouper

def _assure_grouper(self):
Expand Down Expand Up @@ -879,7 +879,14 @@ def _downsample(self, how, **kwargs):

if is_subperiod(ax.freq, self.freq):
# Downsampling
return self._groupby_and_aggregate(how, grouper=self.grouper)
if len(new_index) == 0:
bins = []
else:
i8 = memb.asi8
rng = np.arange(i8[0], i8[-1] + 1)
bins = memb.searchsorted(rng, side='right')
grouper = BinGrouper(bins, new_index, indexer=self.groupby.indexer)
return self._groupby_and_aggregate(how, grouper=grouper)
elif is_superperiod(ax.freq, self.freq):
if how == 'ohlc':
# GH #13083
Expand Down