BUG: Fix some PeriodIndex resampling issues #16153
Changes from all commits
ca7b6f2 c27f430 390e16e 23566c2 a82879d 4b1c740 73c0990 fa6c1d3 7ea04e9 82a8275 432c623 c8814fb 486ad67 ad8519f 39fc7e2 efcad5b 41401d4 398a684 8358c41 6084e0c
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -14,7 +14,7 @@ | |
| from pandas.core.indexes.datetimes import DatetimeIndex, date_range | ||
| from pandas.core.indexes.timedeltas import TimedeltaIndex | ||
| from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds | ||
| from pandas.core.indexes.period import PeriodIndex, period_range | ||
| from pandas.core.indexes.period import PeriodIndex | ||
| import pandas.core.common as com | ||
| import pandas.core.algorithms as algos | ||
| from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries | ||
| | @@ -834,53 +834,32 @@ class PeriodIndexResampler(DatetimeIndexResampler): | |
| def _resampler_for_grouping(self): | ||
| return PeriodIndexResamplerGroupby | ||
| | ||
| def _get_binner_for_time(self): | ||
| if self.kind == 'timestamp': | ||
| return super(PeriodIndexResampler, self)._get_binner_for_time() | ||
| return self.groupby._get_period_bins(self.ax) | ||
| | ||
| def _convert_obj(self, obj): | ||
| obj = super(PeriodIndexResampler, self)._convert_obj(obj) | ||
| | ||
| offset = to_offset(self.freq) | ||
| if offset.n > 1: | ||
| if self.kind == 'period': # pragma: no cover | ||
| print('Warning: multiple of frequency -> timestamps') | ||
| | ||
| # Cannot have multiple of periods, convert to timestamp | ||
| if self._from_selection: | ||
| # see GH 14008, GH 12871 | ||
| msg = ("Resampling from level= or on= selection" | ||
| " with a PeriodIndex is not currently supported," | ||
| " use .set_index(...) to explicitly set index") | ||
| raise NotImplementedError(msg) | ||
| | ||
| if self.loffset is not None: | ||
| # Cannot apply loffset/timedelta to PeriodIndex -> convert to | ||
| # timestamps | ||
| *Review comment:* should we show a warning for this? *Reply:* I think we should, given that we kind of did that before (it was … | ||
| self.kind = 'timestamp' | ||
| | ||
| # convert to timestamp | ||
| if not (self.kind is None or self.kind == 'period'): | ||
| if self._from_selection: | ||
| # see GH 14008, GH 12871 | ||
| msg = ("Resampling from level= or on= selection" | ||
| " with a PeriodIndex is not currently supported," | ||
| " use .set_index(...) to explicitly set index") | ||
| raise NotImplementedError(msg) | ||
| else: | ||
| obj = obj.to_timestamp(how=self.convention) | ||
| if self.kind == 'timestamp': | ||
| obj = obj.to_timestamp(how=self.convention) | ||
| | ||
| return obj | ||
| | ||
| def aggregate(self, arg, *args, **kwargs): | ||
| result, how = self._aggregate(arg, *args, **kwargs) | ||
| if result is None: | ||
| result = self._downsample(arg, *args, **kwargs) | ||
| | ||
| result = self._apply_loffset(result) | ||
| return result | ||
| | ||
| agg = aggregate | ||
| | ||
| def _get_new_index(self): | ||
| """ return our new index """ | ||
| ax = self.ax | ||
| | ||
| if len(ax) == 0: | ||
| values = [] | ||
| else: | ||
| start = ax[0].asfreq(self.freq, how=self.convention) | ||
| end = ax[-1].asfreq(self.freq, how='end') | ||
| values = period_range(start, end, freq=self.freq).asi8 | ||
| | ||
| return ax._shallow_copy(values, freq=self.freq) | ||
| | ||
| def _downsample(self, how, **kwargs): | ||
| """ | ||
| Downsample the cython defined function | ||
| | @@ -898,22 +877,17 @@ def _downsample(self, how, **kwargs): | |
| how = self._is_cython_func(how) or how | ||
| ax = self.ax | ||
| | ||
| new_index = self._get_new_index() | ||
| | ||
| # Start vs. end of period | ||
| memb = ax.asfreq(self.freq, how=self.convention) | ||
| | ||
| if is_subperiod(ax.freq, self.freq): | ||
| # Downsampling | ||
| if len(new_index) == 0: | ||
| bins = [] | ||
| else: | ||
| i8 = memb.asi8 | ||
| rng = np.arange(i8[0], i8[-1] + 1) | ||
| bins = memb.searchsorted(rng, side='right') | ||
| grouper = BinGrouper(bins, new_index) | ||
| return self._groupby_and_aggregate(how, grouper=grouper) | ||
| return self._groupby_and_aggregate(how, grouper=self.grouper) | ||
| elif is_superperiod(ax.freq, self.freq): | ||
| if how == 'ohlc': | ||
| # GH #13083 | ||
| # upsampling to subperiods is handled as an asfreq, which works | ||
| # for pure aggregating/reducing methods | ||
| # OHLC reduces along the time dimension, but creates multiple | ||
| # values for each period -> handle by _groupby_and_aggregate() | ||
| return self._groupby_and_aggregate(how, grouper=self.grouper) | ||
| return self.asfreq() | ||
| elif ax.freq == self.freq: | ||
| return self.asfreq() | ||
| | @@ -936,19 +910,16 @@ def _upsample(self, method, limit=None, fill_value=None): | |
| .fillna | ||
| | ||
| """ | ||
| if self._from_selection: | ||
| raise ValueError("Upsampling from level= or on= selection" | ||
| " is not supported, use .set_index(...)" | ||
| " to explicitly set index to" | ||
| " datetime-like") | ||
| | ||
| # we may need to actually resample as if we are timestamps | ||
| if self.kind == 'timestamp': | ||
| return super(PeriodIndexResampler, self)._upsample( | ||
| method, limit=limit, fill_value=fill_value) | ||
| | ||
| self._set_binner() | ||
| ax = self.ax | ||
| obj = self.obj | ||
| new_index = self._get_new_index() | ||
| new_index = self.binner | ||
| | ||
| # Start vs. end of period | ||
| memb = ax.asfreq(self.freq, how=self.convention) | ||
| | @@ -1293,6 +1264,51 @@ def _get_time_period_bins(self, ax): | |
| | ||
| return binner, bins, labels | ||
| | ||
| def _get_period_bins(self, ax): | ||
| if not isinstance(ax, PeriodIndex): | ||
| raise TypeError('axis must be a PeriodIndex, but got ' | ||
| 'an instance of %r' % type(ax).__name__) | ||
| | ||
| memb = ax.asfreq(self.freq, how=self.convention) | ||
| | ||
| # NaT handling as in pandas._lib.lib.generate_bins_dt64() | ||
| nat_count = 0 | ||
| if memb.hasnans: | ||
| nat_count = np.sum(memb._isnan) | ||
| memb = memb[~memb._isnan] | ||
| | ||
| # if index contains no valid (non-NaT) values, return empty index | ||
| if not len(memb): | ||
| binner = labels = PeriodIndex( | ||
| *Review comment:* You could use … *Reply:* I left this, ok for now. | ||
| data=[], freq=self.freq, name=ax.name) | ||
| return binner, [], labels | ||
| | ||
| start = ax.min().asfreq(self.freq, how=self.convention) | ||
| end = ax.max().asfreq(self.freq, how='end') | ||
| | ||
| labels = binner = PeriodIndex(start=start, end=end, | ||
| freq=self.freq, name=ax.name) | ||
| | ||
| i8 = memb.asi8 | ||
| freq_mult = self.freq.n | ||
| | ||
| # when upsampling to subperiods, we need to generate enough bins | ||
| expected_bins_count = len(binner) * freq_mult | ||
| i8_extend = expected_bins_count - (i8[-1] - i8[0]) | ||
| rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult) | ||
| rng += freq_mult | ||
| bins = memb.searchsorted(rng, side='left') | ||
| | ||
| if nat_count > 0: | ||
| # NaT handling as in pandas._lib.lib.generate_bins_dt64() | ||
| *Review comment:* is this path tested sufficiently, e.g. 0, 1, 2 NaT? *Reply:* added a test case for consecutive NaTs in the index (1cad7fa). Should be sufficiently tested, cases covered: … Any ideas for more exhaustive test cases? (A hedged test sketch for these cases appears after the diff below.) | ||
| # shift bins by the number of NaT | ||
| bins += nat_count | ||
| bins = np.insert(bins, 0, nat_count) | ||
| binner = binner.insert(0, tslib.NaT) | ||
| labels = labels.insert(0, tslib.NaT) | ||
| | ||
| return binner, bins, labels | ||
| | ||
| | ||
| def _take_new_index(obj, indexer, new_index, axis=0): | ||
| from pandas.core.api import Series, DataFrame | ||
| | ||
*Review comment:* the fixed things that are not changed (e.g. [1], [2]), do in a separate ipython block above (to avoid repeating in previous/new).
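
To make that whatsnew suggestion concrete, here is a rough sketch of the kind of ipython-block example it could hold: resampling a PeriodIndex-backed Series down and up while staying in period space. The data, the '2Q' multiple, and the fill choice are assumptions for illustration, not taken from this PR's docs.

```python
import numpy as np
import pandas as pd

# A small monthly Series on a PeriodIndex.
pi = pd.period_range('2017-01', periods=12, freq='M')
s = pd.Series(np.arange(12, dtype='float64'), index=pi)

# Downsampling to a multiple of a frequency; with this patch the bins
# come from _get_period_bins rather than a timestamp round-trip.
print(s.resample('2Q').sum())

# Upsampling to a subperiod is handled as an asfreq plus an optional fill.
print(s.resample('D').ffill().head())
```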