Skip to content
133 changes: 63 additions & 70 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1448,94 +1448,53 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None):
qs: a scalar or list of the quantiles to be computed
interpolation: type of interpolation, default 'linear'
axis: axis to compute, default 0
axes : BlockManager.axes

Returns
-------
tuple of (axis, block)

Block
"""
kw = {'interpolation': interpolation}
values = self.get_values()
values, _ = self._try_coerce_args(values, values)

def _nanpercentile1D(values, mask, q, **kw):
# mask is Union[ExtensionArray, ndarray]
values = values[~mask]

if len(values) == 0:
if lib.is_scalar(q):
return self._na_value
else:
return np.array([self._na_value] * len(q),
dtype=values.dtype)

return np.percentile(values, q, **kw)

def _nanpercentile(values, q, axis, **kw):

mask = isna(self.values)
if not lib.is_scalar(mask) and mask.any():
if self.ndim == 1:
return _nanpercentile1D(values, mask, q, **kw)
else:
# for nonconsolidatable blocks mask is 1D, but values 2D
if mask.ndim < values.ndim:
mask = mask.reshape(values.shape)
if axis == 0:
values = values.T
mask = mask.T
result = [_nanpercentile1D(val, m, q, **kw) for (val, m)
in zip(list(values), list(mask))]
result = np.array(result, dtype=values.dtype, copy=False).T
return result
else:
return np.percentile(values, q, axis=axis, **kw)

from pandas import Float64Index
is_empty = values.shape[axis] == 0
if is_list_like(qs):
ax = Float64Index(qs)
orig_scalar = not is_list_like(qs)
if orig_scalar:
# make list-like, unpack later
qs = [qs]

if is_empty:
if self.ndim == 1:
result = self._na_value
else:
# create the array of na_values
# 2d len(values) * len(qs)
result = np.repeat(np.array([self._na_value] * len(qs)),
len(values)).reshape(len(values),
len(qs))
if is_empty:
if self.ndim == 1:
result = self._na_value
else:
result = _nanpercentile(values, np.array(qs) * 100,
axis=axis, **kw)

result = np.array(result, copy=False)
if self.ndim > 1:
result = result.T

# create the array of na_values
# 2d len(values) * len(qs)
result = np.repeat(np.array([self._na_value] * len(qs)),
len(values)).reshape(len(values),
len(qs))
else:
mask = isna(self.values)
result = _nanpercentile(values, np.array(qs) * 100,
axis=axis, na_value=self._na_value,
mask=mask, ndim=self.ndim, **kw)

if self.ndim == 1:
ax = Float64Index([qs])
else:
ax = axes[0]
result = np.array(result, copy=False)
if self.ndim > 1:
result = result.T

if is_empty:
if self.ndim == 1:
result = self._na_value
else:
result = np.array([self._na_value] * len(self))
else:
result = _nanpercentile(values, qs * 100, axis=axis, **kw)
if orig_scalar and not lib.is_scalar(result):
# result could be scalar in case with is_empty and self.ndim == 1
assert result.shape[-1] == 1, result.shape
result = result[..., 0]
result = lib.item_from_zerodim(result)

ndim = getattr(result, 'ndim', None) or 0
result = self._try_coerce_result(result)
if lib.is_scalar(result):
return ax, self.make_block_scalar(result)
return ax, make_block(result,
placement=np.arange(len(result)),
ndim=ndim)
return self.make_block_scalar(result)
return make_block(result,
placement=np.arange(len(result)),
ndim=ndim)

def _replace_coerce(self, to_replace, value, inplace=True, regex=False,
convert=False, mask=None):
Expand Down Expand Up @@ -3365,3 +3324,37 @@ def _putmask_preserve(nv, n):
v = v.astype(dtype)

return _putmask_preserve(v, n)


# TODO: belongs elsewhere?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this to nanops.

def _nanpercentile1D(values, mask, q, na_value, **kw):
    """
    Compute percentile(s) of a 1D array after dropping masked entries.

    Parameters
    ----------
    values : array indexable by a boolean mask
    mask : Union[ExtensionArray, ndarray]
        Boolean mask; entries where it is True are excluded.
    q : scalar or list-like
        Percentile(s) handed to ``np.percentile``.
    na_value : scalar
        Returned (or repeated once per entry of ``q``) when no unmasked
        entries remain.
    **kw
        Forwarded to ``np.percentile``.

    Returns
    -------
    scalar or ndarray
    """
    unmasked = values[~mask]

    if len(unmasked):
        return np.percentile(unmasked, q, **kw)

    # Nothing left to reduce: answer with na_value, shaped like q.
    if lib.is_scalar(q):
        return na_value
    return np.array([na_value] * len(q), dtype=unmasked.dtype)


def _nanpercentile(values, q, axis, na_value, mask, ndim, **kw):
    """
    Compute percentile(s) of ``values`` along ``axis``, skipping masked
    entries.

    Parameters
    ----------
    values : ndarray
    q : scalar or list-like
        Percentile(s) handed to ``np.percentile``.
    axis : int
        Axis passed to ``np.percentile`` when nothing is masked; on the
        masked 2D path, ``axis == 0`` transposes ``values`` and ``mask``
        before reducing row by row.
    na_value : scalar
        Value used for slices that have no unmasked entries.
    mask : ndarray or scalar
        Boolean mask marking the entries to skip.
    ndim : int
        When 1, ``values`` is reduced as a single 1D array.
    **kw
        Forwarded to ``np.percentile``.

    Returns
    -------
    scalar or ndarray
    """
    if lib.is_scalar(mask) or not mask.any():
        # Nothing is masked, so numpy can reduce the whole array directly.
        return np.percentile(values, q, axis=axis, **kw)

    if ndim == 1:
        return _nanpercentile1D(values, mask, q, na_value, **kw)

    # for nonconsolidatable blocks mask is 1D, but values 2D
    if mask.ndim < values.ndim:
        mask = mask.reshape(values.shape)
    if axis == 0:
        values = values.T
        mask = mask.T

    # Reduce one row at a time so each row drops only its own masked entries.
    result = [_nanpercentile1D(val, m, q, na_value, **kw)
              for val, m in zip(values, mask)]

    # np.asarray instead of np.array(..., copy=False): on NumPy >= 2,
    # copy=False raises ValueError whenever a copy is unavoidable, and
    # converting a Python list always copies.
    return np.asarray(result, dtype=values.dtype).T
30 changes: 20 additions & 10 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
maybe_promote)
from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetimelike_v_numeric, is_extension_array_dtype,
is_extension_type, is_numeric_v_string_like, is_scalar)
is_extension_type, is_list_like, is_numeric_v_string_like, is_scalar)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -402,34 +402,47 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
bm._consolidate_inplace()
return bm

def reduction(self, f, axis=0, consolidate=True, transposed=False,
**kwargs):
def quantile(self, axis=0, consolidate=True, transposed=False,
interpolation='linear', qs=None, numeric_only=None):
"""
iterate over the blocks, collect and create a new block manager.
Iterate over blocks applying quantile reduction.
This routine is intended for reduction type operations and
will do inference on the generated blocks.

Parameters
----------
f: the callable or function name to operate on at the block level
axis: reduction axis, default 0
consolidate: boolean, default True. Join together blocks having same
dtype
transposed: boolean, default False
we are holding transposed data
interpolation : type of interpolation, default 'linear'
qs : a scalar or list of the quantiles to be computed
numeric_only : ignored

Returns
-------
Block Manager (new object)

"""

if consolidate:
self._consolidate_inplace()

def get_axe(block, qs, axes):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think making this a function adds anything.

from pandas import Float64Index
if is_list_like(qs):
ax = Float64Index(qs)
elif block.ndim == 1:
ax = Float64Index([qs])
else:
ax = axes[0]
return ax

axes, blocks = [], []
for b in self.blocks:
axe, block = getattr(b, f)(axis=axis, axes=self.axes, **kwargs)
block = b.quantile(axis=axis, qs=qs, interpolation=interpolation)

axe = get_axe(b, qs, axes=self.axes)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, it doesn't need / take the block arg.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it uses the block arg

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I mean get_axe doesn't.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(which is why it is a function instead of just done once outside the loop).

I'd rather keep it as a function than in-line it, but not a deal-breaker. There is another PR after this that will be ripping out a bunch of code regardless.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> No, I mean get_axe doesn't.

Line 435 inside get_axe reads `elif block.ndim == 1:`

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Grr, OK, I see now.


axes.append(axe)
blocks.append(block)
Expand Down Expand Up @@ -496,9 +509,6 @@ def isna(self, func, **kwargs):
def where(self, **kwargs):
    """
    Delegate to ``self.apply`` with 'where' as the operation name,
    forwarding all keyword arguments unchanged.

    Returns whatever ``self.apply`` returns.
    """
    return self.apply('where', **kwargs)

def quantile(self, **kwargs):
    """
    Delegate to ``self.reduction`` with 'quantile' as the operation name,
    forwarding all keyword arguments unchanged.

    Returns whatever ``self.reduction`` returns.
    """
    return self.reduction('quantile', **kwargs)

def setitem(self, **kwargs):
    """
    Delegate to ``self.apply`` with 'setitem' as the operation name,
    forwarding all keyword arguments unchanged.

    Returns whatever ``self.apply`` returns.
    """
    return self.apply('setitem', **kwargs)

Expand Down