Skip to content

Commit 54f1fc1

Browse files
committed
Merge pull request #3286 from jreback/GH3283
ENH/BUG: GH3283 allow pivot tables to downcast the output (e.g. float -> int) if possible
2 parents 9dcac7e + f5fb624 commit 54f1fc1

File tree

5 files changed

+61
-8
lines changed

5 files changed

+61
-8
lines changed

RELEASE.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,8 @@ pandas 0.11.0
171171
- added the method ``select_column`` to select a single column from a table as a Series.
172172
- deprecated the ``unique`` method, can be replicated by ``select_column(key,column).unique()``
173173

174+
- Downcast the output of ``pivot_table`` where possible, e.g. float -> int (GH3283_); adds a ``downcast`` argument to ``fillna``
175+
174176
**Bug Fixes**
175177

176178
- Fix seg fault on empty data frame when fillna with ``pad`` or ``backfill``
@@ -376,6 +378,7 @@ pandas 0.11.0
376378
.. _GH3222: https://github.com/pydata/pandas/issues/3222
377379
.. _GH2641: https://github.com/pydata/pandas/issues/2641
378380
.. _GH3238: https://github.com/pydata/pandas/issues/3238
381+
.. _GH3283: https://github.com/pydata/pandas/issues/3283
379382

380383
pandas 0.10.1
381384
=============

pandas/core/frame.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3231,7 +3231,7 @@ def reorder_levels(self, order, axis=0):
32313231
# Filling NA's
32323232

32333233
def fillna(self, value=None, method=None, axis=0, inplace=False,
3234-
limit=None):
3234+
limit=None, downcast=None):
32353235
"""
32363236
Fill NA/NaN values using the specified method
32373237
@@ -3255,6 +3255,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
32553255
a reference to the filled object, which is self if inplace=True
32563256
limit : int, default None
32573257
Maximum size gap to forward or backward fill
3258+
downcast : dict, default is None, a dict of item->dtype of what to
3259+
downcast if possible
32583260
32593261
See also
32603262
--------
@@ -3300,7 +3302,8 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
33003302
result[k].fillna(v, inplace=True)
33013303
return result
33023304
else:
3303-
new_data = self._data.fillna(value, inplace=inplace)
3305+
new_data = self._data.fillna(value, inplace=inplace,
3306+
downcast=downcast)
33043307

33053308
if inplace:
33063309
self._data = new_data

pandas/core/internals.py

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class Block(object):
2828
is_bool = False
2929
is_object = False
3030
_can_hold_na = False
31+
_downcast_dtype = None
3132

3233
def __init__(self, values, items, ref_items, ndim=2):
3334
if issubclass(values.dtype.type, basestring):
@@ -205,7 +206,7 @@ def split_block_at(self, item):
205206
self.items[s:e].copy(),
206207
self.ref_items)
207208

208-
def fillna(self, value, inplace=False):
209+
def fillna(self, value, inplace=False, downcast=None):
209210
if not self._can_hold_na:
210211
if inplace:
211212
return self
@@ -216,10 +217,32 @@ def fillna(self, value, inplace=False):
216217
mask = com.isnull(new_values)
217218
np.putmask(new_values, mask, value)
218219

219-
if inplace:
220-
return self
221-
else:
222-
return make_block(new_values, self.items, self.ref_items)
220+
block = make_block(new_values, self.items, self.ref_items)
221+
if downcast:
222+
block = block.downcast()
223+
return block
224+
225+
def downcast(self, dtypes=None):
    """
    Split this block into single-item blocks, trying to downcast each one.

    Parameters
    ----------
    dtypes : dict, default None
        Mapping of item -> dtype to try for that item.  An item missing
        from the mapping falls back to the block's ``_downcast_dtype``;
        when that is also None the item's values are left as they are.

    Returns
    -------
    list of blocks, one per entry of ``self.items`` (in the same order)
    """
    overrides = dict() if dtypes is None else dtypes
    data = self.values

    def _item_values(pos, item):
        # an explicit per-item dtype wins over the class-level default
        target = overrides.get(item, self._downcast_dtype)
        row = data[pos]
        if target is not None:
            row = _possibly_downcast_to_dtype(row, np.dtype(target))
        return _block_shape(row)

    return [make_block(_item_values(pos, item), [ item ], self.ref_items)
            for pos, item in enumerate(self.items)]
223246

224247
def astype(self, dtype, copy = True, raise_on_error = True):
225248
"""
@@ -563,6 +586,7 @@ def _try_cast_result(self, result):
563586
return _possibly_downcast_to_dtype(result, self.dtype)
564587

565588
class FloatBlock(NumericBlock):
589+
_downcast_dtype = 'int64'
566590

567591
def _can_hold_element(self, element):
568592
if isinstance(element, np.ndarray):
@@ -974,6 +998,9 @@ def shift(self, *args, **kwargs):
974998
def fillna(self, *args, **kwargs):
975999
return self.apply('fillna', *args, **kwargs)
9761000

1001+
def downcast(self, *args, **kwargs):
    """Run ``downcast`` through ``self.apply``, forwarding every argument."""
    method_name = 'downcast'
    return self.apply(method_name, *args, **kwargs)
1003+
9771004
def astype(self, *args, **kwargs):
9781005
return self.apply('astype', *args, **kwargs)
9791006

pandas/tools/pivot.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
111111
table = table.sort_index(axis=1)
112112

113113
if fill_value is not None:
114-
table = table.fillna(value=fill_value)
114+
table = table.fillna(value=fill_value, downcast=True)
115115

116116
if margins:
117117
table = _add_margins(table, data, values, rows=rows,

pandas/tools/tests/test_pivot.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,26 @@ def test_pivot_table_multiple(self):
6969
expected = self.data.groupby(rows + [cols]).agg(np.mean).unstack()
7070
tm.assert_frame_equal(table, expected)
7171

72+
def test_pivot_dtypes(self):
    """Check the dtypes pivot_table produces when a fill_value is given."""

    # (values for 'v', aggfunc, dtype of 'v' and of both pivoted columns)
    scenarios = [
        # can convert dtypes
        ([1,2,3,4], np.sum, 'int64'),
        # cannot convert dtypes
        ([1.5,2.5,3.5,4.5], np.mean, 'float64'),
    ]

    for v_values, aggfunc, dtype_name in scenarios:
        f = DataFrame({'a' : ['cat', 'bat', 'cat', 'bat'],
                       'v' : v_values,
                       'i' : ['a','b','a','b']})
        self.assert_(f.dtypes['v'] == dtype_name)

        z = pivot_table(f, values='v', rows=['a'], cols=['i'],
                        fill_value=0, aggfunc=aggfunc)
        result = z.get_dtype_counts()
        expected = Series({dtype_name : 2})
        tm.assert_series_equal(result, expected)
91+
7292
def test_pivot_multi_values(self):
7393
result = pivot_table(self.data, values=['D', 'E'],
7494
rows='A', cols=['B', 'C'], fill_value=0)

0 commit comments

Comments
 (0)