Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -589,3 +589,4 @@ Other
^^^^^

- Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`)
- Bug in ``Series.memory_usage`` which assumed the series would always have more than one element (:issue:`19368`)
26 changes: 16 additions & 10 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,14 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr):
cdef Py_ssize_t i, n
cdef int64_t s = 0

# The problem here is that...
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comments are not necessary

# A SparseArray of size 1 that has fill_value = the only value
# will cause this

# n = 1
#
n = len(arr)
for i from 0 <= i < n:
for i in range(n):
s += arr[i].__sizeof__()
return s

Expand Down Expand Up @@ -131,10 +137,10 @@ def fast_unique_multiple(list arrays):
dict table = {}
object val, stub = 0

for i from 0 <= i < k:
for i in range(k):
buf = arrays[i]
n = len(buf)
for j from 0 <= j < n:
for j in range(n):
val = buf[j]
if val not in table:
table[val] = stub
Expand All @@ -158,10 +164,10 @@ def fast_unique_multiple_list(list lists):
dict table = {}
object val, stub = 0

for i from 0 <= i < k:
for i in range(k):
buf = lists[i]
n = len(buf)
for j from 0 <= j < n:
for j in range(n):
val = buf[j]
if val not in table:
table[val] = stub
Expand Down Expand Up @@ -200,7 +206,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True):

for buf in gen:
n = len(buf)
for j from 0 <= j < n:
for j in range(n):
val = buf[j]
if val not in table:
table[val] = stub
Expand Down Expand Up @@ -830,15 +836,15 @@ def count_level_2d(ndarray[uint8_t, ndim=2, cast=True] mask,
if axis == 0:
counts = np.zeros((max_bin, k), dtype='i8')
with nogil:
for i from 0 <= i < n:
for j from 0 <= j < k:
for i in range(n):
for j in range(k):
counts[labels[i], j] += mask[i, j]

else: # axis == 1
counts = np.zeros((n, max_bin), dtype='i8')
with nogil:
for i from 0 <= i < n:
for j from 0 <= j < k:
for i in range(n):
for j in range(k):
counts[i, labels[j]] += mask[i, j]

return counts
Expand Down
14 changes: 10 additions & 4 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,12 +1069,18 @@ def memory_usage(self, deep=False):
--------
numpy.ndarray.nbytes
"""
if hasattr(self.values, 'memory_usage'):
return self.values.memory_usage(deep=deep)
# Use sparse values if they exist for memory consumption
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather override this in SparseSeries/SparseArray

if hasattr(self.values, 'sp_values'):
values = self.values.sp_values
else:
values = self.values

if hasattr(values, 'memory_usage'):
return values.memory_usage(deep=deep)

v = self.values.nbytes
v = values.nbytes
if deep and is_object_dtype(self) and not PYPY:
v += lib.memory_usage_of_objects(self.values)
v += lib.memory_usage_of_objects(values)
return v

def factorize(self, sort=False, na_sentinel=-1):
Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from pandas.core.sparse.api import SparseSeries
from pandas.tests.series.test_api import SharedWithSparse

from itertools import product


def _test_data1():
# nan-based
Expand Down Expand Up @@ -971,6 +973,17 @@ def test_combine_first(self):
tm.assert_sp_series_equal(result, result2)
tm.assert_sp_series_equal(result, expected)

@pytest.mark.parametrize('deep,fill_values', [([True, False],
[0, 1, np.nan, None])])
def test_memory_usage_deep(self, deep, fill_values):
for deep, fill_value in product(deep, fill_values):
sparse_series = SparseSeries(fill_values, fill_value=fill_value)
dense_series = Series(fill_values)
sparse_usage = sparse_series.memory_usage(deep=deep)
dense_usage = dense_series.memory_usage(deep=deep)

assert sparse_usage < dense_usage


class TestSparseHandlingMultiIndexes(object):

Expand Down