Skip to content

Commit 9987be5

Browse files
committed
Fixes nan values after pandas update, add documentation example to the unit test list
1 parent 5b8d9a4 commit 9987be5

File tree

2 files changed

+60
-7
lines changed

2 files changed

+60
-7
lines changed

_unittests/ut_df/test_pandas_groupbynan.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
# coding: utf-8
12
"""
23
@brief test log(time=1s)
34
"""
45
import unittest
56
import pandas
67
import numpy
78
from scipy.sparse.linalg import lsqr as sparse_lsqr
8-
from pyquickhelper.pycode import ExtTestCase
9+
from pyquickhelper.pycode import ExtTestCase, ignore_warnings
910
from pandas_streaming.df import pandas_groupby_nan, numpy_types
1011

1112

@@ -102,6 +103,40 @@ def test_pandas_groupbynan_regular_nanback(self):
102103
lambda: pandas_groupby_nan(df, ["a", "cc"], nanback=True).sum(),
103104
NotImplementedError)
104105

106+
def test_pandas_groupbynan_doc(self):
107+
data = [dict(a=2, ind="a", n=1),
108+
dict(a=2, ind="a"),
109+
dict(a=3, ind="b"),
110+
dict(a=30)]
111+
df = pandas.DataFrame(data)
112+
gr2 = pandas_groupby_nan(df, ["ind"]).sum()
113+
ind = list(gr2['ind'])
114+
self.assertTrue(numpy.isnan(ind[-1]))
115+
val = list(gr2['a'])
116+
self.assertEqual(val[-1], 30)
117+
118+
@ignore_warnings(UserWarning)
119+
def test_pandas_groupbynan_doc2(self):
120+
data = [dict(a=2, ind="a", n=1),
121+
dict(a=2, ind="a"),
122+
dict(a=3, ind="b"),
123+
dict(a=30)]
124+
df = pandas.DataFrame(data)
125+
gr2 = pandas_groupby_nan(df, ["ind", "a"], nanback=False).sum()
126+
ind = list(gr2['ind'])
127+
self.assertEqual(ind[-1], "²nan")
128+
129+
def test_pandas_groupbynan_doc3(self):
130+
data = [dict(a=2, ind="a", n=1),
131+
dict(a=2, ind="a"),
132+
dict(a=3, ind="b"),
133+
dict(a=30)]
134+
df = pandas.DataFrame(data)
135+
self.assertRaise(lambda: pandas_groupby_nan(df, ["ind", "n"]).sum(),
136+
NotImplementedError)
137+
# ind = list(gr2['ind'])
138+
# self.assertTrue(numpy.isnan(ind[-1]))
139+
105140

106141
if __name__ == "__main__":
107142
unittest.main()

pandas_streaming/df/dataframe_helpers.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def pandas_fillna(df, by, hasna=None, suffix=None):
289289
:param suffix: use a prefix for the NaN value
290290
:return: list of values chosen for each column, new dataframe (new copy)
291291
"""
292-
suffix = suffix if suffix else "²"
292+
suffix = suffix if suffix else "²nan"
293293
df = df.copy()
294294
rep = {}
295295
for c in by:
@@ -364,7 +364,10 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
364364
365365
from pandas import DataFrame
366366
367-
data = [dict(a=2, ind="a", n=1), dict(a=2, ind="a"), dict(a=3, ind="b"), dict(a=30)]
367+
data = [dict(a=2, ind="a", n=1),
368+
dict(a=2, ind="a"),
369+
dict(a=3, ind="b"),
370+
dict(a=30)]
368371
df = DataFrame(data)
369372
print(df)
370373
gr = df.groupby(["ind"]).sum()
@@ -378,7 +381,10 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
378381
from pandas import DataFrame
379382
from pandas_streaming.df import pandas_groupby_nan
380383
381-
data = [dict(a=2, ind="a", n=1), dict(a=2, ind="a"), dict(a=3, ind="b"), dict(a=30)]
384+
data = [dict(a=2, ind="a", n=1),
385+
dict(a=2, ind="a"),
386+
dict(a=3, ind="b"),
387+
dict(a=30)]
382388
df = DataFrame(data)
383389
gr2 = pandas_groupby_nan(df, ["ind"]).sum()
384390
print(gr2)
@@ -436,10 +442,22 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True
436442
res.grouper.groupings[0].grouping_vector = arr
437443
if (hasattr(res.grouper.groupings[0], '_cache') and
438444
'result_index' in res.grouper.groupings[0]._cache):
439-
res.grouper.groupings[0]._cache = {}
445+
index = res.grouper.groupings[0]._cache['result_index']
446+
if len(rep) == 1:
447+
key = list(rep.values())[0]
448+
new_index = numpy.array(index)
449+
for i in range(0, len(new_index)): # pylint: disable=C0200
450+
if new_index[i] == key:
451+
new_index[i] = numpy.nan
452+
res.grouper.groupings[0]._cache['result_index'] = (
453+
index.__class__(new_index))
454+
else:
455+
raise NotImplementedError(
456+
"NaN values not implemented for multiindex.")
440457
else:
441-
raise NotImplementedError("Not implemented for type: {0}".format(
442-
type(res.grouper.groupings[0].grouper)))
458+
raise NotImplementedError(
459+
"Not implemented for type: {0}".format(
460+
type(res.grouper.groupings[0].grouper)))
443461
res.grouper._cache['result_index'] = res.grouper.groupings[0]._group_index
444462
else:
445463
if not nanback:

0 commit comments

Comments
 (0)