@@ -217,6 +217,16 @@ def test_describe(self):
217217 ).set_index ('levels' )
218218 tm .assert_frame_equal (desc , expected )
219219
220+ # check unused levels
221+ cat = self .factor .copy ()
222+ cat .levels = ["a" ,"b" ,"c" ,"d" ]
223+ desc = cat .describe ()
224+ expected = DataFrame .from_dict (dict (counts = [3 , 2 , 3 , np .nan ],
225+ freqs = [3 / 8. , 2 / 8. , 3 / 8. , np .nan ],
226+ levels = ['a' , 'b' , 'c' , 'd' ])
227+ ).set_index ('levels' )
228+ tm .assert_frame_equal (desc , expected )
229+
220230 # check an integer one
221231 desc = Categorical ([1 ,2 ,3 ,1 ,2 ,3 ,3 ,2 ,1 ,1 ,1 ]).describe ()
222232 expected = DataFrame .from_dict (dict (counts = [5 , 3 , 3 ],
@@ -226,6 +236,29 @@ def test_describe(self):
226236 ).set_index ('levels' )
227237 tm .assert_frame_equal (desc , expected )
228238
239+ # https://github.com/pydata/pandas/issues/3678
240+ # describe should work with NaN
241+ cat = pd .Categorical ([np .nan ,1 , 2 , 2 ])
242+ desc = cat .describe ()
243+ expected = DataFrame .from_dict (dict (counts = [1 , 2 , 1 ],
244+ freqs = [1 / 4. , 2 / 4. , 1 / 4. ],
245+ levels = [1 ,2 ,np .nan ]
246+ )
247+ ).set_index ('levels' )
248+ tm .assert_frame_equal (desc , expected )
249+
250+ # when NaN is both an explicit level and the "not available" marker, describe() should list two separate NaN rows
251+ cat = pd .Categorical ([np .nan ,1 , 2 , 2 ])
252+ cat .levels = [1 ,2 ,np .nan ]
253+ desc = cat .describe ()
254+ expected = DataFrame .from_dict (dict (counts = [1 , 2 , np .nan , 1 ],
255+ freqs = [1 / 4. , 2 / 4. , np .nan , 1 / 4. ],
256+ levels = [1 ,2 ,np .nan ,np .nan ]
257+ )
258+ ).set_index ('levels' )
259+ tm .assert_frame_equal (desc , expected )
260+
261+
229262 def test_print (self ):
230263 expected = [" a" , " b" , " b" , " a" , " a" , " c" , " c" , " c" ,
231264 "Levels (3, object): [a < b < c]" ]
0 commit comments