|
9 | 9 | from numpy import nan |
10 | 10 |
|
11 | 11 | from pandas import (date_range, bdate_range, Timestamp, |
12 | | - isnull, Index, MultiIndex, DataFrame, Series, |
| 12 | + Index, MultiIndex, DataFrame, Series, |
13 | 13 | concat, Panel, DatetimeIndex) |
14 | 14 | from pandas.errors import UnsupportedFunctionCall, PerformanceWarning |
15 | 15 | from pandas.util.testing import (assert_panel_equal, assert_frame_equal, |
@@ -87,229 +87,6 @@ def test_select_bad_cols(self): |
87 | 87 | # will have to rethink regex if you change message! |
88 | 88 | g[['A', 'C']] |
89 | 89 |
|
90 | | - def test_first_last_nth(self): |
91 | | - # tests for first / last / nth |
92 | | - grouped = self.df.groupby('A') |
93 | | - first = grouped.first() |
94 | | - expected = self.df.loc[[1, 0], ['B', 'C', 'D']] |
95 | | - expected.index = Index(['bar', 'foo'], name='A') |
96 | | - expected = expected.sort_index() |
97 | | - assert_frame_equal(first, expected) |
98 | | - |
99 | | - nth = grouped.nth(0) |
100 | | - assert_frame_equal(nth, expected) |
101 | | - |
102 | | - last = grouped.last() |
103 | | - expected = self.df.loc[[5, 7], ['B', 'C', 'D']] |
104 | | - expected.index = Index(['bar', 'foo'], name='A') |
105 | | - assert_frame_equal(last, expected) |
106 | | - |
107 | | - nth = grouped.nth(-1) |
108 | | - assert_frame_equal(nth, expected) |
109 | | - |
110 | | - nth = grouped.nth(1) |
111 | | - expected = self.df.loc[[2, 3], ['B', 'C', 'D']].copy() |
112 | | - expected.index = Index(['foo', 'bar'], name='A') |
113 | | - expected = expected.sort_index() |
114 | | - assert_frame_equal(nth, expected) |
115 | | - |
116 | | - # it works! |
117 | | - grouped['B'].first() |
118 | | - grouped['B'].last() |
119 | | - grouped['B'].nth(0) |
120 | | - |
121 | | - self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan |
122 | | - self.assertTrue(isnull(grouped['B'].first()['foo'])) |
123 | | - self.assertTrue(isnull(grouped['B'].last()['foo'])) |
124 | | - self.assertTrue(isnull(grouped['B'].nth(0)['foo'])) |
125 | | - |
126 | | - # v0.14.0 whatsnew |
127 | | - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) |
128 | | - g = df.groupby('A') |
129 | | - result = g.first() |
130 | | - expected = df.iloc[[1, 2]].set_index('A') |
131 | | - assert_frame_equal(result, expected) |
132 | | - |
133 | | - expected = df.iloc[[1, 2]].set_index('A') |
134 | | - result = g.nth(0, dropna='any') |
135 | | - assert_frame_equal(result, expected) |
136 | | - |
137 | | - def test_first_last_nth_dtypes(self): |
138 | | - |
139 | | - df = self.df_mixed_floats.copy() |
140 | | - df['E'] = True |
141 | | - df['F'] = 1 |
142 | | - |
143 | | - # tests for first / last / nth |
144 | | - grouped = df.groupby('A') |
145 | | - first = grouped.first() |
146 | | - expected = df.loc[[1, 0], ['B', 'C', 'D', 'E', 'F']] |
147 | | - expected.index = Index(['bar', 'foo'], name='A') |
148 | | - expected = expected.sort_index() |
149 | | - assert_frame_equal(first, expected) |
150 | | - |
151 | | - last = grouped.last() |
152 | | - expected = df.loc[[5, 7], ['B', 'C', 'D', 'E', 'F']] |
153 | | - expected.index = Index(['bar', 'foo'], name='A') |
154 | | - expected = expected.sort_index() |
155 | | - assert_frame_equal(last, expected) |
156 | | - |
157 | | - nth = grouped.nth(1) |
158 | | - expected = df.loc[[3, 2], ['B', 'C', 'D', 'E', 'F']] |
159 | | - expected.index = Index(['bar', 'foo'], name='A') |
160 | | - expected = expected.sort_index() |
161 | | - assert_frame_equal(nth, expected) |
162 | | - |
163 | | - # GH 2763, first/last shifting dtypes |
164 | | - idx = lrange(10) |
165 | | - idx.append(9) |
166 | | - s = Series(data=lrange(11), index=idx, name='IntCol') |
167 | | - self.assertEqual(s.dtype, 'int64') |
168 | | - f = s.groupby(level=0).first() |
169 | | - self.assertEqual(f.dtype, 'int64') |
170 | | - |
171 | | - def test_nth(self): |
172 | | - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) |
173 | | - g = df.groupby('A') |
174 | | - |
175 | | - assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A')) |
176 | | - assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A')) |
177 | | - assert_frame_equal(g.nth(2), df.loc[[]].set_index('A')) |
178 | | - assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A')) |
179 | | - assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A')) |
180 | | - assert_frame_equal(g.nth(-3), df.loc[[]].set_index('A')) |
181 | | - assert_series_equal(g.B.nth(0), df.set_index('A').B.iloc[[0, 2]]) |
182 | | - assert_series_equal(g.B.nth(1), df.set_index('A').B.iloc[[1]]) |
183 | | - assert_frame_equal(g[['B']].nth(0), |
184 | | - df.loc[[0, 2], ['A', 'B']].set_index('A')) |
185 | | - |
186 | | - exp = df.set_index('A') |
187 | | - assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]]) |
188 | | - assert_frame_equal(g.nth(-1, dropna='any'), exp.iloc[[1, 2]]) |
189 | | - |
190 | | - exp['B'] = np.nan |
191 | | - assert_frame_equal(g.nth(7, dropna='any'), exp.iloc[[1, 2]]) |
192 | | - assert_frame_equal(g.nth(2, dropna='any'), exp.iloc[[1, 2]]) |
193 | | - |
194 | | - # out of bounds, regression from 0.13.1 |
195 | | - # GH 6621 |
196 | | - df = DataFrame({'color': {0: 'green', |
197 | | - 1: 'green', |
198 | | - 2: 'red', |
199 | | - 3: 'red', |
200 | | - 4: 'red'}, |
201 | | - 'food': {0: 'ham', |
202 | | - 1: 'eggs', |
203 | | - 2: 'eggs', |
204 | | - 3: 'ham', |
205 | | - 4: 'pork'}, |
206 | | - 'two': {0: 1.5456590000000001, |
207 | | - 1: -0.070345000000000005, |
208 | | - 2: -2.4004539999999999, |
209 | | - 3: 0.46206000000000003, |
210 | | - 4: 0.52350799999999997}, |
211 | | - 'one': {0: 0.56573799999999996, |
212 | | - 1: -0.9742360000000001, |
213 | | - 2: 1.033801, |
214 | | - 3: -0.78543499999999999, |
215 | | - 4: 0.70422799999999997}}).set_index(['color', |
216 | | - 'food']) |
217 | | - |
218 | | - result = df.groupby(level=0, as_index=False).nth(2) |
219 | | - expected = df.iloc[[-1]] |
220 | | - assert_frame_equal(result, expected) |
221 | | - |
222 | | - result = df.groupby(level=0, as_index=False).nth(3) |
223 | | - expected = df.loc[[]] |
224 | | - assert_frame_equal(result, expected) |
225 | | - |
226 | | - # GH 7559 |
227 | | - # from the vbench |
228 | | - df = DataFrame(np.random.randint(1, 10, (100, 2)), dtype='int64') |
229 | | - s = df[1] |
230 | | - g = df[0] |
231 | | - expected = s.groupby(g).first() |
232 | | - expected2 = s.groupby(g).apply(lambda x: x.iloc[0]) |
233 | | - assert_series_equal(expected2, expected, check_names=False) |
234 | | - self.assertTrue(expected.name, 0) |
235 | | - self.assertEqual(expected.name, 1) |
236 | | - |
237 | | - # validate first |
238 | | - v = s[g == 1].iloc[0] |
239 | | - self.assertEqual(expected.iloc[0], v) |
240 | | - self.assertEqual(expected2.iloc[0], v) |
241 | | - |
242 | | - # this is NOT the same as .first (as sorted is default!) |
243 | | - # as it keeps the order in the series (and not the group order) |
244 | | - # related GH 7287 |
245 | | - expected = s.groupby(g, sort=False).first() |
246 | | - result = s.groupby(g, sort=False).nth(0, dropna='all') |
247 | | - assert_series_equal(result, expected) |
248 | | - |
249 | | - # doc example |
250 | | - df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) |
251 | | - g = df.groupby('A') |
252 | | - result = g.B.nth(0, dropna=True) |
253 | | - expected = g.B.first() |
254 | | - assert_series_equal(result, expected) |
255 | | - |
256 | | - # test multiple nth values |
257 | | - df = DataFrame([[1, np.nan], [1, 3], [1, 4], [5, 6], [5, 7]], |
258 | | - columns=['A', 'B']) |
259 | | - g = df.groupby('A') |
260 | | - |
261 | | - assert_frame_equal(g.nth(0), df.iloc[[0, 3]].set_index('A')) |
262 | | - assert_frame_equal(g.nth([0]), df.iloc[[0, 3]].set_index('A')) |
263 | | - assert_frame_equal(g.nth([0, 1]), df.iloc[[0, 1, 3, 4]].set_index('A')) |
264 | | - assert_frame_equal( |
265 | | - g.nth([0, -1]), df.iloc[[0, 2, 3, 4]].set_index('A')) |
266 | | - assert_frame_equal( |
267 | | - g.nth([0, 1, 2]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) |
268 | | - assert_frame_equal( |
269 | | - g.nth([0, 1, -1]), df.iloc[[0, 1, 2, 3, 4]].set_index('A')) |
270 | | - assert_frame_equal(g.nth([2]), df.iloc[[2]].set_index('A')) |
271 | | - assert_frame_equal(g.nth([3, 4]), df.loc[[]].set_index('A')) |
272 | | - |
273 | | - business_dates = pd.date_range(start='4/1/2014', end='6/30/2014', |
274 | | - freq='B') |
275 | | - df = DataFrame(1, index=business_dates, columns=['a', 'b']) |
276 | | - # get the first, fourth and last two business days for each month |
277 | | - key = (df.index.year, df.index.month) |
278 | | - result = df.groupby(key, as_index=False).nth([0, 3, -2, -1]) |
279 | | - expected_dates = pd.to_datetime( |
280 | | - ['2014/4/1', '2014/4/4', '2014/4/29', '2014/4/30', '2014/5/1', |
281 | | - '2014/5/6', '2014/5/29', '2014/5/30', '2014/6/2', '2014/6/5', |
282 | | - '2014/6/27', '2014/6/30']) |
283 | | - expected = DataFrame(1, columns=['a', 'b'], index=expected_dates) |
284 | | - assert_frame_equal(result, expected) |
285 | | - |
286 | | - def test_nth_multi_index(self): |
287 | | - # PR 9090, related to issue 8979 |
288 | | - # test nth on MultiIndex, should match .first() |
289 | | - grouped = self.three_group.groupby(['A', 'B']) |
290 | | - result = grouped.nth(0) |
291 | | - expected = grouped.first() |
292 | | - assert_frame_equal(result, expected) |
293 | | - |
294 | | - def test_nth_multi_index_as_expected(self): |
295 | | - # PR 9090, related to issue 8979 |
296 | | - # test nth on MultiIndex |
297 | | - three_group = DataFrame( |
298 | | - {'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', |
299 | | - 'foo', 'foo', 'foo'], |
300 | | - 'B': ['one', 'one', 'one', 'two', 'one', 'one', 'one', 'two', |
301 | | - 'two', 'two', 'one'], |
302 | | - 'C': ['dull', 'dull', 'shiny', 'dull', 'dull', 'shiny', 'shiny', |
303 | | - 'dull', 'shiny', 'shiny', 'shiny']}) |
304 | | - grouped = three_group.groupby(['A', 'B']) |
305 | | - result = grouped.nth(0) |
306 | | - expected = DataFrame( |
307 | | - {'C': ['dull', 'dull', 'dull', 'dull']}, |
308 | | - index=MultiIndex.from_arrays([['bar', 'bar', 'foo', 'foo'], |
309 | | - ['one', 'two', 'one', 'two']], |
310 | | - names=['A', 'B'])) |
311 | | - assert_frame_equal(result, expected) |
312 | | - |
313 | 90 | def test_group_selection_cache(self): |
314 | 91 | # GH 12839 nth, head, and tail should return same result consistently |
315 | 92 | df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B']) |
|
0 commit comments