-
- Notifications
You must be signed in to change notification settings - Fork 19.2k
pivot_table very slow on Categorical data; how about an observed keyword argument? #24923 #24953
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d1554c2 0662fa3 6121313 9f93ab9 ebe5972 416e9c8 5c62063 8663be2 a1e3afe 22637a3 088f277 672847b d97a077 9de99fa 9a9569f c8e085d 2516386 0efeed8 13168d2 8518833 58a8f6e 12b8fac 09af30b 6df9e6d a23b5d0 8d50e85 3d39dff ee696d9 12c0f82 f586e42 cf7e8f5 a3bcf1a bb7cfef 5921646 3c1720c File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
…eed faster than those which are set to False.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -2,6 +2,7 @@ | |
| | ||
| from collections import OrderedDict | ||
| from datetime import date, datetime, timedelta | ||
| import time | ||
| | ||
| import numpy as np | ||
| import pytest | ||
| | @@ -65,7 +66,7 @@ def test_pivot_table(self, observed): | |
| index + [columns])['D'].agg(np.mean).unstack() | ||
| tm.assert_frame_equal(table, expected) | ||
| | ||
| def test_pivot_table_categorical_observed(self, observed): | ||
| def test_pivot_table_categorical_observed_equal(self, observed): | ||
| # issue #24923 | ||
| df = pd.DataFrame({'col1': list('abcde'), | ||
jreback marked this conversation as resolved. Show resolved Hide resolved | ||
| 'col2': list('fghij'), | ||
| | @@ -84,6 +85,29 @@ def test_pivot_table_categorical_observed(self, observed): | |
| | ||
| tm.assert_frame_equal(result, expected) | ||
| | ||
| def test_pivot_table_categorical_observed_speed(self): | ||
| # issue #24923 | ||
| df = pd.DataFrame({'col1': list('abcde'), | ||
| 'col2': list('fghij'), | ||
| 'col3': [1, 2, 3, 4, 5]}) | ||
| | ||
| df.col1 = df.col1.astype('category') | ||
| df.col2 = df.col1.astype('category') | ||
| | ||
| start_time_observed_false = time.time() | ||
| ||
| df.pivot_table(index='col1', values='col3', | ||
| columns='col2', aggfunc=np.sum, | ||
| fill_value=0, observed=False) | ||
| total_time_observed_false = time.time() - start_time_observed_false | ||
| | ||
| start_time_observed_true = time.time() | ||
| df.pivot_table(index='col1', values='col3', | ||
| columns='col2', aggfunc=np.sum, | ||
| fill_value=0, observed=True) | ||
| total_time_observed_true = time.time() - start_time_observed_true | ||
| | ||
| assert total_time_observed_true < total_time_observed_false | ||
| | ||
| def test_pivot_table_nocols(self): | ||
| df = DataFrame({'rows': ['a', 'b', 'c'], | ||
| 'cols': ['x', 'y', 'z'], | ||
| | ||
Uh oh!
There was an error while loading. Please reload this page.