|
2 | 2 |
|
3 | 3 | from __future__ import print_function |
4 | 4 |
|
5 | | -from datetime import timedelta, datetime |
| 5 | +from datetime import timedelta |
6 | 6 | from distutils.version import LooseVersion |
7 | 7 | import sys |
8 | 8 | import pytest |
@@ -642,173 +642,6 @@ def test_cumprod(self): |
642 | 642 | df.cumprod(0) |
643 | 643 | df.cumprod(1) |
644 | 644 |
|
645 | | - def test_rank(self): |
646 | | - tm._skip_if_no_scipy() |
647 | | - from scipy.stats import rankdata |
648 | | - |
649 | | - self.frame['A'][::2] = np.nan |
650 | | - self.frame['B'][::3] = np.nan |
651 | | - self.frame['C'][::4] = np.nan |
652 | | - self.frame['D'][::5] = np.nan |
653 | | - |
654 | | - ranks0 = self.frame.rank() |
655 | | - ranks1 = self.frame.rank(1) |
656 | | - mask = np.isnan(self.frame.values) |
657 | | - |
658 | | - fvals = self.frame.fillna(np.inf).values |
659 | | - |
660 | | - exp0 = np.apply_along_axis(rankdata, 0, fvals) |
661 | | - exp0[mask] = np.nan |
662 | | - |
663 | | - exp1 = np.apply_along_axis(rankdata, 1, fvals) |
664 | | - exp1[mask] = np.nan |
665 | | - |
666 | | - tm.assert_almost_equal(ranks0.values, exp0) |
667 | | - tm.assert_almost_equal(ranks1.values, exp1) |
668 | | - |
669 | | - # integers |
670 | | - df = DataFrame(np.random.randint(0, 5, size=40).reshape((10, 4))) |
671 | | - |
672 | | - result = df.rank() |
673 | | - exp = df.astype(float).rank() |
674 | | - tm.assert_frame_equal(result, exp) |
675 | | - |
676 | | - result = df.rank(1) |
677 | | - exp = df.astype(float).rank(1) |
678 | | - tm.assert_frame_equal(result, exp) |
679 | | - |
680 | | - def test_rank2(self): |
681 | | - df = DataFrame([[1, 3, 2], [1, 2, 3]]) |
682 | | - expected = DataFrame([[1.0, 3.0, 2.0], [1, 2, 3]]) / 3.0 |
683 | | - result = df.rank(1, pct=True) |
684 | | - tm.assert_frame_equal(result, expected) |
685 | | - |
686 | | - df = DataFrame([[1, 3, 2], [1, 2, 3]]) |
687 | | - expected = df.rank(0) / 2.0 |
688 | | - result = df.rank(0, pct=True) |
689 | | - tm.assert_frame_equal(result, expected) |
690 | | - |
691 | | - df = DataFrame([['b', 'c', 'a'], ['a', 'c', 'b']]) |
692 | | - expected = DataFrame([[2.0, 3.0, 1.0], [1, 3, 2]]) |
693 | | - result = df.rank(1, numeric_only=False) |
694 | | - tm.assert_frame_equal(result, expected) |
695 | | - |
696 | | - expected = DataFrame([[2.0, 1.5, 1.0], [1, 1.5, 2]]) |
697 | | - result = df.rank(0, numeric_only=False) |
698 | | - tm.assert_frame_equal(result, expected) |
699 | | - |
700 | | - df = DataFrame([['b', np.nan, 'a'], ['a', 'c', 'b']]) |
701 | | - expected = DataFrame([[2.0, nan, 1.0], [1.0, 3.0, 2.0]]) |
702 | | - result = df.rank(1, numeric_only=False) |
703 | | - tm.assert_frame_equal(result, expected) |
704 | | - |
705 | | - expected = DataFrame([[2.0, nan, 1.0], [1.0, 1.0, 2.0]]) |
706 | | - result = df.rank(0, numeric_only=False) |
707 | | - tm.assert_frame_equal(result, expected) |
708 | | - |
709 | | - # f7u12, this does not work without extensive workaround |
710 | | - data = [[datetime(2001, 1, 5), nan, datetime(2001, 1, 2)], |
711 | | - [datetime(2000, 1, 2), datetime(2000, 1, 3), |
712 | | - datetime(2000, 1, 1)]] |
713 | | - df = DataFrame(data) |
714 | | - |
715 | | - # check the rank |
716 | | - expected = DataFrame([[2., nan, 1.], |
717 | | - [2., 3., 1.]]) |
718 | | - result = df.rank(1, numeric_only=False, ascending=True) |
719 | | - tm.assert_frame_equal(result, expected) |
720 | | - |
721 | | - expected = DataFrame([[1., nan, 2.], |
722 | | - [2., 1., 3.]]) |
723 | | - result = df.rank(1, numeric_only=False, ascending=False) |
724 | | - tm.assert_frame_equal(result, expected) |
725 | | - |
726 | | - # mixed-type frames |
727 | | - self.mixed_frame['datetime'] = datetime.now() |
728 | | - self.mixed_frame['timedelta'] = timedelta(days=1, seconds=1) |
729 | | - |
730 | | - result = self.mixed_frame.rank(1) |
731 | | - expected = self.mixed_frame.rank(1, numeric_only=True) |
732 | | - tm.assert_frame_equal(result, expected) |
733 | | - |
734 | | - df = DataFrame({"a": [1e-20, -5, 1e-20 + 1e-40, 10, |
735 | | - 1e60, 1e80, 1e-30]}) |
736 | | - exp = DataFrame({"a": [3.5, 1., 3.5, 5., 6., 7., 2.]}) |
737 | | - tm.assert_frame_equal(df.rank(), exp) |
738 | | - |
739 | | - def test_rank_na_option(self): |
740 | | - tm._skip_if_no_scipy() |
741 | | - from scipy.stats import rankdata |
742 | | - |
743 | | - self.frame['A'][::2] = np.nan |
744 | | - self.frame['B'][::3] = np.nan |
745 | | - self.frame['C'][::4] = np.nan |
746 | | - self.frame['D'][::5] = np.nan |
747 | | - |
748 | | - # bottom |
749 | | - ranks0 = self.frame.rank(na_option='bottom') |
750 | | - ranks1 = self.frame.rank(1, na_option='bottom') |
751 | | - |
752 | | - fvals = self.frame.fillna(np.inf).values |
753 | | - |
754 | | - exp0 = np.apply_along_axis(rankdata, 0, fvals) |
755 | | - exp1 = np.apply_along_axis(rankdata, 1, fvals) |
756 | | - |
757 | | - tm.assert_almost_equal(ranks0.values, exp0) |
758 | | - tm.assert_almost_equal(ranks1.values, exp1) |
759 | | - |
760 | | - # top |
761 | | - ranks0 = self.frame.rank(na_option='top') |
762 | | - ranks1 = self.frame.rank(1, na_option='top') |
763 | | - |
764 | | - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values |
765 | | - fval1 = self.frame.T |
766 | | - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T |
767 | | - fval1 = fval1.fillna(np.inf).values |
768 | | - |
769 | | - exp0 = np.apply_along_axis(rankdata, 0, fval0) |
770 | | - exp1 = np.apply_along_axis(rankdata, 1, fval1) |
771 | | - |
772 | | - tm.assert_almost_equal(ranks0.values, exp0) |
773 | | - tm.assert_almost_equal(ranks1.values, exp1) |
774 | | - |
775 | | - # descending |
776 | | - |
777 | | - # bottom |
778 | | - ranks0 = self.frame.rank(na_option='top', ascending=False) |
779 | | - ranks1 = self.frame.rank(1, na_option='top', ascending=False) |
780 | | - |
781 | | - fvals = self.frame.fillna(np.inf).values |
782 | | - |
783 | | - exp0 = np.apply_along_axis(rankdata, 0, -fvals) |
784 | | - exp1 = np.apply_along_axis(rankdata, 1, -fvals) |
785 | | - |
786 | | - tm.assert_almost_equal(ranks0.values, exp0) |
787 | | - tm.assert_almost_equal(ranks1.values, exp1) |
788 | | - |
789 | | - # descending |
790 | | - |
791 | | - # top |
792 | | - ranks0 = self.frame.rank(na_option='bottom', ascending=False) |
793 | | - ranks1 = self.frame.rank(1, na_option='bottom', ascending=False) |
794 | | - |
795 | | - fval0 = self.frame.fillna((self.frame.min() - 1).to_dict()).values |
796 | | - fval1 = self.frame.T |
797 | | - fval1 = fval1.fillna((fval1.min() - 1).to_dict()).T |
798 | | - fval1 = fval1.fillna(np.inf).values |
799 | | - |
800 | | - exp0 = np.apply_along_axis(rankdata, 0, -fval0) |
801 | | - exp1 = np.apply_along_axis(rankdata, 1, -fval1) |
802 | | - |
803 | | - tm.assert_numpy_array_equal(ranks0.values, exp0) |
804 | | - tm.assert_numpy_array_equal(ranks1.values, exp1) |
805 | | - |
806 | | - def test_rank_axis(self): |
807 | | - # check if using axes' names gives the same result |
808 | | - df = pd.DataFrame([[2, 1], [4, 3]]) |
809 | | - tm.assert_frame_equal(df.rank(axis=0), df.rank(axis='index')) |
810 | | - tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) |
811 | | - |
812 | 645 | def test_sem(self): |
813 | 646 | alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) |
814 | 647 | self._check_stat_op('sem', alt) |
|
0 commit comments