@@ -901,14 +901,78 @@ def test_merge_on_multikey(self):
901901 # TODO: columns aren't in the same order yet
902902 assert_frame_equal (joined , expected .ix [:, joined .columns ])
903903
904+ left = self .data .join (self .to_join , on = ['key1' , 'key2' ], sort = True )
905+ right = expected .ix [:, joined .columns ].sort (['key1' , 'key2' ],
906+ kind = 'mergesort' )
907+ assert_frame_equal (left , right )
908+
def test_left_join_multi_index(self):
    """Left-join a frame onto a three-level MultiIndex, sorted and unsorted.

    ``right`` is built as a row-shuffled copy of ``left`` indexed by the key
    columns, and its '5th' column is the negation of the value that
    ``bind_cols`` derives from those same keys.  After the join the test
    therefore expects '4th' == -'5th' on every row and no missing values.
    The scenario runs twice: once with clean keys and once after NaNs have
    been injected into each key column.
    """
    icols = ['1st', '2nd', '3rd']

    def bind_cols(df):
        # Fold the three key columns into a single number per row so the
        # joined payload columns can be checked against the keys.
        def iord(a):
            # ``a != a`` is true only for NaN; map it to 0 instead of
            # calling ord() on a float.
            return 0 if a != a else ord(a)

        def f(ts):
            return ts.map(iord) - ord('a')

        return f(df['1st']) + f(df['3rd']) * 1e2 + df['2nd'].fillna(0) * 1e4

    def run_asserts(left, right):
        for sort in [False, True]:
            res = left.join(right, on=icols, how='left', sort=sort)

            # a left join must not drop rows of the left frame
            self.assertTrue(len(left) <= len(res))
            self.assertFalse(res['4th'].isnull().any())
            self.assertFalse(res['5th'].isnull().any())

            tm.assert_series_equal(res['4th'], -res['5th'])
            # the last two columns are the payloads ('4th', '5th'); the
            # remaining ones are the keys that bind_cols consumes
            tm.assert_series_equal(res['4th'], bind_cols(res.iloc[:, :-2]))

            if sort:
                # a sorted join result must already be in stable
                # key order, so re-sorting it changes nothing
                tm.assert_frame_equal(res,
                                      res.sort(icols, kind='mergesort'))

            # merge() on the reset right frame must agree with join(),
            # apart from the index, which merge() renumbers
            out = merge(left, right.reset_index(), on=icols,
                        sort=sort, how='left')

            res.index = np.arange(len(res))
            tm.assert_frame_equal(out, res)

    lc = list(map(chr, np.arange(ord('a'), ord('z') + 1)))
    left = DataFrame(np.random.choice(lc, (5000, 2)),
                     columns=['1st', '3rd'])
    left.insert(1, '2nd', np.random.randint(0, 1000, len(left)))

    i = np.random.permutation(len(left))
    right = left.iloc[i].copy()

    left['4th'] = bind_cols(left)
    right['5th'] = -bind_cols(right)
    right.set_index(icols, inplace=True)

    run_asserts(left, right)

    # inject some nulls
    left.loc[1::23, '1st'] = np.nan
    left.loc[2::37, '2nd'] = np.nan
    left.loc[3::43, '3rd'] = np.nan
    left['4th'] = bind_cols(left)

    i = np.random.permutation(len(left))
    # Drop the '4th' payload column and take an explicit copy, as the first
    # fixture does, so the column assignment below does not write into an
    # indexing result (avoids SettingWithCopy warnings/errors).
    right = left.iloc[i, :-1].copy()
    right['5th'] = -bind_cols(right)
    right.set_index(icols, inplace=True)

    run_asserts(left, right)
964+
def test_merge_right_vs_left(self):
    """A left merge and the mirrored right merge must agree on multikeys.

    Checked both with and without ``sort``.
    """
    keys = ['key1', 'key2']

    for sort in [False, True]:
        from_left = self.data.merge(self.to_join, left_on=keys,
                                    right_index=True, how='left',
                                    sort=sort)
        from_right = self.to_join.merge(self.data, right_on=keys,
                                        left_index=True, how='right',
                                        sort=sort)

        # the two merges order their columns differently; realign the
        # right-merge result to the left-merge layout before comparing
        assert_frame_equal(from_left, from_right.ix[:, from_left.columns])
912976
913977 def test_compress_group_combinations (self ):
914978
@@ -943,6 +1007,8 @@ def test_left_join_index_preserve_order(self):
9431007 expected .loc [(expected .k1 == 1 ) & (expected .k2 == 'foo' ),'v2' ] = 7
9441008
9451009 tm .assert_frame_equal (result , expected )
1010+ tm .assert_frame_equal (result .sort (['k1' , 'k2' ], kind = 'mergesort' ),
1011+ left .join (right , on = ['k1' , 'k2' ], sort = True ))
9461012
9471013 # test join with multi dtypes blocks
9481014 left = DataFrame ({'k1' : [0 , 1 , 2 ] * 8 ,
@@ -961,6 +1027,8 @@ def test_left_join_index_preserve_order(self):
9611027 expected .loc [(expected .k1 == 1 ) & (expected .k2 == 'foo' ),'v2' ] = 7
9621028
9631029 tm .assert_frame_equal (result , expected )
1030+ tm .assert_frame_equal (result .sort (['k1' , 'k2' ], kind = 'mergesort' ),
1031+ left .join (right , on = ['k1' , 'k2' ], sort = True ))
9641032
9651033 # do a right join for an extra test
9661034 joined = merge (right , left , left_index = True ,
@@ -1022,6 +1090,12 @@ def test_left_join_index_multi_match_multiindex(self):
10221090
10231091 tm .assert_frame_equal (result , expected )
10241092
1093+ result = left .join (right , on = ['cola' , 'colb' , 'colc' ],
1094+ how = 'left' , sort = True )
1095+
1096+ tm .assert_frame_equal (result ,
1097+ expected .sort (['cola' , 'colb' , 'colc' ], kind = 'mergesort' ))
1098+
10251099 # GH7331 - maintain left frame order in left merge
10261100 right .reset_index (inplace = True )
10271101 right .columns = left .columns [:3 ].tolist () + right .columns [- 1 :].tolist ()
@@ -1066,6 +1140,9 @@ def test_left_join_index_multi_match(self):
10661140
10671141 tm .assert_frame_equal (result , expected )
10681142
1143+ result = left .join (right , on = 'tag' , how = 'left' , sort = True )
1144+ tm .assert_frame_equal (result , expected .sort ('tag' , kind = 'mergesort' ))
1145+
10691146 # GH7331 - maintain left frame order in left merge
10701147 result = merge (left , right .reset_index (), how = 'left' , on = 'tag' )
10711148 expected .index = np .arange (len (expected ))
@@ -1094,6 +1171,10 @@ def _test(dtype1,dtype2):
10941171
10951172 tm .assert_frame_equal (result , expected )
10961173
1174+ result = left .join (right , on = ['k1' , 'k2' ], sort = True )
1175+ expected .sort (['k1' , 'k2' ], kind = 'mergesort' , inplace = True )
1176+ tm .assert_frame_equal (result , expected )
1177+
10971178 for d1 in [np .int64 ,np .int32 ,np .int16 ,np .int8 ,np .uint8 ]:
10981179 for d2 in [np .int64 ,np .float64 ,np .float32 ,np .float16 ]:
10991180 _test (np .dtype (d1 ),np .dtype (d2 ))
0 commit comments