@@ -901,14 +901,78 @@ def test_merge_on_multikey(self):
901901 # TODO: columns aren't in the same order yet 
902902 assert_frame_equal (joined , expected .ix [:, joined .columns ])
903903
904+  left  =  self .data .join (self .to_join , on = ['key1' , 'key2' ], sort = True )
905+  right  =  expected .ix [:, joined .columns ].sort (['key1' , 'key2' ],
906+  kind = 'mergesort' )
907+  assert_frame_equal (left , right )
908+ 
def test_left_join_multi_index(self):
    """Left-join a frame onto a three-level MultiIndex, with and without NaNs.

    ``left`` carries a '4th' column computed from its three key columns by
    ``bind_cols``. ``right`` holds a row permutation of the same keys, is
    indexed by them, and carries the negated value in '5th'. The joined
    frame is then checked for: no nulls in '4th'/'5th', '4th' equal to
    ``-'5th'``, stable key ordering when ``sort=True``, and agreement with
    the equivalent ``merge`` call. The scenario runs once on clean keys and
    once after NaNs have been injected into each key column.
    """
    icols = ['1st', '2nd', '3rd']

    def bind_cols(df):
        # Fold the three key columns into a single number per row.
        def letter_ordinal(a):
            # ``a != a`` is only true for NaN, which is mapped to 0
            return 0 if a != a else ord(a)

        def letter_offset(ts):
            return ts.map(letter_ordinal) - ord('a')

        return (letter_offset(df['1st'])
                + letter_offset(df['3rd']) * 1e2
                + df['2nd'].fillna(0) * 1e4)

    def run_asserts(left, right):
        for sort in [False, True]:
            res = left.join(right, on=icols, how='left', sort=sort)

            # a left join never returns fewer rows than the left frame
            self.assertTrue(len(left) <= len(res))
            self.assertFalse(res['4th'].isnull().any())
            self.assertFalse(res['5th'].isnull().any())

            tm.assert_series_equal(res['4th'], -res['5th'])
            # recompute from the key columns only ('4th'/'5th' dropped)
            tm.assert_series_equal(res['4th'], bind_cols(res.iloc[:, :-2]))

            if sort:
                # a sorted join must already be in stable key order
                tm.assert_frame_equal(res,
                                      res.sort(icols, kind='mergesort'))

            out = merge(left, right.reset_index(), on=icols,
                        sort=sort, how='left')

            # compare against ``merge`` on a fresh 0..n-1 index
            res.index = np.arange(len(res))
            tm.assert_frame_equal(out, res)

    lc = list(map(chr, np.arange(ord('a'), ord('z') + 1)))
    left = DataFrame(np.random.choice(lc, (5000, 2)),
                     columns=['1st', '3rd'])
    left.insert(1, '2nd', np.random.randint(0, 1000, len(left)))

    i = np.random.permutation(len(left))
    right = left.iloc[i].copy()

    left['4th'] = bind_cols(left)
    right['5th'] = -bind_cols(right)
    right.set_index(icols, inplace=True)

    run_asserts(left, right)

    # inject some nulls
    left.loc[1::23, '1st'] = np.nan
    left.loc[2::37, '2nd'] = np.nan
    left.loc[3::43, '3rd'] = np.nan
    left['4th'] = bind_cols(left)

    i = np.random.permutation(len(left))
    # Drop the last column ('4th') and take an explicit copy, as in the
    # first scenario, so the column assignment below writes to an
    # independent frame rather than to an indexing result.
    right = left.iloc[i, :-1].copy()
    right['5th'] = -bind_cols(right)
    right.set_index(icols, inplace=True)

    run_asserts(left, right)
964+ 
def test_merge_right_vs_left(self):
    # A left merge of ``data`` onto ``to_join`` is compared with the
    # mirrored right merge of ``to_join`` onto ``data`` over the
    # two-column key, both with and without sorting.
    keys = ('key1', 'key2')
    for sort in (False, True):
        via_left = self.data.merge(self.to_join, left_on=list(keys),
                                   right_index=True, how='left',
                                   sort=sort)

        via_right = self.to_join.merge(self.data, right_on=list(keys),
                                       left_index=True, how='right',
                                       sort=sort)

        # put the right-merge columns in the left-merge order, then compare
        assert_frame_equal(via_left, via_right.ix[:, via_left.columns])
912976
913977 def  test_compress_group_combinations (self ):
914978
@@ -943,6 +1007,8 @@ def test_left_join_index_preserve_order(self):
9431007 expected .loc [(expected .k1  ==  1 ) &  (expected .k2  ==  'foo' ),'v2' ] =  7 
9441008
9451009 tm .assert_frame_equal (result , expected )
1010+  tm .assert_frame_equal (result .sort (['k1' , 'k2' ], kind = 'mergesort' ),
1011+  left .join (right , on = ['k1' , 'k2' ], sort = True ))
9461012
9471013 # test join with multi dtypes blocks 
9481014 left  =  DataFrame ({'k1' : [0 , 1 , 2 ] *  8 ,
@@ -961,6 +1027,8 @@ def test_left_join_index_preserve_order(self):
9611027 expected .loc [(expected .k1  ==  1 ) &  (expected .k2  ==  'foo' ),'v2' ] =  7 
9621028
9631029 tm .assert_frame_equal (result , expected )
1030+  tm .assert_frame_equal (result .sort (['k1' , 'k2' ], kind = 'mergesort' ),
1031+  left .join (right , on = ['k1' , 'k2' ], sort = True ))
9641032
9651033 # do a right join for an extra test 
9661034 joined  =  merge (right , left , left_index = True ,
@@ -1022,6 +1090,12 @@ def test_left_join_index_multi_match_multiindex(self):
10221090
10231091 tm .assert_frame_equal (result , expected )
10241092
1093+  result  =  left .join (right , on = ['cola' , 'colb' , 'colc' ],
1094+  how = 'left' , sort = True )
1095+ 
1096+  tm .assert_frame_equal (result ,
1097+  expected .sort (['cola' , 'colb' , 'colc' ], kind = 'mergesort' ))
1098+ 
10251099 # GH7331 - maintain left frame order in left merge 
10261100 right .reset_index (inplace = True )
10271101 right .columns  =  left .columns [:3 ].tolist () +  right .columns [- 1 :].tolist ()
@@ -1066,6 +1140,9 @@ def test_left_join_index_multi_match(self):
10661140
10671141 tm .assert_frame_equal (result , expected )
10681142
1143+  result  =  left .join (right , on = 'tag' , how = 'left' , sort = True )
1144+  tm .assert_frame_equal (result , expected .sort ('tag' , kind = 'mergesort' ))
1145+ 
10691146 # GH7331 - maintain left frame order in left merge 
10701147 result  =  merge (left , right .reset_index (), how = 'left' , on = 'tag' )
10711148 expected .index  =  np .arange (len (expected ))
@@ -1094,6 +1171,10 @@ def _test(dtype1,dtype2):
10941171
10951172 tm .assert_frame_equal (result , expected )
10961173
1174+  result  =  left .join (right , on = ['k1' , 'k2' ], sort = True )
1175+  expected .sort (['k1' , 'k2' ], kind = 'mergesort' , inplace = True )
1176+  tm .assert_frame_equal (result , expected )
1177+ 
10971178 for  d1  in  [np .int64 ,np .int32 ,np .int16 ,np .int8 ,np .uint8 ]:
10981179 for  d2  in  [np .int64 ,np .float64 ,np .float32 ,np .float16 ]:
10991180 _test (np .dtype (d1 ),np .dtype (d2 ))
0 commit comments