Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1815,6 +1815,7 @@ Reshaping
- Bug in :func:`DataFrame.unstack` where a ``ValueError`` was raised when unstacking timezone aware values (:issue:`18338`)
- Bug in :func:`DataFrame.stack` where timezone aware values were converted to timezone naive values (:issue:`19420`)
- Bug in :func:`merge_asof` where a ``TypeError`` was raised when ``by_col`` were timezone aware values (:issue:`21184`)
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)

.. _whatsnew_0240.bug_fixes.sparse:

Expand Down
28 changes: 26 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,13 +757,15 @@ def _get_join_info(self):

if self.right_index:
if len(self.left) > 0:
join_index = self.left.index.take(left_indexer)
join_index = self._create_join_index(left_indexer,
using_left=True)
else:
join_index = self.right.index.take(right_indexer)
left_indexer = np.array([-1] * len(join_index))
elif self.left_index:
if len(self.right) > 0:
join_index = self.right.index.take(right_indexer)
join_index = self._create_join_index(right_indexer,
using_left=False)
else:
join_index = self.left.index.take(left_indexer)
right_indexer = np.array([-1] * len(join_index))
Expand All @@ -774,6 +776,28 @@ def _get_join_info(self):
join_index = join_index.astype(object)
return join_index, left_indexer, right_indexer

def _create_join_index(self, indexer, using_left=True):
if using_left:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a doc-string

index = self.left.index
other_index = self.right.index
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe to make this more clear, pass how=, index, other_index as well (then you don't need the switch

how_check = 'right'
else:
index = self.right.index
other_index = self.left.index
how_check = 'left'

join_index = index.take(indexer)
if self.how == how_check and not isinstance(other_index, MultiIndex):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a comment here on what you are doing

absent = indexer == -1
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

absent -> mask

if any(absent):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use np.any

# if values missing (-1) from target index,
# take from other index instead
join_list = join_index.to_numpy()
join_list[absent] = other_index.to_numpy()[absent]
join_index = Index(join_list, dtype=join_index.dtype,
name=join_index.name)
return join_index

def _get_merge_keys(self):
"""
Note: has side effects (copy/delete key columns)
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,25 @@ def test_merge_two_empty_df_no_division_error(self):
with np.errstate(divide='raise'):
merge(a, a, on=('a', 'b'))

def test_merge_on_index_with_more_values(self):
# GH 24212
# pd.merge gets [-1, -1, 0, 1] as right_indexer, ensure that -1 is
# interpreted as a missing value instead of the last element
df1 = pd.DataFrame([[1, 2], [2, 4], [3, 6], [4, 8]],
columns=['a', 'b'])
df2 = pd.DataFrame([[3, 30], [4, 40]],
columns=['a', 'c'])
df1.set_index('a', drop=False, inplace=True)
df2.set_index('a', inplace=True)
result = pd.merge(df1, df2, left_index=True, right_on='a', how='left')
expected = pd.DataFrame([[1, 2, np.nan],
[2, 4, np.nan],
[3, 6, 30.0],
[4, 8, 40.0]],
columns=['a', 'b', 'c'])
expected.set_index('a', drop=False, inplace=True)
assert_frame_equal(result, expected)


def _check_merge(x, y):
for how in ['inner', 'left', 'outer']:
Expand Down