pandas-dev · jreback · Feb 6, 2019 · Feb 2, 2019 · Feb 2, 2019 · Feb 2, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -181,6 +181,7 @@ Groupby/Resample/Rolling
 Reshaping
 ^^^^^^^^^
 
+- Bug in :func:`pandas.merge` doesn't work correctly if None is in suffixes (:issue: `24782`).
 - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
 - :func:`to_records` now accepts dtypes to its `column_dtypes` parameter (:issue:`24895`)
 -

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -1971,16 +1971,19 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix):
  raise ValueError('columns overlap but no suffix specified: '
  '{rename}'.format(rename=to_rename))
 
- def lrenamer(x):
- if x in to_rename:
- return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix)
- return x
+ def renamer(x, suffix):
+ """Rename the left and right indices.
 
- def rrenamer(x):
- if x in to_rename:
- return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix)
+ If there is overlap, and suffix is not None, add
+ suffix, otherwise, leave it as-is.
+ """
+ if x in to_rename and suffix is not None:
+ return '{x}{suffix}'.format(x=x, suffix=suffix)
  return x
 
+ lrenamer = partial(renamer, suffix=lsuffix)
+ rrenamer = partial(renamer, suffix=rsuffix)
+
  return (_transform_index(left, lrenamer),
  _transform_index(right, rrenamer))
 

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -159,9 +159,13 @@ def merge_ordered(left, right, on=None,
  left DataFrame
  fill_method : {'ffill', None}, default None
  Interpolation method for data
- suffixes : 2-length sequence (tuple, list, ...)
- Suffix to apply to overlapping column names in the left and right
- side, respectively
+ suffixes : Sequence or None, default is ("_x", "_y")
+ A length-2 sequence where each element is optionally a string
+ indicating the suffix to add to overlapping column names in
+ `left` and `right` respectively. Pass a value of `None` instead
+ of a string to indicate that the column name from `left` or
+ `right` should be left as-is, with no suffix. At least one of the
+ values must not be None.
  how : {'left', 'right', 'outer', 'inner'}, default 'outer'
  * left: use only keys from left frame (SQL: left outer join)
  * right: use only keys from right frame (SQL: right outer join)

diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
@@ -1526,3 +1526,49 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm):
  with pytest.raises(ValueError, match=msg):
  result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on,
  left_index=left_index, right_index=right_index)
+
+
+@pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [
+ (0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]),
+ (0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]),
+ (0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]),
+ ("a", 0, dict(suffixes=(None, "_y")), ["a", 0]),
+ (0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]),
+ ("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]),
+ ("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]),
+ ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]),
+ ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]),
+ (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]),
+ ("a", "a", dict(), ["a_x", "a_y"]),
+ (0, 0, dict(), ["0_x", "0_y"])
+])
+def test_merge_suffix(col1, col2, kwargs, expected_cols):
+ # issue: 24782
+ a = pd.DataFrame({col1: [1, 2, 3]})
+ b = pd.DataFrame({col2: [4, 5, 6]})
+
+ expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]],
+ columns=expected_cols)
+
+ result = a.merge(b, left_index=True, right_index=True, **kwargs)
+ tm.assert_frame_equal(result, expected)
+
+ result = pd.merge(a, b, left_index=True, right_index=True, **kwargs)
+ tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("col1, col2, suffixes", [
+ ("a", "a", [None, None]),
+ ("a", "a", (None, None)),
+ ("a", "a", ("", None)),
+ (0, 0, [None, None]),
+ (0, 0, (None, ""))
+])
+def test_merge_error(col1, col2, suffixes):
+ # issue: 24782
+ a = pd.DataFrame({col1: [1, 2, 3]})
+ b = pd.DataFrame({col2: [3, 4, 5]})
+
+ msg = "columns overlap but no suffix specified"
+ with pytest.raises(ValueError, match=msg):
+ pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes)