-
- Notifications
You must be signed in to change notification settings - Fork 19.2k
BUG: DataFrame.merge(suffixes=) does not respect None #24819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
82c52a4 dd605e0 af7f9ad 4d5e1a9 3f65bf1 ce7e4b8 90ca9cd e995a04 9c3dfbd 441e9a5 71729b2 File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -1971,16 +1971,19 @@ def items_overlap_with_suffix(left, lsuffix, right, rsuffix): | |
| raise ValueError('columns overlap but no suffix specified: ' | ||
| '{rename}'.format(rename=to_rename)) | ||
| | ||
| def lrenamer(x): | ||
| if x in to_rename: | ||
| return '{x}{lsuffix}'.format(x=x, lsuffix=lsuffix) | ||
| return x | ||
| def renamer(x, suffix): | ||
| """Rename the left and right indices. | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you make a proper docstring (Parameters / Returns)? | ||
| | ||
| def rrenamer(x): | ||
| if x in to_rename: | ||
| return '{x}{rsuffix}'.format(x=x, rsuffix=rsuffix) | ||
| If there is overlap, and suffix is not None, add | ||
| suffix, otherwise, leave it as-is. | ||
| """ | ||
| if x in to_rename and suffix is not None: | ||
| return '{x}{suffix}'.format(x=x, suffix=suffix) | ||
| return x | ||
| | ||
| lrenamer = partial(renamer, suffix=lsuffix) | ||
| rrenamer = partial(renamer, suffix=rsuffix) | ||
| | ||
| return (_transform_index(left, lrenamer), | ||
| _transform_index(right, rrenamer)) | ||
| | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -159,9 +159,13 @@ def merge_ordered(left, right, on=None, | |
| left DataFrame | ||
| fill_method : {'ffill', None}, default None | ||
| Interpolation method for data | ||
| suffixes : 2-length sequence (tuple, list, ...) | ||
| Suffix to apply to overlapping column names in the left and right | ||
| side, respectively | ||
| suffixes : Sequence or None, default is ("_x", "_y") | ||
| ||
| A length-2 sequence where each element is optionally a string | ||
| indicating the suffix to add to overlapping column names in | ||
| `left` and `right` respectively. Pass a value of `None` instead | ||
| of a string to indicate that the column name from `left` or | ||
| `right` should be left as-is, with no suffix. At least one of the | ||
| values must not be None. | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a versionchanged 0.25.0 here? | ||
| how : {'left', 'right', 'outer', 'inner'}, default 'outer' | ||
| * left: use only keys from left frame (SQL: left outer join) | ||
| * right: use only keys from right frame (SQL: right outer join) | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -1526,3 +1526,49 @@ def test_merge_series(on, left_on, right_on, left_index, right_index, nm): | |
| with pytest.raises(ValueError, match=msg): | ||
| result = pd.merge(a, b, on=on, left_on=left_on, right_on=right_on, | ||
| left_index=left_index, right_index=right_index) | ||
| | ||
| | ||
| @pytest.mark.parametrize("col1, col2, kwargs, expected_cols", [ | ||
| (0, 0, dict(suffixes=("", "_dup")), ["0", "0_dup"]), | ||
| (0, 0, dict(suffixes=(None, "_dup")), [0, "0_dup"]), | ||
| (0, 0, dict(suffixes=("_x", "_y")), ["0_x", "0_y"]), | ||
| ("a", 0, dict(suffixes=(None, "_y")), ["a", 0]), | ||
| (0.0, 0.0, dict(suffixes=("_x", None)), ["0.0_x", 0.0]), | ||
| ("b", "b", dict(suffixes=(None, "_y")), ["b", "b_y"]), | ||
| ("a", "a", dict(suffixes=("_x", None)), ["a_x", "a"]), | ||
| ("a", "b", dict(suffixes=("_x", None)), ["a", "b"]), | ||
| ("a", "a", dict(suffixes=[None, "_x"]), ["a", "a_x"]), | ||
| (0, 0, dict(suffixes=["_a", None]), ["0_a", 0]), | ||
| ("a", "a", dict(), ["a_x", "a_y"]), | ||
| (0, 0, dict(), ["0_x", "0_y"]) | ||
| ]) | ||
| def test_merge_suffix(col1, col2, kwargs, expected_cols): | ||
| # issue: 24782 | ||
| a = pd.DataFrame({col1: [1, 2, 3]}) | ||
jschendel marked this conversation as resolved. Show resolved Hide resolved | ||
| b = pd.DataFrame({col2: [4, 5, 6]}) | ||
| | ||
| expected = pd.DataFrame([[1, 4], [2, 5], [3, 6]], | ||
| columns=expected_cols) | ||
| | ||
| result = a.merge(b, left_index=True, right_index=True, **kwargs) | ||
| tm.assert_frame_equal(result, expected) | ||
| | ||
| result = pd.merge(a, b, left_index=True, right_index=True, **kwargs) | ||
| tm.assert_frame_equal(result, expected) | ||
| | ||
| | ||
| @pytest.mark.parametrize("col1, col2, suffixes", [ | ||
| ("a", "a", [None, None]), | ||
| ("a", "a", (None, None)), | ||
| ("a", "a", ("", None)), | ||
| (0, 0, [None, None]), | ||
| (0, 0, (None, "")) | ||
| ]) | ||
| def test_merge_error(col1, col2, suffixes): | ||
| ||
| # issue: 24782 | ||
| a = pd.DataFrame({col1: [1, 2, 3]}) | ||
| b = pd.DataFrame({col2: [3, 4, 5]}) | ||
| | ||
| msg = "columns overlap but no suffix specified" | ||
| There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add here a comment like There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, added! | ||
| with pytest.raises(ValueError, match=msg): | ||
| pd.merge(a, b, left_index=True, right_index=True, suffixes=suffixes) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you be a bit more clear on what the previous symptom was, instead of 'doesn't work correctly'?
double backticks on None
no space after the colon
(:issue:`24782`)