Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,18 +201,26 @@ def _str_title(self) -> Self:

def _str_swapcase(self) -> Self:
    """
    Apply ``pc.utf8_swapcase`` to the backing array.

    Returns
    -------
    The object built by ``self._from_pyarrow_array`` from the converted
    values.
    """
    swapped = pc.utf8_swapcase(self._pa_array)
    return self._from_pyarrow_array(swapped)
def _str_removeprefix(self, prefix: str):
if prefix == "":
return self
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
result = pc.if_else(starts_with, removed, self._pa_array)
return self._from_pyarrow_array(result)


def _str_removesuffix(self, suffix: str):
if suffix == "":
return self
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
result = pc.if_else(ends_with, removed, self._pa_array)
return self._from_pyarrow_array(result)



def _str_removeprefix(self, prefix: str):
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
result = pc.if_else(starts_with, removed, self._pa_array)
return self._from_pyarrow_array(result)

def _str_removesuffix(self, suffix: str):
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
result = pc.if_else(ends_with, removed, self._pa_array)
return self._from_pyarrow_array(result)
Copy link
Member

@jorisvandenbossche jorisvandenbossche Dec 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def _str_removeprefix(self, prefix: str):
if prefix == "":
return self
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
result = pc.if_else(starts_with, removed, self._pa_array)
return self._from_pyarrow_array(result)
def _str_removesuffix(self, suffix: str):
if suffix == "":
return self
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
result = pc.if_else(ends_with, removed, self._pa_array)
return self._from_pyarrow_array(result)
def _str_removeprefix(self, prefix: str):
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
result = pc.if_else(starts_with, removed, self._pa_array)
return self._from_pyarrow_array(result)
def _str_removesuffix(self, suffix: str):
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
result = pc.if_else(ends_with, removed, self._pa_array)
return self._from_pyarrow_array(result)
def _str_removeprefix(self, prefix: str):
if prefix == "":
return self
starts_with = pc.starts_with(self._pa_array, pattern=prefix)
removed = pc.utf8_slice_codeunits(self._pa_array, len(prefix))
result = pc.if_else(starts_with, removed, self._pa_array)
return self._from_pyarrow_array(result)
def _str_removesuffix(self, suffix: str):
if prefix == "":
return self
ends_with = pc.ends_with(self._pa_array, pattern=suffix)
removed = pc.utf8_slice_codeunits(self._pa_array, 0, stop=-len(suffix))
result = pc.if_else(ends_with, removed, self._pa_array)
return self._from_pyarrow_array(result)

I think this should fix it

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made those fixes as you suggested (added the `suffix == ""` condition in `_str_removesuffix`), but two checks still seem to be failing. Is that okay?


def _str_startswith(
self, pat: str | tuple[str, ...], na: Scalar | lib.NoDefault = lib.no_default
Expand Down
16 changes: 14 additions & 2 deletions pandas/tests/strings/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,12 @@ def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp):


@pytest.mark.parametrize(
"prefix, expected", [("a", ["b", " b c", "bc"]), ("ab", ["", "a b c", "bc"])]
"prefix, expected",
[
("a", ["b", " b c", "bc"]),
("ab", ["", "a b c", "bc"]),
("", ["ab", "a b c", "bc"]),
],
)
def test_removeprefix(any_string_dtype, prefix, expected):
ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
Expand All @@ -554,7 +559,12 @@ def test_removeprefix(any_string_dtype, prefix, expected):


@pytest.mark.parametrize(
"suffix, expected", [("c", ["ab", "a b ", "b"]), ("bc", ["ab", "a b c", ""])]
"suffix, expected",
[
("c", ["ab", "a b ", "b"]),
("bc", ["ab", "a b c", ""]),
("", ["ab", "a b c", "bc"]),
],
)
def test_removesuffix(any_string_dtype, suffix, expected):
ser = Series(["ab", "a b c", "bc"], dtype=any_string_dtype)
Expand All @@ -563,6 +573,8 @@ def test_removesuffix(any_string_dtype, suffix, expected):
tm.assert_series_equal(result, ser_expected)




def test_string_slice_get_syntax(any_string_dtype):
ser = Series(
["YYY", "B", "C", "YYYYYYbYYY", "BYYYcYYY", np.nan, "CYYYBYYY", "dog", "cYYYt"],
Expand Down
Loading