Skip to content
Next Next commit
avoid floating points for integral floor division
  • Loading branch information
Rohan Jain committed Dec 29, 2023
commit aad3b2e16c1e8b71763cbe7514a81611cceead53
20 changes: 15 additions & 5 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
def floordiv_compat(
    left: pa.ChunkedArray | pa.Array | pa.Scalar,
    right: pa.ChunkedArray | pa.Array | pa.Scalar,
) -> pa.ChunkedArray:
    """
    Floor-divide two pyarrow operands, mirroring Python/NumPy ``//``.

    Integer operands are divided with integer arithmetic only, so values
    that a float64 cannot represent exactly (above 2**53) survive intact.

    Parameters
    ----------
    left : pa.ChunkedArray, pa.Array or pa.Scalar
        Dividend.
    right : pa.ChunkedArray, pa.Array or pa.Scalar
        Divisor.

    Returns
    -------
    pa.ChunkedArray
        ``left // right``. For integer operands the result is cast back to
        ``left.type``; otherwise it is whatever ``pc.floor`` produces for the
        quotient.
    """
    both_integral = pa.types.is_integer(left.type) and pa.types.is_integer(
        right.type
    )
    if not both_integral:
        # Floating/decimal operands: true division followed by floor.
        return pc.floor(pc.divide(left, right))

    # Integer division in Arrow truncates toward zero.
    quotient = pc.divide(left, right)

    if pa.types.is_signed_integer(quotient.type):
        # Truncation and floor differ exactly when the division is inexact
        # and the operands have opposite signs. Testing the sign of the
        # truncated quotient is not sufficient: -1 // 2 truncates to 0, yet
        # the floor is -1.
        zero = pa.scalar(0, type=quotient.type)
        one = pa.scalar(1, type=quotient.type)
        has_remainder = pc.not_equal(pc.multiply(quotient, right), left)
        signs_differ = pc.xor(pc.less(left, zero), pc.less(right, zero))
        quotient = pc.if_else(
            pc.and_(has_remainder, signs_differ),
            pc.subtract(quotient, one),
            quotient,
        )
    # Unsigned quotients need no correction: truncation already equals floor.

    # Ensure compatibility with older versions of pandas where
    # int8 // int64 returned int8 rather than int64.
    return quotient.cast(left.type)

ARROW_ARITHMETIC_FUNCS = {
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3246,6 +3246,14 @@ def test_arrow_floordiv_large_values():
tm.assert_series_equal(result, expected)


def test_arrow_floordiv_large_integral_result():
# GH XXXXX
a = pd.Series([18014398509481983], dtype="int64[pyarrow]")
expected = pd.Series([18014398509481983], dtype="int64[pyarrow]")
result = a // 1
tm.assert_series_equal(result, expected)


def test_string_to_datetime_parsing_cast():
# GH 56266
string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]
Expand Down