pandas-dev · rohanjain101 · Dec 29, 2023 · Dec 29, 2023 · Dec 29, 2023 · Dec 29, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -776,6 +776,7 @@ Timezones
 Numeric
 ^^^^^^^
 - Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`)
+- Bug in :meth:`Series.__floordiv__` and :meth:`Series.__truediv__` for :class:`ArrowDtype` with integral dtypes raising for large divisors (:issue:`56706`)
 - Bug in :meth:`Series.__floordiv__` for :class:`ArrowDtype` with integral dtypes raising for large values (:issue:`56645`)
 - Bug in :meth:`Series.pow` not filling missing values correctly (:issue:`55512`)
 

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -109,7 +109,7 @@
 
  def cast_for_truediv(
  arrow_array: pa.ChunkedArray, pa_object: pa.Array | pa.Scalar
- ) -> pa.ChunkedArray:
+ ) -> tuple[pa.ChunkedArray, pa.Array | pa.Scalar]:
  # Ensure int / int -> float mirroring Python/Numpy behavior
  # as pc.divide_checked(int, int) -> int
  if pa.types.is_integer(arrow_array.type) and pa.types.is_integer(
@@ -120,19 +120,51 @@ def cast_for_truediv(
  # Intentionally not using arrow_array.cast because it could be a scalar
  # value in reflected case, and safe=False only added to
  # scalar cast in pyarrow 13.
- return pc.cast(arrow_array, pa.float64(), safe=False)
- return arrow_array
+ # In arrow, the common type between integral and float64 is float64,
+ # but the integral operand is safe-cast to float64. To mirror Python
+ # and numpy we want an unsafe cast, so we cast both operands to
+ # float64 before invoking arrow.
+ return pc.cast(arrow_array, pa.float64(), safe=False), pc.cast(
+ pa_object, pa.float64(), safe=False
+ )
+
+ return arrow_array, pa_object
 
  def floordiv_compat(
  left: pa.ChunkedArray | pa.Array | pa.Scalar,
  right: pa.ChunkedArray | pa.Array | pa.Scalar,
  ) -> pa.ChunkedArray:
- # Ensure int // int -> int mirroring Python/Numpy behavior
- # as pc.floor(pc.divide_checked(int, int)) -> float
- converted_left = cast_for_truediv(left, right)
- result = pc.floor(pc.divide(converted_left, right))
  if pa.types.is_integer(left.type) and pa.types.is_integer(right.type):
- result = result.cast(left.type)
+ # Use divide_checked to ensure cases like -9223372036854775808 // -1
+ # don't silently overflow.
+ divided = pc.divide_checked(left, right)
+ # GH 56676: avoid storing intermediate calculation in floating point type.
+ has_remainder = pc.not_equal(pc.multiply(divided, right), left)
+ result = pc.if_else(
+ # Pass a typed arrow scalar rather than stdlib int
+ # which is always inferred as int64, to prevent overflow
+ # in case of large uint64 values.
+ pc.and_(
+ pc.less(
+ pc.bit_wise_xor(left, right), pa.scalar(0, type=divided.type)
+ ),
+ has_remainder,
+ ),
+ # GH 55561: floordiv should round towards negative infinity.
+ # pc.divide_checked for integral types rounds towards 0.
+ # Avoid using subtract_checked which would incorrectly raise
+ # for -9223372036854775808 // 1, because if integer overflow
+ # occurs, then has_remainder should be false, and overflowed
+ # value is discarded.
+ pc.subtract(divided, pa.scalar(1, type=divided.type)),
+ divided,
+ )
+ else:
+ # Use divide instead of divide_checked to match numpy
+ # floordiv where divide by 0 returns infinity for floating
+ # point types.
+ divided = pc.divide(left, right)
+ result = pc.floor(divided)
  return result
 
  ARROW_ARITHMETIC_FUNCS = {
@@ -142,8 +174,8 @@ def floordiv_compat(
  "rsub": lambda x, y: pc.subtract_checked(y, x),
  "mul": pc.multiply_checked,
  "rmul": lambda x, y: pc.multiply_checked(y, x),
- "truediv": lambda x, y: pc.divide(cast_for_truediv(x, y), y),
- "rtruediv": lambda x, y: pc.divide(y, cast_for_truediv(x, y)),
+ "truediv": lambda x, y: pc.divide(*cast_for_truediv(x, y)),
+ "rtruediv": lambda x, y: pc.divide(*cast_for_truediv(y, x)),
  "floordiv": lambda x, y: floordiv_compat(x, y),
  "rfloordiv": lambda x, y: floordiv_compat(y, x),
  "mod": NotImplemented,

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -905,8 +905,9 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
  else:
  assert pa.types.is_decimal(alt_dtype.pyarrow_dtype)
  return expected.astype(alt_dtype)
-
- else:
+ elif op_name not in ["__floordiv__", "__rfloordiv__"] or isinstance(
+ other, pd.Series
+ ):
  pa_expected = pa_expected.cast(orig_pa_type)
 
  pd_expected = type(expected_data._values)(pa_expected)
@@ -3239,13 +3240,71 @@ def test_arrow_floordiv():
 
 
 def test_arrow_floordiv_large_values():
- # GH 55561
+ # GH 56645
  a = pd.Series([1425801600000000000], dtype="int64[pyarrow]")
  expected = pd.Series([1425801600000], dtype="int64[pyarrow]")
  result = a // 1_000_000
  tm.assert_series_equal(result, expected)
 
 
+def test_arrow_floordiv_large_integral_result():
+ # GH 56676
+ a = pd.Series([18014398509481983, -9223372036854775808], dtype="int64[pyarrow]")
+ result = a // 1
+ tm.assert_series_equal(result, a)
+
+
+def test_arrow_floordiv_larger_divisor():
+ # GH 56676
+ a = pd.Series([-23], dtype="int64[pyarrow]")
+ result = a // 24
+ expected = pd.Series([-1], dtype="int64[pyarrow]")
+ tm.assert_series_equal(result, expected)
+
+
+def test_arrow_floordiv_integral_invalid():
+ # GH 56676
+ a = pd.Series([-9223372036854775808], dtype="int64[pyarrow]")
+ with pytest.raises(pa.lib.ArrowInvalid, match="overflow"):
+ a // -1
+ with pytest.raises(pa.lib.ArrowInvalid, match="divide by zero"):
+ a // 0
+
+
+def test_arrow_floordiv_floating_0_divisor():
+ # GH 56676
+ a = pd.Series([2], dtype="double[pyarrow]")
+ result = a // 0
+ expected = pd.Series([float("inf")], dtype="double[pyarrow]")
+ tm.assert_series_equal(result, expected)
+
+
+def test_arrow_floordiv_no_overflow():
+ # GH 56676
+ a = pd.Series([9223372036854775808], dtype="uint64[pyarrow]")
+ b = pd.Series([1], dtype="uint64[pyarrow]")
+ result = a // b
+ tm.assert_series_equal(result, a)
+
+
+def test_arrow_true_division_large_divisor():
+ # GH 56706
+ a = pd.Series([0], dtype="int64[pyarrow]")
+ b = pd.Series([18014398509481983], dtype="int64[pyarrow]")
+ expected = pd.Series([0], dtype="float64[pyarrow]")
+ result = a / b
+ tm.assert_series_equal(result, expected)
+
+
+def test_arrow_floor_division_large_divisor():
+ # GH 56706
+ a = pd.Series([0], dtype="int64[pyarrow]")
+ b = pd.Series([18014398509481983], dtype="int64[pyarrow]")
+ expected = pd.Series([0], dtype="int64[pyarrow]")
+ result = a // b
+ tm.assert_series_equal(result, expected)
+
+
 def test_string_to_datetime_parsing_cast():
  # GH 56266
  string_dates = ["2020-01-01 04:30:00", "2020-01-02 00:00:00", "2020-01-03 00:00:00"]