pandas-dev
diff --git a/‎doc/source/whatsnew/v0.25.0.rst‎
Lines changed: 2 additions & 0 deletions b/‎doc/source/whatsnew/v0.25.0.rst‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎pandas/_libs/lib.pyx‎
Lines changed: 15 additions & 10 deletions b/‎pandas/_libs/lib.pyx‎
Lines changed: 15 additions & 10 deletions
diff --git a/‎pandas/core/tools/numeric.py‎
Lines changed: 8 additions & 0 deletions b/‎pandas/core/tools/numeric.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎pandas/tests/tools/test_numeric.py‎
Lines changed: 133 additions & 15 deletions b/‎pandas/tests/tools/test_numeric.py‎
Lines changed: 133 additions & 15 deletions
@@ -110,6 +110,8 @@ Timezones
 Numeric
 ^^^^^^^
 
+- Bug in :meth:`to_numeric` in which large negative numbers were being improperly handled (:issue:`24910`)
+- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`)
 -
 -
 -
 
@@ -1828,7 +1828,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
  except (ValueError, OverflowError, TypeError):
  pass
 
- # otherwise, iterate and do full infererence
+ # Otherwise, iterate and do full inference.
  cdef:
  int status, maybe_int
  Py_ssize_t i, n = values.size
@@ -1865,10 +1865,10 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
  else:
  seen.float_ = True
 
- if val <= oINT64_MAX:
+ if oINT64_MIN <= val <= oINT64_MAX:
  ints[i] = val
 
- if seen.sint_ and seen.uint_:
+ if val < oINT64_MIN or (seen.sint_ and seen.uint_):
  seen.float_ = True
 
  elif util.is_bool_object(val):
@@ -1910,23 +1910,28 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
  else:
  seen.saw_int(as_int)
 
- if not (seen.float_ or as_int in na_values):
+ if as_int not in na_values:
  if as_int < oINT64_MIN or as_int > oUINT64_MAX:
- raise ValueError('Integer out of range.')
+ if seen.coerce_numeric:
+ seen.float_ = True
+ else:
+ raise ValueError("Integer out of range.")
+ else:
+ if as_int >= 0:
+ uints[i] = as_int
 
- if as_int >= 0:
- uints[i] = as_int
- if as_int <= oINT64_MAX:
- ints[i] = as_int
+ if as_int <= oINT64_MAX:
+ ints[i] = as_int
 
  seen.float_ = seen.float_ or (seen.uint_ and seen.sint_)
  else:
  seen.float_ = True
  except (TypeError, ValueError) as e:
  if not seen.coerce_numeric:
- raise type(e)(str(e) + ' at position {pos}'.format(pos=i))
+ raise type(e)(str(e) + " at position {pos}".format(pos=i))
  elif "uint64" in str(e): # Exception from check functions.
  raise
+
  seen.saw_null()
  floats[i] = NaN
 
 
@@ -19,6 +19,14 @@ def to_numeric(arg, errors='raise', downcast=None):
  depending on the data supplied. Use the `downcast` parameter
  to obtain other dtypes.
 
+ Please note that precision loss may occur if really large numbers
+ are passed in. Due to the internal limitations of `ndarray`, if
+ numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
+ or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
+ passed in, it is very likely they will be converted to float so that
+ they can be stored in an `ndarray`. These warnings apply similarly to
+ `Series` since it internally leverages `ndarray`.
+
  Parameters
  ----------
  arg : scalar, list, tuple, 1-d array, or Series
 
@@ -4,11 +4,50 @@
 from numpy import iinfo
 import pytest
 
+import pandas.compat as compat
+
 import pandas as pd
 from pandas import DataFrame, Index, Series, to_numeric
 from pandas.util import testing as tm
 
 
+@pytest.fixture(params=[None, "ignore", "raise", "coerce"])
+def errors(request):
+ return request.param
+
+
+@pytest.fixture(params=[True, False])
+def signed(request):
+ return request.param
+
+
+@pytest.fixture(params=[lambda x: x, str], ids=["identity", "str"])
+def transform(request):
+ return request.param
+
+
+@pytest.fixture(params=[
+ 47393996303418497800,
+ 100000000000000000000
+])
+def large_val(request):
+ return request.param
+
+
+@pytest.fixture(params=[True, False])
+def multiple_elts(request):
+ return request.param
+
+
+@pytest.fixture(params=[
+ (lambda x: Index(x, name="idx"), tm.assert_index_equal),
+ (lambda x: Series(x, name="ser"), tm.assert_series_equal),
+ (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal)
+])
+def transform_assert_equal(request):
+ return request.param
+
+
 @pytest.mark.parametrize("input_kwargs,result_kwargs", [
  (dict(), dict(dtype=np.int64)),
  (dict(errors="coerce", downcast="integer"), dict(dtype=np.int8))
@@ -172,7 +211,6 @@ def test_all_nan():
  tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize("errors", [None, "ignore", "raise", "coerce"])
 def test_type_check(errors):
  # see gh-11776
  df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
@@ -183,11 +221,100 @@ def test_type_check(errors):
  to_numeric(df, **kwargs)
 
 
-@pytest.mark.parametrize("val", [
- 1, 1.1, "1", "1.1", -1.5, "-1.5"
-])
-def test_scalar(val):
- assert to_numeric(val) == float(val)
+@pytest.mark.parametrize("val", [1, 1.1, 20001])
+def test_scalar(val, signed, transform):
+ val = -val if signed else val
+ assert to_numeric(transform(val)) == float(val)
+
+
+def test_really_large_scalar(large_val, signed, transform, errors):
+ # see gh-24910
+ kwargs = dict(errors=errors) if errors is not None else dict()
+ val = -large_val if signed else large_val
+
+ val = transform(val)
+ val_is_string = isinstance(val, str)
+
+ if val_is_string and errors in (None, "raise"):
+ msg = "Integer out of range. at position 0"
+ with pytest.raises(ValueError, match=msg):
+ to_numeric(val, **kwargs)
+ else:
+ expected = float(val) if (errors == "coerce" and
+ val_is_string) else val
+ assert tm.assert_almost_equal(to_numeric(val, **kwargs), expected)
+
+
+def test_really_large_in_arr(large_val, signed, transform,
+ multiple_elts, errors):
+ # see gh-24910
+ kwargs = dict(errors=errors) if errors is not None else dict()
+ val = -large_val if signed else large_val
+ val = transform(val)
+
+ extra_elt = "string"
+ arr = [val] + multiple_elts * [extra_elt]
+
+ val_is_string = isinstance(val, str)
+ coercing = errors == "coerce"
+
+ if errors in (None, "raise") and (val_is_string or multiple_elts):
+ if val_is_string:
+ msg = "Integer out of range. at position 0"
+ else:
+ msg = 'Unable to parse string "string" at position 1'
+
+ with pytest.raises(ValueError, match=msg):
+ to_numeric(arr, **kwargs)
+ else:
+ result = to_numeric(arr, **kwargs)
+
+ exp_val = float(val) if (coercing and val_is_string) else val
+ expected = [exp_val]
+
+ if multiple_elts:
+ if coercing:
+ expected.append(np.nan)
+ exp_dtype = float
+ else:
+ expected.append(extra_elt)
+ exp_dtype = object
+ else:
+ exp_dtype = float if isinstance(exp_val, (
+ int, compat.long, float)) else object
+
+ tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
+
+
+def test_really_large_in_arr_consistent(large_val, signed,
+ multiple_elts, errors):
+ # see gh-24910
+ #
+ # Even if we discover that we have to hold float, that does not mean
+ # we should be lenient on subsequent elements that fail to be integers.
+ kwargs = dict(errors=errors) if errors is not None else dict()
+ arr = [str(-large_val if signed else large_val)]
+
+ if multiple_elts:
+ arr.insert(0, large_val)
+
+ if errors in (None, "raise"):
+ index = int(multiple_elts)
+ msg = "Integer out of range. at position {index}".format(index=index)
+
+ with pytest.raises(ValueError, match=msg):
+ to_numeric(arr, **kwargs)
+ else:
+ result = to_numeric(arr, **kwargs)
+
+ if errors == "coerce":
+ expected = [float(i) for i in arr]
+ exp_dtype = float
+ else:
+ expected = arr
+ exp_dtype = object
+
+ tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype))
 
 
 @pytest.mark.parametrize("errors,checker", [
@@ -205,15 +332,6 @@ def test_scalar_fail(errors, checker):
  assert checker(to_numeric(scalar, errors=errors))
 
 
-@pytest.fixture(params=[
- (lambda x: Index(x, name="idx"), tm.assert_index_equal),
- (lambda x: Series(x, name="ser"), tm.assert_series_equal),
- (lambda x: np.array(Index(x).values), tm.assert_numpy_array_equal)
-])
-def transform_assert_equal(request):
- return request.param
-
-
 @pytest.mark.parametrize("data", [
  [1, 2, 3],
  [1., np.nan, 3, np.nan]
Original file line number	Diff line number	Diff line change
`@@ -110,6 +110,8 @@ Timezones`
`110`	`110`	`Numeric`
`111`	`111`	`^^^^^^^`
`112`	`112`
	`113`	+- Bug in :meth:`to_numeric` in which large negative numbers were being improperly handled (:issue:`24910`)
	`114`	+- Bug in :meth:`to_numeric` in which numbers were being coerced to float, even though ``errors`` was not ``coerce`` (:issue:`24910`)
`113`	`115`	`-`
`114`	`116`	`-`
`115`	`117`	`-`