@@ -349,41 +349,8 @@ def __new__(
349349 # they are actually ints, e.g. '0' and 0.0
350350 # should not be coerced
351351 # GH 11836
352- if is_integer_dtype (dtype ):
353- inferred = lib .infer_dtype (data , skipna = False )
354- if inferred == "integer" :
355- data = maybe_cast_to_integer_array (data , dtype , copy = copy )
356- elif inferred in ["floating" , "mixed-integer-float" ]:
357- if isna (data ).any ():
358- raise ValueError ("cannot convert float NaN to integer" )
359-
360- if inferred == "mixed-integer-float" :
361- data = maybe_cast_to_integer_array (data , dtype )
362-
363- # If we are actually all equal to integers,
364- # then coerce to integer.
365- try :
366- return cls ._try_convert_to_int_index (
367- data , copy , name , dtype
368- )
369- except ValueError :
370- pass
371-
372- # Return an actual float index.
373- return Float64Index (data , copy = copy , name = name )
374-
375- elif inferred == "string" :
376- pass
377- else :
378- data = data .astype (dtype )
379- elif is_float_dtype (dtype ):
380- inferred = lib .infer_dtype (data , skipna = False )
381- if inferred == "string" :
382- pass
383- else :
384- data = data .astype (dtype )
385- else :
386- data = np .array (data , dtype = dtype , copy = copy )
352+ data = _maybe_cast_with_dtype (data , dtype , copy )
353+ dtype = data .dtype # TODO: maybe not for object?
387354
388355 # maybe coerce to a sub-class
389356 if is_signed_integer_dtype (data .dtype ):
@@ -5486,3 +5453,101 @@ def maybe_extract_name(name, obj, cls) -> Optional[Hashable]:
54865453 raise TypeError (f"{ cls .__name__ } .name must be a hashable type" )
54875454
54885455 return name
5456+
5457+
5458+ def _maybe_cast_with_dtype (data : np .ndarray , dtype : np .dtype , copy : bool ) -> np .ndarray :
5459+ """
5460+ If a dtype is passed, cast to the closest matching dtype that is supported
5461+ by Index.
5462+
5463+ Parameters
5464+ ----------
5465+ data : np.ndarray
5466+ dtype : np.dtype
5467+ copy : bool
5468+
5469+ Returns
5470+ -------
5471+ np.ndarray
5472+ """
5473+ # we need to avoid having numpy coerce
5474+ # things that look like ints/floats to ints unless
5475+ # they are actually ints, e.g. '0' and 0.0
5476+ # should not be coerced
5477+ # GH 11836
5478+ if is_integer_dtype (dtype ):
5479+ inferred = lib .infer_dtype (data , skipna = False )
5480+ if inferred == "integer" :
5481+ data = maybe_cast_to_integer_array (data , dtype , copy = copy )
5482+ elif inferred in ["floating" , "mixed-integer-float" ]:
5483+ if isna (data ).any ():
5484+ raise ValueError ("cannot convert float NaN to integer" )
5485+
5486+ if inferred == "mixed-integer-float" :
5487+ data = maybe_cast_to_integer_array (data , dtype )
5488+
5489+ # If we are actually all equal to integers,
5490+ # then coerce to integer.
5491+ try :
5492+ data = _try_convert_to_int_array (data , copy , dtype )
5493+ except ValueError :
5494+ data = np .array (data , dtype = np .float64 , copy = copy )
5495+
5496+ elif inferred == "string" :
5497+ pass
5498+ else :
5499+ data = data .astype (dtype )
5500+ elif is_float_dtype (dtype ):
5501+ inferred = lib .infer_dtype (data , skipna = False )
5502+ if inferred == "string" :
5503+ pass
5504+ else :
5505+ data = data .astype (dtype )
5506+ else :
5507+ data = np .array (data , dtype = dtype , copy = copy )
5508+
5509+ return data
5510+
5511+
5512+ def _try_convert_to_int_array (
5513+ data : np .ndarray , copy : bool , dtype : np .dtype
5514+ ) -> np .ndarray :
5515+ """
5516+ Attempt to convert an array of data into an integer array.
5517+
5518+ Parameters
5519+ ----------
5520+ data : The data to convert.
5521+ copy : bool
5522+ Whether to copy the data or not.
5523+ dtype : np.dtype
5524+
5525+ Returns
5526+ -------
5527+ int_array : data converted to either an ndarray[int64] or ndarray[uint64]
5528+
5529+ Raises
5530+ ------
5531+ ValueError if the conversion was not successful.
5532+ """
5533+
5534+ if not is_unsigned_integer_dtype (dtype ):
5535+ # skip int64 conversion attempt if uint-like dtype is passed, as
5536+ # this could return Int64Index when UInt64Index is what's desired
5537+ try :
5538+ res = data .astype ("i8" , copy = False )
5539+ if (res == data ).all ():
5540+ return res # TODO: might still need to copy
5541+ except (OverflowError , TypeError , ValueError ):
5542+ pass
5543+
5544+ # Conversion to int64 failed (possibly due to overflow) or was skipped,
5545+ # so let's try now with uint64.
5546+ try :
5547+ res = data .astype ("u8" , copy = False )
5548+ if (res == data ).all ():
5549+ return res # TODO: might still need to copy
5550+ except (OverflowError , TypeError , ValueError ):
5551+ pass
5552+
5553+ raise ValueError
0 commit comments