pandas-dev · lfffkh · Nov 6, 2024 · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
@@ -509,15 +509,20 @@ def ensure_wrapped_if_datetimelike(arr):
 
 def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
  """
- Convert numpy MaskedArray to ensure mask is softened.
+ Convert numpy MaskedArray to ensure mask is softened, filling masked
+ entries with the promoted fill value unless the dtype is a Masked EA.
  """
  mask = ma.getmaskarray(data)
  if mask.any():
- dtype, fill_value = maybe_promote(data.dtype, np.nan)
- dtype = cast(np.dtype, dtype)
- data = ma.asarray(data.astype(dtype, copy=True))
- data.soften_mask() # set hardmask False if it was True
- data[mask] = fill_value
+ dtype = cast(np.dtype, data.dtype)
+ if isinstance(dtype, ExtensionDtype) and dtype.name.startswith("Masked"):
+ data = ma.asarray(data.astype(dtype, copy=True))
+ data.soften_mask() # If the data is a Masked EA, directly soften the mask.
+ else:
+ dtype, fill_value = maybe_promote(data.dtype, np.nan)
+ data = ma.asarray(data.astype(dtype, copy=True))
+ data.soften_mask() # set hardmask False if it was True
+ data[mask] = fill_value
  else:
  data = data.copy()
  return data

diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py
@@ -34,3 +34,38 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype):
 
  result = sanitize_array(arr, index=None, dtype=np.dtype(object))
  tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+ "values, dtype, expected",
+ [
+ (
+ np.ma.masked_array([1, 2, 3], mask=[False, True, False]),
+ "int64",
+ np.array([1, 2, 3], dtype=np.int64),
+ ),
+ (
+ np.ma.masked_array([1, 2, 3], mask=[False, True, False]),
+ "float64",
+ np.array([1, 2, 3], dtype=np.float64),
+ ),
+ (
+ np.ma.masked_array([1, 2, 3], mask=[False, True, False]),
+ "UInt64",
+ np.array([1, 2, 3], dtype=np.uint64),
+ ),
+ (
+ np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False]),
+ "float64",
+ np.array([1.0, 2.0, 3.0], dtype=np.float64),
+ ),
+ (
+ np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False]),
+ "Int64",
+ np.array([1, 2, 3], dtype=np.int64),
+ ),
+ ],
+)
+def test_sanitize_masked_array_with_masked_ea(values, dtype, expected):
+ result = sanitize_array(values, index=None, dtype=dtype)
+ tm.assert_masked_array_equal(result, expected)