Skip to content
Prev Previous commit
trying new patch location and logic & revamped test infra
  • Loading branch information
akkik04 committed Nov 24, 2025
commit e2ad3fba60b832ba21dde24063006be479d6a40b
44 changes: 23 additions & 21 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5501,29 +5501,31 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:
return _reindex_for_setitem(value, self.index)

if is_list_like(value):
# GH#61026: this method is only used for *single-column* assignment.
# Reject 2D/3D arrays here, except the (n, 1) case which we treat as 1D.
if isinstance(value, np.ndarray) and value.ndim > 1:
if value.ndim == 2:
if value.shape[1] == 1:
# (n, 1) → length-n 1D array
value = value[:, 0]
else:
# More than one column: users should use df[[...]] = value
raise ValueError(
"Setting a DataFrame column with a 2D array requires "
f"shape (n, 1); got shape {value.shape}."
)
else:
# ndim >= 3
raise ValueError(
f"Setting a DataFrame column with ndim {value.ndim} "
"array is not supported."
)

com.require_length_match(value, self.index)

return sanitize_array(value, self.index, copy=True, allow_2d=True), None
# GH#61026: special-case 2D inputs for single-column assignment.
# - accept shape (n, 1) by flattening to 1D
# - disallow 2D *object* arrays with more than one column, since those
# correspond to a single column key and should be rejected
arr = value

# np.matrix is always 2D; gonna convert to regular ndarray
if isinstance(arr, np.matrix):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In what case do we get a matrix here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_sanitize_column(...) can see an np.matrix when the user assigns one directly. For example: df["col"] = np.matrix([[1], [2], [3]]).

Since np.matrix is always 2D and preserves its 2D shape under slicing, calling arr[:, 0] (which occurs on line 5517) on a matrix still gives shape (n, 1) rather than (n,). In other words, we wouldn't actually end up producing a 1D array for matrices in that case.

Hence, I thought converting matrices to a regular ndarray first would ensure that the subsequent blocks behave consistently for both np.ndarray and np.matrix.

arr = np.asarray(arr)

if isinstance(arr, np.ndarray) and arr.ndim == 2:
if arr.shape[1] == 1:
# treating (n, 1) as a length-n 1D array
arr = arr[:, 0]
elif arr.dtype == object:
# single-column setitem with a 2D object array is not allowed.
Comment on lines +5520 to +5521
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why only object dtype here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dtype == object guard is there to keep this bugfix scoped tightly to the case that regressed in issue #61026.

The problematic behaviour (ValueError: Buffer has wrong number of dimensions (expected 1, got 2)) only arose when assigning a 2D dtype=object array to a single column. For other dtypes, assigning a 2D array either already behaves correctly or raises a clearer, existing error, so this change leaves those paths alone to avoid altering semantics outside this issue.

msg = (
"Setting a DataFrame column with a 2D array requires "
f"shape (n, 1); got shape {arr.shape}."
)
raise ValueError(msg)
subarr = sanitize_array(arr, self.index, copy=True, allow_2d=True)
return subarr, None

@property
def _series(self):
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -816,6 +816,24 @@ def test_setitem_index_object_dtype_not_inferring(self):
)
tm.assert_frame_equal(df, expected)

def test_setitem_2d_object_array(self):
# GH#61026
df = DataFrame(
{
"c1": [1, 2, 3, 4, 5],
}
)

arr = np.array([["A"], ["B"], ["C"], ["D"], ["E"]], dtype=object)
df["c1"] = arr

expected = DataFrame(
{
"c1": ["A", "B", "C", "D", "E"],
}
)
tm.assert_frame_equal(df, expected)


class TestSetitemTZAwareValues:
@pytest.fixture
Expand Down
53 changes: 0 additions & 53 deletions pandas/tests/frame/indexing/test_setitem_2d_object.py

This file was deleted.

Loading