pandas-dev · kastkeepitjumpinlikekangaroos · Oct 25, 2024 · Oct 25, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst
@@ -130,7 +130,7 @@ MultiIndex
 
 I/O
 ^^^
--
+- Bug in :func:`read_sql` raising ``UnicodeDecodeError`` when a column of bytes that cannot be decoded to a string is read with ``dtype_backend="pyarrow"`` (:issue:`59242`)
 -
 
 Period

diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
@@ -970,7 +970,17 @@ def convert(arr):
  if dtype_backend != "numpy" and arr.dtype == np.dtype("O"):
  new_dtype = StringDtype()
  arr_cls = new_dtype.construct_array_type()
- arr = arr_cls._from_sequence(arr, dtype=new_dtype)
+ try:
+ # Addressing (#59242)
+ # Byte data that could not be decoded into
+ # a string would throw a UnicodeDecodeError exception
+
+ # Greedily try to convert to string; bytes that cannot be
+ # decoded raise UnicodeDecodeError and are left as objects
+ arr = arr_cls._from_sequence(arr, dtype=new_dtype)
+ except UnicodeDecodeError:
+ pass
+
  elif dtype_backend != "numpy" and isinstance(arr, np.ndarray):
  if arr.dtype.kind in "iufb":
  arr = pd_array(arr, copy=False)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -4352,3 +4352,18 @@ def test_xsqlite_if_exists(sqlite_buildin):
  (5, "E"),
  ]
  drop_table(table_name, sqlite_buildin)
+
+
+def test_bytes_column(sqlite_buildin):
+    """
+    Regression test for (#59242)
+    Bytes being returned in a column that could not be converted
+    to a string would raise a UnicodeDecodeError
+    when using dtype_backend='pyarrow'
+    """
+    pytest.importorskip("pyarrow")
+    query = """
+    select cast(x'0123456789abcdef0123456789abcdef' as blob) a
+    """
+    df = pd.read_sql(query, sqlite_buildin, dtype_backend="pyarrow")
+    assert df.a.values[0] == b"\x01#Eg\x89\xab\xcd\xef\x01#Eg\x89\xab\xcd\xef"
-Original file line number
+Diff line change
@@ Expand Up / @@ -130,7 +130,7 @@ MultiIndex @@
  I/O
  ^^^
- -
+ - Bug in :func:`read_sql` raising ``UnicodeDecodeError`` when a column of bytes that cannot be decoded to a string is read with ``dtype_backend="pyarrow"`` (:issue:`59242`)
  -
  Period
@@ Expand Down @@