@@ -74,7 +74,7 @@ class ListDtype(ArrowDtype):
7474 An ExtensionDtype suitable for storing homogeneous lists of data.
7575 """
7676
77- _is_immutable = True # TODO(wayd): should we allow mutability?
77+ _is_immutable = True
7878
7979 def __init__ (self , value_dtype : pa .DataType ) -> None :
8080 super ().__init__ (pa .large_list (value_dtype ))
@@ -100,10 +100,7 @@ def name(self) -> str: # type: ignore[override]
100100 """
101101 A string identifying the data type.
102102 """
103- # TODO: reshaping tests require the name list to match the large_list
104- # implementation; assumedly there are some astype(str(dtype)) casts
105- # going on. Should fix so this can just be "list[...]" for end user
106- return f"large_list[{ self .pyarrow_dtype .value_type !s} ]"
103+ return f"list[{ self .pyarrow_dtype .value_type !s} ]"
107104
108105 @property
109106 def kind (self ) -> str :
@@ -124,7 +121,6 @@ def construct_array_type(cls) -> type_t[ListArray]:
124121 return ListArray
125122
126123 def _get_common_dtype (self , dtypes : list [DtypeObj ]) -> DtypeObj | None :
127- # TODO(wayd): should we implemented value type support?
128124 for dtype in dtypes :
129125 if (
130126 isinstance (dtype , ListDtype )
@@ -153,8 +149,7 @@ def __init__(
153149 if isinstance (values , (pa .Array , pa .ChunkedArray )):
154150 parent_type = values .type
155151 if not isinstance (parent_type , (pa .ListType , pa .LargeListType )):
156- # Ideally could cast here, but I don't think pyarrow implements
157- # many list casts
152+ # TODO: maybe implement native casts in pyarrow
158153 new_values = [
159154 [x .as_py ()] if x .is_valid else None for x in values
160155 ]
@@ -164,12 +159,10 @@ def __init__(
164159 else :
165160 value_type = pa .array (values ).type .value_type
166161
167- # Internally always use large_string instead of string
168162 if value_type == pa .string ():
169163 value_type = pa .large_string ()
170164
171165 if not isinstance (values , pa .ChunkedArray ):
172- # To support NA, we need to create an Array first :-(
173166 arr = pa .array (values , type = pa .large_list (value_type ), from_pandas = True )
174167 self ._pa_array = pa .chunked_array (arr , type = pa .large_list (value_type ))
175168 else :
@@ -200,8 +193,6 @@ def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
200193 values = pa .array (scalars , from_pandas = True )
201194
202195 if values .type == "null" and dtype is not None :
203- # TODO: the sequencing here seems wrong; just making the tests pass for now
204- # but this needs a comprehensive review
205196 pa_type = string_to_pyarrow_type (str (dtype ))
206197 values = pa .array (values , type = pa_type )
207198
@@ -232,8 +223,6 @@ def _box_pa(
232223 return cls ._box_pa_array (value , pa_type )
233224
234225 def __getitem__ (self , item ):
235- # PyArrow does not support NumPy's selection with an equal length
236- # mask, so let's convert those to integral positions if needed
237226 if isinstance (item , (np .ndarray , ExtensionArray )):
238227 if is_bool_dtype (item .dtype ):
239228 mask_len = len (item )
@@ -305,9 +294,6 @@ def _empty(cls, shape: Shape, dtype: ExtensionDtype):
305294 ExtensionDtype.empty
306295 ExtensionDtype.empty is the 'official' public version of this API.
307296 """
308- # Implementer note: while ExtensionDtype.empty is the public way to
309- # call this method, it is still required to implement this `_empty`
310- # method as well (it is called internally in pandas)
311297 if isinstance (shape , tuple ):
312298 if len (shape ) > 1 :
313299 raise ValueError ("ListArray may only be 1-D" )
@@ -334,9 +320,9 @@ def __eq__(self, other):
334320 elif isinstance (other , (pa .ListScalar , pa .LargeListScalar )):
335321 from pandas .arrays import BooleanArray
336322
337- # TODO: pyarrow.compute does not implement broadcasting equality
338- # for an array of lists to a listscalar
339- # TODO: pyarrow doesn't compare missing values as missing???
323+ # TODO: pyarrow.compute does not implement equal for lists
324+ # https://github.com/apache/arrow/issues/45167
325+ # TODO: Python == on pyarrow scalars doesn't treat nulls as missing:
340326 # arr = pa.array([1, 2, None])
341327 # pc.equal(arr, arr[2]) returns all nulls but
342328 # arr[2] == arr[2] returns True
0 commit comments