@@ -179,10 +179,8 @@ def get_column_metadata(column, name, arrow_type, field_name):
179179 }
180180
181181
182- index_level_name = '__index_level_{:d}__' .format
183-
184-
185- def construct_metadata (df , column_names , index_levels , preserve_index , types ):
182+ def construct_metadata (df , column_names , index_levels , index_column_names ,
183+ preserve_index , types ):
186184 """Returns a dictionary containing enough metadata to reconstruct a pandas
187185 DataFrame as an Arrow Table, including index columns.
188186
@@ -197,9 +195,8 @@ def construct_metadata(df, column_names, index_levels, preserve_index, types):
197195 -------
198196 dict
199197 """
200- ncolumns = len (column_names )
201- df_types = types [:ncolumns - len (index_levels )]
202- index_types = types [ncolumns - len (index_levels ):]
198+ df_types = types [:- len (index_levels )]
199+ index_types = types [- len (index_levels ):]
203200
204201 column_metadata = [
205202 get_column_metadata (
@@ -213,9 +210,6 @@ def construct_metadata(df, column_names, index_levels, preserve_index, types):
213210 ]
214211
215212 if preserve_index :
216- index_column_names = list (map (
217- index_level_name , range (len (index_levels ))
218- ))
219213 index_column_metadata = [
220214 get_column_metadata (
221215 level ,
@@ -294,9 +288,29 @@ def _column_name_to_strings(name):
294288 return str (name )
295289
296290
291+ def _index_level_name (index , i , column_names ):
292+ """Return the name of an index level or a default name if `index.name` is
293+ None or is already a column name.
294+
295+ Parameters
296+ ----------
297+ index : pandas.Index
298+ i : int
299+
300+ Returns
301+ -------
302+ name : str
303+ """
304+ if index .name is not None and index .name not in column_names :
305+ return index .name
306+ else :
307+ return '__index_level_{:d}__' .format (i )
308+
309+
297310def dataframe_to_arrays (df , schema , preserve_index , nthreads = 1 ):
298- names = []
311+ column_names = []
299312 index_columns = []
313+ index_column_names = []
300314 type = None
301315
302316 if preserve_index :
@@ -324,12 +338,13 @@ def dataframe_to_arrays(df, schema, preserve_index, nthreads=1):
324338
325339 columns_to_convert .append (col )
326340 convert_types .append (type )
327- names .append (name )
341+ column_names .append (name )
328342
329343 for i , column in enumerate (index_columns ):
330344 columns_to_convert .append (column )
331345 convert_types .append (None )
332- names .append (index_level_name (i ))
346+ name = _index_level_name (column , i , column_names )
347+ index_column_names .append (name )
333348
334349 # NOTE(wesm): If nthreads=None, then we use a heuristic to decide whether
335350 # using a thread pool is worth it. Currently the heuristic is whether the
@@ -358,8 +373,10 @@ def convert_column(col, ty):
358373 types = [x .type for x in arrays ]
359374
360375 metadata = construct_metadata (
361- df , names , index_columns , preserve_index , types
376+ df , column_names , index_columns , index_column_names , preserve_index ,
377+ types
362378 )
379+ names = column_names + index_column_names
363380 return names , arrays , metadata
364381
365382
0 commit comments