@@ -2456,6 +2456,7 @@ def load_table_from_dataframe(
         project: str = None,
         job_config: LoadJobConfig = None,
         parquet_compression: str = "snappy",
+        parquet_use_compliant_nested_type: bool = True,
         timeout: float = DEFAULT_TIMEOUT,
     ) -> job.LoadJob:
         """Upload the contents of a table from a pandas DataFrame.
@@ -2519,18 +2520,34 @@ def load_table_from_dataframe(
                 :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are
                 supported.
             parquet_compression (Optional[str]):
-                [Beta] The compression method to use if intermittently
-                serializing ``dataframe`` to a parquet file.
-
-                The argument is directly passed as the ``compression``
-                argument to the underlying ``pyarrow.parquet.write_table()``
-                method (the default value "snappy" gets converted to uppercase).
-                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
-
-                If the job config schema is missing, the argument is directly
-                passed as the ``compression`` argument to the underlying
-                ``DataFrame.to_parquet()`` method.
-                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
+                [Beta] The compression method to use when serializing
+                ``dataframe`` to a temporary parquet file.
+
+                The argument is directly passed as the ``compression``
+                argument to the underlying ``pyarrow.parquet.write_table()``
+                method (the default value "snappy" gets converted to uppercase).
+                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
+
+                If the job config schema is missing, the argument is directly
+                passed as the ``compression`` argument to the underlying
+                ``DataFrame.to_parquet()`` method.
+                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
+            parquet_use_compliant_nested_type (bool):
+                Whether the ``pyarrow.parquet.write_table`` serializing method
+                should write compliant Parquet nested types (lists). Defaults to ``True``.
+
+                The argument is directly passed as the ``use_compliant_nested_type``
+                argument to the underlying ``pyarrow.parquet.write_table()``
+                method.
+                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table
+
+                If the job config schema is missing, the argument is directly
+                passed as an additional keyword argument to the underlying
+                ``DataFrame.to_parquet()`` method.
+                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
+
+                This argument is only present to allow for backwards compatibility with
+                tables created using an older version of this method.
             timeout (Optional[float]):
                 The number of seconds to wait for the underlying HTTP transport
                 before using ``retry``.
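
For callers, the net effect is one new keyword argument on ``load_table_from_dataframe``. A minimal sketch of a call that exercises it (the client setup and the ``my-project.my_dataset.my_table`` destination are placeholders, not part of this commit):

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client()

    # A list-valued column is what the new flag affects: it controls how the
    # nested list type is named in the intermediate parquet file.
    dataframe = pandas.DataFrame({"tags": [["a", "b"], ["c"]]})

    job = client.load_table_from_dataframe(
        dataframe,
        "my-project.my_dataset.my_table",  # placeholder destination
        parquet_compression="snappy",
        # Pass False to keep the legacy nested-type naming when appending to
        # tables created with an older version of this method.
        parquet_use_compliant_nested_type=True,
    )
    job.result()  # block until the load job finishes
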
@@ -2647,9 +2664,15 @@ def load_table_from_dataframe(
                     job_config.schema,
                     tmppath,
                     parquet_compression=parquet_compression,
+                    parquet_use_compliant_nested_type=parquet_use_compliant_nested_type,
                 )
             else:
-                dataframe.to_parquet(tmppath, compression=parquet_compression)
+                dataframe.to_parquet(
+                    tmppath,
+                    engine="pyarrow",
+                    compression=parquet_compression,
+                    use_compliant_nested_type=parquet_use_compliant_nested_type,
+                )
 
         else:
 
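For background, a minimal sketch of what the flag controls at the pyarrow layer (this assumes a pyarrow version that accepts ``use_compliant_nested_type``, 4.0+ as far as I know; the ``/tmp`` paths are placeholders). The compliant form names the repeated child of a LIST column ``element``, as the Parquet specification requires; the legacy pyarrow behavior names it ``item``, which is why the flag matters for schema compatibility with existing tables:

    import pyarrow as pa
    import pyarrow.parquet as pq

    table = pa.table({"tags": [["a", "b"], ["c"]]})

    # Spec-compliant nested type: the list child field is written as "element".
    pq.write_table(table, "/tmp/compliant.parquet", use_compliant_nested_type=True)
    print(pq.read_schema("/tmp/compliant.parquet"))  # expect: tags: list<element: string>

    # Legacy pyarrow naming: the list child field is written as "item".
    pq.write_table(table, "/tmp/legacy.parquet", use_compliant_nested_type=False)
    print(pq.read_schema("/tmp/legacy.parquet"))  # expect: tags: list<item: string>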