This repository was archived by the owner on Sep 12, 2025. It is now read-only.

Commit fe09e3b

feat: add ability to request compressed ReadRowsResponse rows (#728)
* feat: add ability to request compressed ReadRowsResponse rows

  This change allows the client to request raw lz4 compression of the
  ReadRowsResponse rows data for both ArrowRecordBatches and Avro rows.

  PiperOrigin-RevId: 597000088
  Source-Link: googleapis/googleapis@341d70f
  Source-Link: googleapis/googleapis-gen@01713f3
  Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMDE3MTNmM2Y1NTM0YWNjNzhmMDRkNTllMTNjMDY2OGM4MTI5YmYwMyJ9

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 90cef51 commit fe09e3b

File tree

4 files changed: +59 −2 lines changed

google/cloud/bigquery_storage_v1/types/storage.py

Lines changed: 26 additions & 0 deletions
@@ -233,6 +233,27 @@ class ReadRowsResponse(proto.Message):
             Output only. Arrow schema.

             This field is a member of `oneof`_ ``schema``.
+        uncompressed_byte_size (int):
+            Optional. If the row data in this ReadRowsResponse is
+            compressed, then uncompressed byte size is the original size
+            of the uncompressed row data. If it is set to a value
+            greater than 0, then decompress into a buffer of size
+            uncompressed_byte_size using the compression codec that was
+            requested during session creation time and which is
+            specified in TableReadOptions.response_compression_codec in
+            ReadSession. This value is not set if no
+            response_compression_codec was requested, and it is -1 if
+            the requested compression would not have reduced the size of
+            this ReadRowsResponse's row data. This attempts to match
+            Apache Arrow's behavior described here
+            https://github.com/apache/arrow/issues/15102 where the
+            uncompressed length may be set to -1 to indicate that the
+            data that follows is not compressed, which can be useful for
+            cases where compression does not yield appreciable savings.
+            When uncompressed_byte_size is not greater than 0, the
+            client should skip decompression.
+
+            This field is a member of `oneof`_ ``_uncompressed_byte_size``.
     """

     avro_rows: avro.AvroRows = proto.Field(
@@ -273,6 +294,11 @@ class ReadRowsResponse(proto.Message):
         oneof="schema",
         message=arrow.ArrowSchema,
     )
+    uncompressed_byte_size: int = proto.Field(
+        proto.INT64,
+        number=9,
+        optional=True,
+    )


 class SplitReadStreamRequest(proto.Message):
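
As context for this change, here is a minimal sketch of how a client that requested LZ4 response compression might act on the new field. The helper name is hypothetical and not part of this commit, and the use of the third-party ``lz4.block`` module as the raw LZ4 codec is an assumption.

```python
# Hypothetical helper (not part of this commit): decide whether to decompress a
# ReadRowsResponse Arrow payload based on uncompressed_byte_size.
# Assumption: the third-party ``lz4`` package's block (raw) codec matches the
# RESPONSE_COMPRESSION_CODEC_LZ4 format requested at session creation.
import lz4.block


def maybe_decompress_arrow_batch(response) -> bytes:
    """Return the serialized Arrow record batch bytes, decompressed if needed."""
    data = response.arrow_record_batch.serialized_record_batch
    size = response.uncompressed_byte_size
    # Unset (0) or -1 means the payload was not compressed: skip decompression.
    if size <= 0:
        return data
    # Decompress into a buffer of exactly ``uncompressed_byte_size`` bytes.
    return lz4.block.decompress(data, uncompressed_size=size)
```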

google/cloud/bigquery_storage_v1/types/stream.py

Lines changed: 31 additions & 0 deletions
@@ -249,8 +249,33 @@ class TableReadOptions(proto.Message):
                https://cloud.google.com/bigquery/docs/table-sampling)

                This field is a member of `oneof`_ ``_sample_percentage``.
+            response_compression_codec (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions.ResponseCompressionCodec):
+                Optional. Set response_compression_codec when creating a
+                read session to enable application-level compression of
+                ReadRows responses.
+
+                This field is a member of `oneof`_ ``_response_compression_codec``.
        """

+        class ResponseCompressionCodec(proto.Enum):
+            r"""Specifies which compression codec to attempt on the entire
+            serialized response payload (either Arrow record batch or Avro
+            rows). This is not to be confused with the Apache Arrow native
+            compression codecs specified in ArrowSerializationOptions. For
+            performance reasons, when creating a read session requesting
+            Arrow responses, setting both native Arrow compression and
+            application-level response compression will not be allowed -
+            choose, at most, one kind of compression.
+
+            Values:
+                RESPONSE_COMPRESSION_CODEC_UNSPECIFIED (0):
+                    Default is no compression.
+                RESPONSE_COMPRESSION_CODEC_LZ4 (2):
+                    Use raw LZ4 compression.
+            """
+            RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0
+            RESPONSE_COMPRESSION_CODEC_LZ4 = 2
+
        selected_fields: MutableSequence[str] = proto.RepeatedField(
            proto.STRING,
            number=1,
@@ -276,6 +301,12 @@ class TableReadOptions(proto.Message):
            number=5,
            optional=True,
        )
+        response_compression_codec: "ReadSession.TableReadOptions.ResponseCompressionCodec" = proto.Field(
+            proto.ENUM,
+            number=6,
+            optional=True,
+            enum="ReadSession.TableReadOptions.ResponseCompressionCodec",
+        )

        name: str = proto.Field(
            proto.STRING,
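
A usage sketch of the new option when creating a read session follows. The project, dataset, and table paths are placeholders; only the generated BigQueryReadClient surface already present in this library is assumed. As the enum docstring above notes, do not combine this with Arrow's native compression in ArrowSerializationOptions.

```python
# Usage sketch (placeholder resource names; standard BigQueryReadClient surface).
from google.cloud import bigquery_storage_v1
from google.cloud.bigquery_storage_v1 import types

client = bigquery_storage_v1.BigQueryReadClient()

codec = types.ReadSession.TableReadOptions.ResponseCompressionCodec

session = client.create_read_session(
    parent="projects/my-project",  # placeholder
    read_session=types.ReadSession(
        table="projects/my-project/datasets/my_dataset/tables/my_table",  # placeholder
        data_format=types.DataFormat.ARROW,
        read_options=types.ReadSession.TableReadOptions(
            # Request application-level LZ4 compression of ReadRows responses.
            response_compression_codec=codec.RESPONSE_COMPRESSION_CODEC_LZ4,
        ),
    ),
    max_stream_count=1,
)
```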

samples/generated_samples/snippet_metadata_google.cloud.bigquery.storage.v1.json

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-bigquery-storage",
-    "version": "2.24.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {

samples/generated_samples/snippet_metadata_google.cloud.bigquery.storage.v1beta2.json

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-bigquery-storage",
-    "version": "2.24.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {
