This repository was archived by the owner on Sep 12, 2025. It is now read-only.

Commit ad342fe

feat: add estimated physical file sizes to ReadAPI v1 (#605)
* feat: add estimated physical file sizes to ReadAPI v1

  PiperOrigin-RevId: 542350532
  Source-Link: googleapis/googleapis@a4ff1c2
  Source-Link: googleapis/googleapis-gen@b5b5fe5
  Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiYjViNWZlNWRjZDRiY2UxNWI3YjkwMzViOTI1NDUyZWU3Y2FmNDg5YiJ9

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Anthonios Partheniou <partheniou@google.com>
1 parent 4884cd2 commit ad342fe

File tree

4 files changed (+24, -7 lines)

google/cloud/bigquery_storage_v1/types/stream.py

Lines changed: 18 additions & 5 deletions
@@ -134,6 +134,15 @@ class ReadSession(proto.Message):
             are completely consumed. This estimate is based
             on metadata from the table which might be
             incomplete or stale.
+        estimated_total_physical_file_size (int):
+            Output only. A pre-projected estimate of the
+            total physical size (in bytes) of files this
+            session will scan when all streams are
+            completely consumed. This estimate does not
+            depend on the selected columns and can be based
+            on metadata from the table which might be
+            incomplete or stale. Only set for BigLake
+            tables.
         estimated_row_count (int):
             Output only. An estimate on the number of
             rows present in this session's streams. This
@@ -233,11 +242,11 @@ class TableReadOptions(proto.Message):
             sample_percentage (float):
                 Optional. Specifies a table sampling percentage.
                 Specifically, the query planner will use TABLESAMPLE SYSTEM
-                (sample_percentage PERCENT). This samples at the file-level.
-                It will randomly choose for each file whether to include
-                that file in the sample returned. Note, that if the table
-                only has one file, then TABLESAMPLE SYSTEM will select that
-                file and return all returnable rows contained within.
+                (sample_percentage PERCENT). The sampling percentage is
+                applied at the data block granularity. It will randomly
+                choose for each data block whether to read the rows in that
+                data block. For more details, see
+                https://cloud.google.com/bigquery/docs/table-sampling)

                 This field is a member of `oneof`_ ``_sample_percentage``.
         """
@@ -317,6 +326,10 @@ class TableReadOptions(proto.Message):
         proto.INT64,
         number=12,
     )
+    estimated_total_physical_file_size: int = proto.Field(
+        proto.INT64,
+        number=15,
+    )
     estimated_row_count: int = proto.Field(
         proto.INT64,
         number=14,
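
For orientation, here is a minimal usage sketch (not part of this commit) of where the new estimate and the data-block sampling option surface on the client side. The project, dataset, and table names are placeholders, and the field is only populated for BigLake tables.

# Illustrative sketch only; resource names are placeholders and this assumes
# application default credentials are configured.
from google.cloud import bigquery_storage_v1
from google.cloud.bigquery_storage_v1 import types

client = bigquery_storage_v1.BigQueryReadClient()

requested_session = types.ReadSession(
    table="projects/my-project/datasets/my_dataset/tables/my_biglake_table",
    data_format=types.DataFormat.AVRO,
    read_options=types.ReadSession.TableReadOptions(
        # Per the doc change above, sampling is applied per data block, not per row.
        sample_percentage=10.0,
    ),
)

session = client.create_read_session(
    parent="projects/my-project",
    read_session=requested_session,
    max_stream_count=1,
)

# New in this change: only set for BigLake tables; may be based on incomplete
# or stale table metadata.
print("estimated physical file size (bytes):", session.estimated_total_physical_file_size)
print("estimated logical bytes scanned:", session.estimated_total_bytes_scanned)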

samples/generated_samples/snippet_metadata_google.cloud.bigquery.storage.v1.json

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-bigquery-storage",
-    "version": "2.20.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {

samples/generated_samples/snippet_metadata_google.cloud.bigquery.storage.v1beta2.json

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-bigquery-storage",
-    "version": "2.20.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {

tests/unit/gapic/bigquery_storage_v1/test_big_query_read.py

Lines changed: 4 additions & 0 deletions
@@ -702,6 +702,7 @@ def test_create_read_session(request_type, transport: str = "grpc"):
             data_format=stream.DataFormat.AVRO,
             table="table_value",
             estimated_total_bytes_scanned=3076,
+            estimated_total_physical_file_size=3608,
             estimated_row_count=2047,
             trace_id="trace_id_value",
             avro_schema=avro.AvroSchema(schema="schema_value"),
@@ -719,6 +720,7 @@ def test_create_read_session(request_type, transport: str = "grpc"):
     assert response.data_format == stream.DataFormat.AVRO
     assert response.table == "table_value"
     assert response.estimated_total_bytes_scanned == 3076
+    assert response.estimated_total_physical_file_size == 3608
     assert response.estimated_row_count == 2047
     assert response.trace_id == "trace_id_value"

@@ -765,6 +767,7 @@ async def test_create_read_session_async(
             data_format=stream.DataFormat.AVRO,
             table="table_value",
             estimated_total_bytes_scanned=3076,
+            estimated_total_physical_file_size=3608,
             estimated_row_count=2047,
             trace_id="trace_id_value",
         )
@@ -782,6 +785,7 @@ async def test_create_read_session_async(
     assert response.data_format == stream.DataFormat.AVRO
     assert response.table == "table_value"
     assert response.estimated_total_bytes_scanned == 3076
+    assert response.estimated_total_physical_file_size == 3608
     assert response.estimated_row_count == 2047
     assert response.trace_id == "trace_id_value"
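
The test changes above appear only as diff fragments. As a rough, self-contained illustration (an assumption-laden sketch, not code from this commit), the following condenses the same mocked-transport pattern the generated GAPIC tests use, reusing the illustrative 3608 value:

# Sketch of the generated-test pattern; assumes pytest collects this function
# and that no real network call is made because the transport call is mocked.
from unittest import mock

from google.auth import credentials as ga_credentials
from google.cloud.bigquery_storage_v1.services.big_query_read import BigQueryReadClient
from google.cloud.bigquery_storage_v1.types import storage, stream


def test_estimated_total_physical_file_size_surfaces_on_response():
    client = BigQueryReadClient(
        credentials=ga_credentials.AnonymousCredentials(), transport="grpc"
    )
    with mock.patch.object(
        type(client.transport.create_read_session), "__call__"
    ) as call:
        # Have the mocked gRPC call return a session carrying the new field.
        call.return_value = stream.ReadSession(
            name="name_value",
            estimated_total_physical_file_size=3608,
        )
        response = client.create_read_session(
            request=storage.CreateReadSessionRequest()
        )

    assert response.estimated_total_physical_file_size == 3608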
