Skip to content

Commit fd19a75

Browse files
authored
Add quickstart demonstrating most BQ Storage API read features (#7223)
* Add quickstart demonstrating most BQ Storage API read features This snippet will be included at the bottom of the client library index page for the BigQuery Storage API. It will also be included in the docs at cloud.google.com. Adds a new session for nox to run the tests for samples in the docs directory. Acts as a system test of the rows() method. * s/handles reconnecting/reconnects/ * Add note about optional dependencies to readme.
1 parent 479683c commit fd19a75

File tree

5 files changed

+186
-1
lines changed

5 files changed

+186
-1
lines changed

bigquery_storage/README.rst

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ Python Client for BigQuery Storage API (`Alpha`_)
88

99
.. _Alpha: https://github.com/GoogleCloudPlatform/google-cloud-python/blob/master/README.rst
1010
.. _BigQuery Storage API: https://cloud.google.com/bigquery
11-
.. _Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/stable/bigquery_storage/index.html
11+
.. _Client Library Documentation: https://googlecloudplatform.github.io/google-cloud-python/latest/bigquery_storage/index.html
1212
.. _Product Documentation: https://cloud.google.com/bigquery
1313

1414
Quick Start
@@ -70,6 +70,22 @@ Windows
7070
<your-env>\Scripts\activate
7171
<your-env>\Scripts\pip.exe install google-cloud-bigquery-storage
7272
73+
Optional Dependencies
74+
^^^^^^^^^^^^^^^^^^^^^
75+
76+
Several features of ``google-cloud-bigquery-storage`` require additional
77+
dependencies.
78+
79+
* Parse Avro blocks in a ``read_rows()`` stream using `fastavro
80+
<https://fastavro.readthedocs.io/en/latest/>`_.
81+
82+
``pip install google-cloud-bigquery-storage[fastavro]``
83+
84+
* Write rows to a `pandas <http://pandas.pydata.org/pandas-docs/stable/>`_
85+
DataFrame.
86+
87+
``pip install google-cloud-bigquery-storage[pandas,fastavro]``
88+
7389
Next Steps
7490
~~~~~~~~~~
7591

bigquery_storage/docs/index.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,12 @@ API Reference
88
gapic/v1beta1/api
99
gapic/v1beta1/reader
1010
gapic/v1beta1/types
11+
12+
Example Usage
13+
-------------
14+
15+
.. literalinclude:: quickstart.py
16+
:language: python
17+
:dedent: 4
18+
:start-after: [START bigquerystorage_quickstart]
19+
:end-before: [END bigquerystorage_quickstart]
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import argparse
16+
17+
18+
19+
def main(project_id='your-project-id', snapshot_millis=0):
    """Read a filtered slice of a public table via the BigQuery Storage API.

    Args:
        project_id: Project in which the read session is created.
        snapshot_millis: Snapshot time passed to ``FromMilliseconds``; the
            table modifiers are left unset unless this is greater than 0.
    """
    # [START bigquerystorage_quickstart]
    from google.cloud import bigquery_storage_v1beta1

    # TODO(developer): Set the project_id variable.
    # project_id = 'your-project-id'
    #
    # The read session is created in this project, which does not have to be
    # the project that contains the table.

    client = bigquery_storage_v1beta1.BigQueryStorageClient()
    types = bigquery_storage_v1beta1.types

    # This example reads baby name data from the public datasets.
    table_ref = types.TableReference()
    table_ref.project_id = "bigquery-public-data"
    table_ref.dataset_id = "usa_names"
    table_ref.table_id = "usa_1910_current"

    # Limit the output columns to a subset of those in the table, and set a
    # simple filter so that only names from the state of Washington (WA) are
    # reported.
    read_options = types.TableReadOptions()
    for column in ("name", "number", "state"):
        read_options.selected_fields.append(column)
    read_options.row_restriction = 'state = "WA"'

    # Only attach table modifiers when a snapshot time has been specified.
    table_modifiers = None
    if snapshot_millis > 0:
        table_modifiers = types.TableModifiers()
        table_modifiers.snapshot_time.FromMilliseconds(snapshot_millis)

    parent = "projects/{}".format(project_id)
    session = client.create_read_session(
        table_ref,
        parent,
        table_modifiers=table_modifiers,
        read_options=read_options,
    )  # API request.

    # Only a single stream is used to read data from the table. Because of
    # dynamic sharding, this yields all the rows in the table. To fan out
    # across multiple readers, have one reader process each individual
    # stream.
    position = types.StreamPosition(stream=session.streams[0])
    reader = client.read_rows(position)

    # The read stream contains blocks of Avro-encoded bytes. The rows() method
    # uses the fastavro library to parse these blocks as an iterable of Python
    # dictionaries. Install fastavro with the following command:
    #
    # pip install google-cloud-bigquery-storage[fastavro]
    #
    # Do any local processing by iterating over the rows. The
    # google-cloud-bigquery-storage client reconnects to the API after any
    # transient network errors or timeouts.
    names = set()
    states = set()

    for record in reader.rows(session):
        names.add(record["name"])
        states.add(record["state"])

    print("Got {} unique names in states: {}".format(len(names), states))
    # [END bigquerystorage_quickstart]
89+
90+
91+
if __name__ == "__main__":
    # Command-line entry point: parse the project ID and the optional
    # snapshot time, then run the quickstart with both.
    parser = argparse.ArgumentParser()
    parser.add_argument('project_id')
    parser.add_argument('--snapshot_millis', default=0, type=int)
    args = parser.parse_args()
    # Forward --snapshot_millis as well; previously it was parsed but never
    # passed on, so the flag had no effect.
    main(project_id=args.project_id, snapshot_millis=args.snapshot_millis)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import datetime
16+
import os
17+
18+
import pytest
19+
20+
import quickstart
21+
22+
23+
def now_millis():
    """Return the current UTC time as whole milliseconds since 1970-01-01."""
    epoch = datetime.datetime(1970, 1, 1)
    elapsed = datetime.datetime.utcnow() - epoch
    return int(elapsed.total_seconds() * 1000)
27+
28+
29+
@pytest.fixture()
def project_id():
    """Provide the value of the ``PROJECT_ID`` environment variable."""
    value = os.environ["PROJECT_ID"]
    return value
32+
33+
34+
def test_quickstart_wo_snapshot(capsys, project_id):
    """Run the quickstart without a snapshot time and check its output."""
    quickstart.main(project_id)
    # Index 0 of the captured result is stdout; stderr is ignored.
    captured_stdout = capsys.readouterr()[0]
    assert 'WA' in captured_stdout
38+
39+
40+
def test_quickstart_with_snapshot(capsys, project_id):
    """Run the quickstart with a snapshot time five seconds in the past."""
    five_seconds_ago = now_millis() - 5000
    quickstart.main(project_id, five_seconds_ago)
    # Index 0 of the captured result is stdout; stderr is ignored.
    captured_stdout = capsys.readouterr()[0]
    assert 'WA' in captured_stdout

bigquery_storage/noxfile.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,27 @@ def system(session):
127127
session.run('py.test', '--quiet', 'tests/system/')
128128

129129

130+
@nox.session(python=['2.7', '3.6'])
def snippets(session):
    """Run the snippets test suite."""

    # Sanity check: only run the snippets tests when the credentials
    # environment variable is set to a non-empty value.
    credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS', '')
    if not credentials:
        session.skip('Credentials must be set via environment variable.')

    # Install pytest, then every editable dependency in order: the shared
    # test utilities, the local dependencies, and finally this package with
    # its optional extras.
    session.install('pytest')
    editable_packages = [os.path.join('..', 'test_utils')]
    editable_packages.extend(LOCAL_DEPS)
    editable_packages.append('.[pandas,fastavro]')
    for package in editable_packages:
        session.install('-e', package)

    # Run py.test against the snippets tests in the docs directory.
    session.run('py.test', 'docs', *session.posargs)
149+
150+
130151
@nox.session(python='3.6')
131152
def docs(session):
132153
"""Build the docs."""

0 commit comments

Comments
 (0)