Rename project_id to your_project_id
Move duplicate imports out of region tags. Add region tag for the whole sample.
tswast committed Feb 7, 2019
commit 1be1a6ecefa75be505c72d73444100ac57a6cf7c
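
The commit message describes a common snippet-extraction convention: a broad `_all` region tag now wraps the narrower per-step tags so the whole tutorial can be published as one sample, while test-only scaffolding stays outside the tags. A minimal sketch of that layout (hypothetical tag names; assumes the docs pipeline extracts whatever sits between matching markers):

import uuid  # test-only import: outside the tags, so never published

# [START sample_all]
# [START sample_create_client]
import google.auth  # imports the reader needs stay inside the snippet
# [END sample_create_client]
# [END sample_all]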
80 changes: 45 additions & 35 deletions bigquery_storage/to_dataframe/main_test.py
@@ -19,6 +19,7 @@
 
 @pytest.fixture
 def clients():
+    # [START bigquerystorage_pandas_tutorial_all]
     # [START bigquerystorage_pandas_tutorial_create_client]
     import google.auth
     from google.cloud import bigquery
@@ -27,30 +28,37 @@ def clients():
     # Explicitly create a credentials object. This allows you to use the same
     # credentials for both the BigQuery and BigQuery Storage clients, avoiding
     # unnecessary API calls to fetch duplicate authentication tokens.
-    credentials, project_id = google.auth.default(
+    credentials, your_project_id = google.auth.default(
         scopes=["https://www.googleapis.com/auth/cloud-platform"]
     )
 
     # Make clients.
-    bqclient = bigquery.Client(credentials=credentials, project=project_id)
+    bqclient = bigquery.Client(credentials=credentials, project=your_project_id)
     bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient(
         credentials=credentials
     )
     # [END bigquerystorage_pandas_tutorial_create_client]
-    return bqclient, bqstorageclient, project_id
+    # [END bigquerystorage_pandas_tutorial_all]
+    return bqclient, bqstorageclient
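
For contrast with the credentials comment above, a rough sketch of what passing an explicit `credentials` object avoids; the claim that each constructor resolves Application Default Credentials on its own is an assumption about client internals:

# Without shared credentials, each client calls google.auth.default()
# itself, potentially fetching authentication tokens twice.
bqclient = bigquery.Client()
bqstorageclient = bigquery_storage_v1beta1.BigQueryStorageClient()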


 @pytest.fixture
 def temporary_dataset(clients):
-    bqclient, _, _ = clients
-    dataset_id = "bqstorage_to_dataset_{}".format(uuid.uuid4().hex)
+    from google.cloud import bigquery
+
+    bqclient, _ = clients
 
+    # [START bigquerystorage_pandas_tutorial_all]
     # [START bigquerystorage_pandas_tutorial_create_dataset]
-    from google.cloud import bigquery
+    # Set the dataset_id to the dataset used to store temporary results.
+    dataset_id = "query_results_dataset"
+    # [END bigquerystorage_pandas_tutorial_create_dataset]
+    # [END bigquerystorage_pandas_tutorial_all]
 
+    # TODO: Set the dataset_id to the dataset used to store temporary results.
+    # dataset_id = "query_results_dataset"
+    dataset_id = "bqstorage_to_dataset_{}".format(uuid.uuid4().hex)
+
+    # [START bigquerystorage_pandas_tutorial_all]
+    # [START bigquerystorage_pandas_tutorial_create_dataset]
     dataset_ref = bqclient.dataset(dataset_id)
     dataset = bigquery.Dataset(dataset_ref)
 
@@ -59,16 +67,18 @@ def temporary_dataset(clients):
 
     bqclient.create_dataset(dataset)  # API request.
     # [END bigquerystorage_pandas_tutorial_create_dataset]
-    yield dataset_id
+    # [END bigquerystorage_pandas_tutorial_all]
+    yield dataset_ref
     bqclient.delete_dataset(dataset_ref, delete_contents=True)
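
The switch to `yield dataset_ref` leans on pytest's yield-fixture semantics: code after the `yield` is teardown, so the dataset is deleted whether the test passes or fails. A generic sketch of the pattern, with hypothetical `make_resource`/`delete_resource` helpers:

import pytest

@pytest.fixture
def resource():
    handle = make_resource()   # setup runs before the test body
    yield handle               # the test receives `handle`
    delete_resource(handle)    # teardown runs after the test, pass or fail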


 def test_table_to_dataframe(capsys, clients):
-    bqclient, bqstorageclient, _ = clients
-
-    # [START bigquerystorage_pandas_tutorial_read_table]
     from google.cloud import bigquery
 
+    bqclient, bqstorageclient = clients
+
+    # [START bigquerystorage_pandas_tutorial_all]
+    # [START bigquerystorage_pandas_tutorial_read_table]
     # Download a table.
     table = bigquery.TableReference.from_string(
         "bigquery-public-data.utility_us.country_code_iso"
@@ -80,34 +90,24 @@ def test_table_to_dataframe(capsys, clients):
             bigquery.SchemaField("fips_code", "STRING"),
         ],
     )
-    dataframe = rows.to_dataframe(bqstorageclient)
+    dataframe = rows.to_dataframe(bqstorage_client=bqstorageclient)
     print(dataframe.head())
     # [END bigquerystorage_pandas_tutorial_read_table]
+    # [END bigquerystorage_pandas_tutorial_all]
 
     out, _ = capsys.readouterr()
     assert "country_name" in out


 def test_query_to_dataframe(capsys, clients, temporary_dataset):
-    bqclient, bqstorageclient, _ = clients
-    dataset_id = temporary_dataset
-
-    # [START bigquerystorage_pandas_tutorial_read_query_results]
-    import uuid
-
-    from google.cloud import bigquery
-
-    # Due to a known issue in the BigQuery Storage API (TODO: link to
-    # public issue), small query result sets cannot be downloaded. To
-    # workaround this issue, write results to a destination table.
+    bqclient, bqstorageclient = clients
+    dataset_ref = temporary_dataset
 
-    # TODO: Set dataset_id to a dataset that will store temporary query
-    # results. Set the default table expiration time to ensure data is
-    # deleted after the results have been downloaded.
-    # dataset_id = "temporary_dataset_for_query_results"
-    dataset = bqclient.dataset(dataset_id)
-    table_id = "queryresults_" + uuid.uuid4().hex
-    table = dataset.table(table_id)
+    # [START bigquerystorage_pandas_tutorial_all]
+    # [START bigquerystorage_pandas_tutorial_read_query_results]
+    import uuid
 
     # Download query results.
     query_string = """
@@ -120,28 +120,37 @@ def test_query_to_dataframe(capsys, clients, temporary_dataset):
     WHERE tags like '%google-bigquery%'
     ORDER BY view_count DESC
     """
+    # Use a random table name to avoid overwriting existing tables.
+    table_id = "queryresults_" + uuid.uuid4().hex
+    table = dataset_ref.table(table_id)
     query_config = bigquery.QueryJobConfig(
-        destination=table, write_disposition="WRITE_TRUNCATE"
+        # Due to a known issue in the BigQuery Storage API, small query result
+        # sets cannot be downloaded. To workaround this issue, write results to
+        # a destination table.
+        destination=table
     )
 
     dataframe = (
         bqclient.query(query_string, job_config=query_config)
         .result()
-        .to_dataframe(bqstorageclient)
+        .to_dataframe(bqstorage_client=bqstorageclient)
     )
     print(dataframe.head())
     # [END bigquerystorage_pandas_tutorial_read_query_results]
+    # [END bigquerystorage_pandas_tutorial_all]
 
     out, _ = capsys.readouterr()
     assert "stackoverflow" in out


 def test_session_to_dataframe(capsys, clients):
-    bqclient, bqstorageclient, project_id = clients
-
-    # [START bigquerystorage_pandas_tutorial_read_session]
     from google.cloud import bigquery_storage_v1beta1
 
+    bqclient, bqstorageclient = clients
+    your_project_id = bqclient.project
+
+    # [START bigquerystorage_pandas_tutorial_all]
+    # [START bigquerystorage_pandas_tutorial_read_session]
     table = bigquery_storage_v1beta1.types.TableReference()
     table.project_id = "bigquery-public-data"
     table.dataset_id = "new_york_trees"
@@ -153,7 +162,7 @@ def test_session_to_dataframe(capsys, clients):
     read_options.selected_fields.append("species_common_name")
     read_options.selected_fields.append("fall_color")
 
-    parent = "projects/{}".format(project_id)
+    parent = "projects/{}".format(your_project_id)
     session = bqstorageclient.create_read_session(
         table, parent, read_options=read_options
     )
@@ -173,6 +182,7 @@
     dataframe = reader.to_dataframe(session)
     print(dataframe.head())
     # [END bigquerystorage_pandas_tutorial_read_session]
+    # [END bigquerystorage_pandas_tutorial_all]
 
     out, _ = capsys.readouterr()
     assert "species_common_name" in out