Skip to content

Commit 23225bc

Browse files
authored
Refactor system tests. (#8984)
* Refactor system tests. Use parameterized tests for different source formats. * flake8
1 parent 3d29c75 commit 23225bc

File tree

3 files changed

+136
-78
lines changed

3 files changed

+136
-78
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# https://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
"""System tests for reading rows from tables."""
17+
18+
import os
19+
20+
import pytest
21+
22+
from google.cloud import bigquery_storage_v1beta1
23+
24+
25+
@pytest.fixture()
def project_id():
    """Return the GCP project to bill for the read sessions.

    Raises KeyError (failing the test early) if PROJECT_ID is not set.
    """
    project = os.environ["PROJECT_ID"]
    return project
28+
29+
30+
@pytest.fixture()
def client():
    """Construct a BigQuery Storage API client for the test to use."""
    storage_client = bigquery_storage_v1beta1.BigQueryStorageClient()
    return storage_client
33+
34+
35+
@pytest.fixture()
def table_reference():
    """Reference a large public table (US first names, 1910-2013)."""
    # Protobuf messages accept their fields as constructor keyword arguments,
    # which is equivalent to assigning each field after construction.
    return bigquery_storage_v1beta1.types.TableReference(
        project_id="bigquery-public-data",
        dataset_id="usa_names",
        table_id="usa_1910_2013",
    )
42+
43+
44+
@pytest.fixture()
def small_table_reference():
    """Reference a small public table, suitable for full-table reads."""
    # Protobuf messages accept their fields as constructor keyword arguments,
    # which is equivalent to assigning each field after construction.
    return bigquery_storage_v1beta1.types.TableReference(
        project_id="bigquery-public-data",
        dataset_id="utility_us",
        table_id="country_code_iso",
    )
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright 2019 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# https://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
"""System tests for reading rows from tables."""
17+
18+
import pytest
19+
20+
from google.cloud import bigquery_storage_v1beta1
21+
22+
23+
@pytest.mark.parametrize(
    "data_format,expected_schema_type",
    (
        # Passing no format should fall back to the default (Avro).
        (None, "avro_schema"),
        (bigquery_storage_v1beta1.enums.DataFormat.AVRO, "avro_schema"),
        (bigquery_storage_v1beta1.enums.DataFormat.ARROW, "arrow_schema"),
    ),
)
def test_read_rows_as_blocks_full_table(
    client, project_id, small_table_reference, data_format, expected_schema_type
):
    """Read all blocks of a small table and sanity-check the results."""
    parent = "projects/{}".format(project_id)
    read_session = client.create_read_session(
        small_table_reference,
        parent,
        format_=data_format,
        requested_streams=1,
    )
    position = bigquery_storage_v1beta1.types.StreamPosition(
        stream=read_session.streams[0]
    )
    # The session's schema oneof tells us which wire format the server chose.
    assert read_session.WhichOneof("schema") == expected_schema_type

    received_blocks = list(client.read_rows(position))

    assert received_blocks
    first_block = received_blocks[0]
    assert first_block.status.estimated_row_count > 0
51+
52+
53+
@pytest.mark.parametrize(
    "data_format,expected_schema_type",
    (
        (bigquery_storage_v1beta1.enums.DataFormat.AVRO, "avro_schema"),
        (bigquery_storage_v1beta1.enums.DataFormat.ARROW, "arrow_schema"),
    ),
)
def test_read_rows_as_rows_full_table(
    client, project_id, small_table_reference, data_format, expected_schema_type
):
    """Iterate a small table row-by-row in each supported wire format."""
    parent = "projects/{}".format(project_id)
    read_session = client.create_read_session(
        small_table_reference,
        parent,
        format_=data_format,
        requested_streams=1,
    )
    position = bigquery_storage_v1beta1.types.StreamPosition(
        stream=read_session.streams[0]
    )

    all_rows = list(client.read_rows(position).rows(read_session))

    assert all_rows

bigquery_storage/tests/system/test_system.py renamed to bigquery_storage/tests/system/test_reader_dataframe.py

Lines changed: 11 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
16-
"""System tests for reading rows from tables."""
17-
18-
import os
16+
"""System tests for reading rows with pandas connector."""
1917

2018
import numpy
2119
import pyarrow.types
@@ -24,50 +22,6 @@
2422
from google.cloud import bigquery_storage_v1beta1
2523

2624

27-
@pytest.fixture()
28-
def project_id():
29-
return os.environ["PROJECT_ID"]
30-
31-
32-
@pytest.fixture()
33-
def client():
34-
return bigquery_storage_v1beta1.BigQueryStorageClient()
35-
36-
37-
@pytest.fixture()
38-
def table_reference():
39-
table_ref = bigquery_storage_v1beta1.types.TableReference()
40-
table_ref.project_id = "bigquery-public-data"
41-
table_ref.dataset_id = "usa_names"
42-
table_ref.table_id = "usa_1910_2013"
43-
return table_ref
44-
45-
46-
@pytest.fixture()
47-
def small_table_reference():
48-
table_ref = bigquery_storage_v1beta1.types.TableReference()
49-
table_ref.project_id = "bigquery-public-data"
50-
table_ref.dataset_id = "utility_us"
51-
table_ref.table_id = "country_code_iso"
52-
return table_ref
53-
54-
55-
def test_read_rows_full_table(client, project_id, small_table_reference):
56-
session = client.create_read_session(
57-
small_table_reference, "projects/{}".format(project_id), requested_streams=1
58-
)
59-
60-
stream_pos = bigquery_storage_v1beta1.types.StreamPosition(
61-
stream=session.streams[0]
62-
)
63-
blocks = list(client.read_rows(stream_pos))
64-
65-
assert len(blocks) > 0
66-
block = blocks[0]
67-
assert block.status.estimated_row_count > 0
68-
assert len(block.avro_rows.serialized_binary_rows) > 0
69-
70-
7125
def test_read_rows_to_arrow(client, project_id):
7226
table_ref = bigquery_storage_v1beta1.types.TableReference()
7327
table_ref.project_id = "bigquery-public-data"
@@ -102,47 +56,26 @@ def test_read_rows_to_arrow(client, project_id):
10256
assert pyarrow.types.is_string(schema.field_by_name("name").type)
10357

10458

105-
def test_read_rows_to_dataframe_w_avro(client, project_id):
106-
table_ref = bigquery_storage_v1beta1.types.TableReference()
107-
table_ref.project_id = "bigquery-public-data"
108-
table_ref.dataset_id = "new_york_citibike"
109-
table_ref.table_id = "citibike_stations"
110-
session = client.create_read_session(
111-
table_ref, "projects/{}".format(project_id), requested_streams=1
112-
)
113-
schema_type = session.WhichOneof("schema")
114-
assert schema_type == "avro_schema"
115-
116-
stream_pos = bigquery_storage_v1beta1.types.StreamPosition(
117-
stream=session.streams[0]
118-
)
119-
120-
frame = client.read_rows(stream_pos).to_dataframe(
121-
session, dtypes={"latitude": numpy.float16}
122-
)
123-
124-
# Station ID is a required field (no nulls), so the datatype should always
125-
# be integer.
126-
assert frame.station_id.dtype.name == "int64"
127-
assert frame.latitude.dtype.name == "float16"
128-
assert frame.longitude.dtype.name == "float64"
129-
assert frame["name"].str.startswith("Central Park").any()
130-
131-
132-
def test_read_rows_to_dataframe_w_arrow(client, project_id):
59+
@pytest.mark.parametrize(
60+
"data_format,expected_schema_type",
61+
(
62+
(bigquery_storage_v1beta1.enums.DataFormat.AVRO, "avro_schema"),
63+
(bigquery_storage_v1beta1.enums.DataFormat.ARROW, "arrow_schema"),
64+
),
65+
)
66+
def test_read_rows_to_dataframe(client, project_id, data_format, expected_schema_type):
13367
table_ref = bigquery_storage_v1beta1.types.TableReference()
13468
table_ref.project_id = "bigquery-public-data"
13569
table_ref.dataset_id = "new_york_citibike"
13670
table_ref.table_id = "citibike_stations"
137-
13871
session = client.create_read_session(
13972
table_ref,
14073
"projects/{}".format(project_id),
141-
format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW,
74+
format_=data_format,
14275
requested_streams=1,
14376
)
14477
schema_type = session.WhichOneof("schema")
145-
assert schema_type == "arrow_schema"
78+
assert schema_type == expected_schema_type
14679

14780
stream_pos = bigquery_storage_v1beta1.types.StreamPosition(
14881
stream=session.streams[0]

0 commit comments

Comments
 (0)