Skip to content

Commit 28a5750

Browse files
feat: Adds dataset_view parameter to get_dataset method (#2198)
* feat: Add dataset_view parameter to get_dataset method

  This commit introduces a new `dataset_view` parameter to the `get_dataset` method in the BigQuery client. This allows you to specify the level of detail (METADATA, ACL, FULL) returned when fetching a dataset. The `DatasetView` enum has been added to `enums.py`.

  Unit tests have been added to verify:
  - Correct query parameter (`datasetView`) formation for each enum value.
  - Correct behavior when `dataset_view` is None.
  - AttributeError is raised for invalid `dataset_view` types.

* test edits, linting, etc.
* Fixes docstring
* updates docstrings
* update parameter name to align with discovery doc
* Update google/cloud/bigquery/client.py

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent eb9c2af commit 28a5750

File tree

4 files changed

+116
-5
lines changed

4 files changed

+116
-5
lines changed

google/cloud/bigquery/client.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@
9090
from google.cloud.bigquery.dataset import Dataset
9191
from google.cloud.bigquery.dataset import DatasetListItem
9292
from google.cloud.bigquery.dataset import DatasetReference
93-
from google.cloud.bigquery.enums import AutoRowIDs
94-
from google.cloud.bigquery.enums import UpdateMode
93+
94+
from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode
9595
from google.cloud.bigquery.format_options import ParquetOptions
9696
from google.cloud.bigquery.job import (
9797
CopyJob,
@@ -865,6 +865,7 @@ def get_dataset(
865865
dataset_ref: Union[DatasetReference, str],
866866
retry: retries.Retry = DEFAULT_RETRY,
867867
timeout: TimeoutType = DEFAULT_TIMEOUT,
868+
dataset_view: Optional[DatasetView] = None,
868869
) -> Dataset:
869870
"""Fetch the dataset referenced by ``dataset_ref``
870871
@@ -882,7 +883,21 @@ def get_dataset(
882883
timeout (Optional[float]):
883884
The number of seconds to wait for the underlying HTTP transport
884885
before using ``retry``.
886+
dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]):
887+
Specifies the view that determines which dataset information is
888+
returned. By default, dataset metadata (e.g. friendlyName, description,
889+
labels, etc.) and ACL information are returned. This argument can
890+
take on the following possible enum values.
885891
892+
* :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`:
893+
Includes ACL information for the dataset, which defines dataset access for one or more entities.
894+
* :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`:
895+
Includes both dataset metadata and ACL information.
896+
This view is not supported by the `datasets.list` API method.
897+
* :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`:
898+
Includes basic dataset metadata, but not the ACL.
899+
* :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`:
900+
The server will decide which view to use. Currently defaults to FULL.
886901
Returns:
887902
google.cloud.bigquery.dataset.Dataset:
888903
A ``Dataset`` instance.
@@ -892,6 +907,12 @@ def get_dataset(
892907
dataset_ref, default_project=self.project
893908
)
894909
path = dataset_ref.path
910+
911+
if dataset_view:
912+
query_params = {"datasetView": dataset_view.value}
913+
else:
914+
query_params = {}
915+
895916
span_attributes = {"path": path}
896917
api_response = self._call_api(
897918
retry,
@@ -900,6 +921,7 @@ def get_dataset(
900921
method="GET",
901922
path=path,
902923
timeout=timeout,
924+
query_params=query_params,
903925
)
904926
return Dataset.from_api_repr(api_response)
905927

google/cloud/bigquery/enums.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,24 @@ class CreateDisposition(object):
8080
returned in the job result."""
8181

8282

83+
class DatasetView(enum.Enum):
84+
"""DatasetView specifies which dataset information is returned."""
85+
86+
DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED"
87+
"""The default value. Currently maps to the FULL view."""
88+
89+
METADATA = "METADATA"
90+
"""View metadata information for the dataset, such as friendlyName,
91+
description, labels, etc."""
92+
93+
ACL = "ACL"
94+
"""View ACL information for the dataset, which defines dataset access
95+
for one or more entities."""
96+
97+
FULL = "FULL"
98+
"""View both dataset metadata and ACL information."""
99+
100+
83101
class DefaultPandasDTypes(enum.Enum):
84102
"""Default Pandas DataFrame DTypes to convert BigQuery data. These
85103
Sentinel values are used instead of None to maintain backward compatibility,

tests/unit/test_client.py

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
from google.cloud.bigquery import job as bqjob
6262
import google.cloud.bigquery._job_helpers
6363
from google.cloud.bigquery.dataset import DatasetReference, Dataset
64-
from google.cloud.bigquery.enums import UpdateMode
64+
from google.cloud.bigquery.enums import UpdateMode, DatasetView
6565
from google.cloud.bigquery import exceptions
6666
from google.cloud.bigquery import ParquetOptions
6767
import google.cloud.bigquery.retry
@@ -753,7 +753,7 @@ def test_get_dataset(self):
753753
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None)
754754

755755
conn.api_request.assert_called_once_with(
756-
method="GET", path="/%s" % path, timeout=7.5
756+
method="GET", path="/%s" % path, timeout=7.5, query_params={}
757757
)
758758
self.assertEqual(dataset.dataset_id, self.DS_ID)
759759

@@ -819,6 +819,72 @@ def test_get_dataset(self):
819819

820820
self.assertEqual(dataset.dataset_id, self.DS_ID)
821821

822+
def test_get_dataset_with_dataset_view(self):
823+
path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID)
824+
creds = _make_credentials()
825+
http = object()
826+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
827+
resource = {
828+
"id": "%s:%s" % (self.PROJECT, self.DS_ID),
829+
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
830+
}
831+
dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
832+
833+
test_cases = [
834+
(None, None),
835+
(DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"),
836+
(DatasetView.METADATA, "METADATA"),
837+
(DatasetView.ACL, "ACL"),
838+
(DatasetView.FULL, "FULL"),
839+
]
840+
841+
for dataset_view_arg, expected_param_value in test_cases:
842+
with self.subTest(
843+
dataset_view_arg=dataset_view_arg,
844+
expected_param_value=expected_param_value,
845+
):
846+
# Re-initialize the connection mock for each sub-test to reset side_effect
847+
conn = client._connection = make_connection(resource)
848+
849+
dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg)
850+
851+
self.assertEqual(dataset.dataset_id, self.DS_ID)
852+
853+
if expected_param_value:
854+
expected_query_params = {"datasetView": expected_param_value}
855+
else:
856+
expected_query_params = {}
857+
858+
conn.api_request.assert_called_once_with(
859+
method="GET",
860+
path="/%s" % path,
861+
timeout=DEFAULT_TIMEOUT,
862+
query_params=expected_query_params if expected_query_params else {},
863+
)
864+
865+
def test_get_dataset_with_invalid_dataset_view(self):
866+
invalid_view_values = [
867+
"INVALID_STRING",
868+
123,
869+
123.45,
870+
object(),
871+
]
872+
creds = _make_credentials()
873+
http = object()
874+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
875+
resource = {
876+
"id": "%s:%s" % (self.PROJECT, self.DS_ID),
877+
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
878+
}
879+
conn = client._connection = make_connection(resource)
880+
dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
881+
882+
for invalid_view_value in invalid_view_values:
883+
with self.subTest(invalid_view_value=invalid_view_value):
884+
conn.api_request.reset_mock() # Reset mock for each sub-test
885+
with self.assertRaises(AttributeError):
886+
client.get_dataset(dataset_ref, dataset_view=invalid_view_value)
887+
822888
def test_ensure_bqstorage_client_creating_new_instance(self):
823889
bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
824890

tests/unit/test_create_dataset.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION)
372372
},
373373
timeout=DEFAULT_TIMEOUT,
374374
),
375-
mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
375+
mock.call(
376+
method="GET",
377+
path=get_path,
378+
timeout=DEFAULT_TIMEOUT,
379+
query_params={},
380+
),
376381
]
377382
)
378383

0 commit comments

Comments
 (0)