Skip to content

Commit 6e5c421

Browse files
Frances Hubis Thoma authored and copybara-github committed
feat: ummd.MultimodalDataset.from_bigquery() now also accepts a table id (not just a BQ table URI).
PiperOrigin-RevId: 781393598
1 parent 262fbc3 commit 6e5c421

File tree

3 files changed

+27
-14
lines changed

3 files changed

+27
-14
lines changed

google/cloud/aiplatform/preview/datasets.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -690,7 +690,7 @@ def bigquery_table(self) -> str:
690690
def from_bigquery(
691691
cls,
692692
*,
693-
bigquery_uri: str,
693+
bigquery_source: str,
694694
display_name: Optional[str] = None,
695695
project: Optional[str] = None,
696696
location: Optional[str] = None,
@@ -702,10 +702,10 @@ def from_bigquery(
702702
"""Creates a multimodal dataset from a BigQuery table.
703703
704704
Args:
705-
bigquery_uri (str):
706-
Required. The BigQuery table URI to be used for the created
707-
dataset. The table uri can be in the format of
708-
"bq://dataset.table" or "bq://project.dataset.table".
705+
bigquery_source (str):
706+
Required. The BigQuery table URI or ID to be used for the created
707+
dataset, which can be in the format of "bq://dataset.table",
708+
"bq://project.dataset.table" or "project.dataset.table".
709709
display_name (str):
710710
Optional. The user-defined name of the dataset. The name can be
711711
up to 128 characters long and can consist of any UTF-8
@@ -741,9 +741,10 @@ def from_bigquery(
741741
dataset (MultimodalDataset):
742742
The created multimodal dataset.
743743
"""
744+
if not bigquery_source.startswith("bq://"):
745+
bigquery_source = f"bq://{bigquery_source}"
744746
return cls._create_from_bigquery(
745-
bigquery_uri=bigquery_uri,
746-
metadata=_get_metadata_for_bq(bq_uri=bigquery_uri),
747+
metadata=_get_metadata_for_bq(bq_uri=bigquery_source),
747748
display_name=display_name,
748749
project=project,
749750
location=location,
@@ -856,7 +857,6 @@ def from_pandas(
856857

857858
bigquery_uri = f"bq://{target_table_id}"
858859
return cls._create_from_bigquery(
859-
bigquery_uri=bigquery_uri,
860860
metadata=_get_metadata_for_bq(bq_uri=bigquery_uri),
861861
display_name=display_name,
862862
project=project,
@@ -958,7 +958,6 @@ def from_bigframes(
958958

959959
bigquery_uri = f"bq://{target_table_id}"
960960
return cls._create_from_bigquery(
961-
bigquery_uri=bigquery_uri,
962961
metadata=_get_metadata_for_bq(bq_uri=bigquery_uri),
963962
display_name=display_name,
964963
project=project,
@@ -1095,7 +1094,6 @@ def from_gemini_request_jsonl(
10951094

10961095
bigquery_uri = f"bq://{target_table_id}"
10971096
return cls._create_from_bigquery(
1098-
bigquery_uri=bigquery_uri,
10991097
metadata=_get_metadata_for_bq(
11001098
bq_uri=bigquery_uri, request_column_name=request_column_name
11011099
),
@@ -1125,7 +1123,6 @@ def to_bigframes(self) -> "bigframes.pandas.DataFrame": # type: ignore # noqa:
11251123
def _create_from_bigquery(
11261124
cls,
11271125
*,
1128-
bigquery_uri: str,
11291126
metadata: struct_pb2.Value,
11301127
display_name: Optional[str] = None,
11311128
project: Optional[str] = None,

tests/system/aiplatform/test_multimodal_dataset.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,14 +105,30 @@ def copy_sample_data(self, shared_state):
105105
# to clean up the table.
106106
yield
107107

108-
def test_create_new_dataset(self, shared_state):
108+
def test_create_new_dataset_from_bq_table_uri(self, shared_state):
109109
assert shared_state["bigquery_test_table"]
110110
bigquery_table = f"bq://{shared_state['bigquery_test_table']}"
111111
display_name = "test dataset"
112112
labels = {"label1": "value1", "label2": "value2"}
113113
try:
114114
ds = datasets.MultimodalDataset.from_bigquery(
115-
bigquery_uri=bigquery_table, display_name=display_name, labels=labels
115+
bigquery_source=bigquery_table, display_name=display_name, labels=labels
116+
)
117+
assert ds.display_name == display_name
118+
assert ds.bigquery_table == bigquery_table
119+
assert ds.labels == labels
120+
assert ds.location == _TEST_LOCATION
121+
finally:
122+
ds.delete()
123+
124+
def test_create_new_dataset_from_bq_table_id(self, shared_state):
125+
assert shared_state["bigquery_test_table"]
126+
bigquery_table = f"{shared_state['bigquery_test_table']}"
127+
display_name = "test dataset"
128+
labels = {"label1": "value1", "label2": "value2"}
129+
try:
130+
ds = datasets.MultimodalDataset.from_bigquery(
131+
bigquery_source=bigquery_table, display_name=display_name, labels=labels
116132
)
117133
assert ds.display_name == display_name
118134
assert ds.bigquery_table == bigquery_table

tests/unit/aiplatform/test_multimodal_datasets.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -438,7 +438,7 @@ def test_dataset_bigquery_table(self):
438438
def test_create_dataset_from_bigquery(self, create_dataset_mock, sync):
439439
aiplatform.init(project=_TEST_PROJECT)
440440
new_dataset = ummd.MultimodalDataset.from_bigquery(
441-
bigquery_uri=_TEST_SOURCE_URI_BQ,
441+
bigquery_source=_TEST_SOURCE_URI_BQ,
442442
display_name=_TEST_DISPLAY_NAME,
443443
sync=sync,
444444
)

0 commit comments

Comments
 (0)