Skip to content

Commit 64dfdbc

Browse files
Frances Hubis Thoma and copybara-github
authored and committed
feat: Add MultimodalDataset.to_bigframes() method to convert the dataset to a BigFrames DataFrame object and inspect the dataset in a notebook.
PiperOrigin-RevId: 758308530
1 parent 0c4f4a6 commit 64dfdbc

File tree

2 files changed

+31
-0
lines changed

2 files changed

+31
-0
lines changed

google/cloud/aiplatform/preview/datasets.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,18 @@ def from_gemini_request_jsonl(
10931093
create_request_timeout=create_request_timeout,
10941094
)
10951095

1096+
def to_bigframes(self) -> "bigframes.pandas.DataFrame":  # type: ignore # noqa: F821
    """Converts a multimodal dataset to a BigFrames dataframe.

    This is the preferred method to inspect the multimodal dataset in a
    notebook.

    Returns:
        A BigFrames dataframe read from the BigQuery table backing this
        dataset.
    """
    bigframes = _try_import_bigframes()

    # NOTE(review): elsewhere in this change `bigquery_table` is read as a
    # plain attribute (`ds.bigquery_table`), while this method originally
    # invoked it as a call. Accept both forms so the method works whether it
    # is a property or a regular method -- confirm against the definition.
    table_uri = self.bigquery_table
    if callable(table_uri):
        table_uri = table_uri()

    # Remove the literal "bq://" scheme only. `str.lstrip("bq://")` treats its
    # argument as a *set of characters* and would also eat the leading
    # characters of a project id that starts with "b" or "q"
    # (e.g. "bq://bq-proj.ds.tbl" -> "-proj.ds.tbl").
    bq_prefix = "bq://"
    if table_uri.startswith(bq_prefix):
        table_uri = table_uri[len(bq_prefix):]

    return bigframes.pandas.read_gbq_table(table_uri)
1107+
10961108
@classmethod
10971109
@base.optional_sync()
10981110
def _create_from_bigquery(

tests/system/aiplatform/test_multimodal_dataset.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,25 @@ def test_create_from_bigframes(self, shared_state):
171171
bigquery_client.delete_table(table_id, not_found_ok=True)
172172
ds.delete()
173173

174+
def test_export_to_bigframes(self, shared_state):
    """Round-trips a BigFrames dataframe through a MultimodalDataset.

    Creates a dataset from a BigFrames dataframe built from `_TEST_DATASET`,
    exports it back with `to_bigframes()`, and checks that the row count and
    column names survive the round trip. The backing BigQuery table and the
    dataset are removed afterwards.

    Args:
        shared_state: Fixture dict; must contain a truthy "bigquery_client".
    """
    assert shared_state["bigquery_client"]
    bigquery_client = shared_state["bigquery_client"]

    bpd.options.bigquery.project = _TEST_PROJECT
    bpd.options.bigquery.location = _TEST_LOCATION

    # Pre-bind the cleanup targets so the `finally` block cannot raise a
    # NameError (and hide the real failure) when setup fails part-way.
    ds = None
    table_id = None
    try:
        bf_df_source = bpd.DataFrame(_TEST_DATASET)
        ds = datasets.MultimodalDataset.from_bigframes(dataframe=bf_df_source)
        # Resolve the table id before exporting so the table is still
        # cleaned up if `to_bigframes()` itself fails.
        table_id = _uri_to_table_id(ds.bigquery_table)
        bf_df_exported = ds.to_bigframes()

        assert len(bf_df_exported) == len(_TEST_DATASET)
        assert set(bf_df_exported.columns) == {"Question", "Answer"}
    finally:
        if table_id is not None:
            bigquery_client.delete_table(table_id, not_found_ok=True)
        if ds is not None:
            ds.delete()
192+
174193
def test_assemble_dataset(self, shared_state):
175194
assert shared_state["bigquery_client"]
176195
assert shared_state["bigquery_test_table"]

0 commit comments

Comments
 (0)