Skip to content

Commit e346117

Browse files
authored
feat: add wait for creation and more informative exception when properties are not available (#566)
1 parent c6614cd commit e346117

20 files changed

+685
-65
lines changed

google/cloud/aiplatform/base.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import logging
2424
import sys
2525
import threading
26+
import time
2627
from typing import (
2728
Any,
2829
Callable,
@@ -540,21 +541,25 @@ def _sync_gca_resource(self):
540541
@property
541542
def name(self) -> str:
542543
"""Name of this resource."""
544+
self._assert_gca_resource_is_available()
543545
return self._gca_resource.name.split("/")[-1]
544546

545547
@property
546548
def resource_name(self) -> str:
547549
"""Full qualified resource name."""
550+
self._assert_gca_resource_is_available()
548551
return self._gca_resource.name
549552

550553
@property
551554
def display_name(self) -> str:
552555
"""Display name of this resource."""
556+
self._assert_gca_resource_is_available()
553557
return self._gca_resource.display_name
554558

555559
@property
556560
def create_time(self) -> datetime.datetime:
557561
"""Time this resource was created."""
562+
self._assert_gca_resource_is_available()
558563
return self._gca_resource.create_time
559564

560565
@property
@@ -570,6 +575,7 @@ def encryption_spec(self) -> Optional[gca_encryption_spec.EncryptionSpec]:
570575
If this is set, then all resources created by this Vertex AI resource will
571576
be encrypted with the provided encryption key.
572577
"""
578+
self._assert_gca_resource_is_available()
573579
return getattr(self._gca_resource, "encryption_spec")
574580

575581
@property
@@ -578,13 +584,26 @@ def labels(self) -> Dict[str, str]:
578584
579585
Read more about labels at https://goo.gl/xmQnxf
580586
"""
587+
self._assert_gca_resource_is_available()
581588
return self._gca_resource.labels
582589

583590
@property
584591
def gca_resource(self) -> proto.Message:
585592
"""The underlying resource proto representation."""
593+
self._assert_gca_resource_is_available()
586594
return self._gca_resource
587595

596+
def _assert_gca_resource_is_available(self) -> None:
    """Guard called by properties before they read the underlying resource.

    Returns silently when ``_gca_resource`` is set.

    Raises:
        RuntimeError: If ``_gca_resource`` is None, i.e. the resource has
            not been created.
    """
    # Guard clause: nothing to do when the resource proto is present.
    if self._gca_resource is not None:
        return

    raise RuntimeError(f"{self.__class__.__name__} resource has not been created")
606+
588607
def __repr__(self) -> str:
589608
return f"{object.__repr__(self)} \nresource name: {self.resource_name}"
590609

@@ -1061,6 +1080,56 @@ def __repr__(self) -> str:
10611080

10621081
return FutureManager.__repr__(self)
10631082

1083+
def _wait_for_resource_creation(self) -> None:
    """Wait until underlying resource is created.

    Currently this should only be used on subclasses that implement the
    construct then `run` pattern because the underlying sync=False
    implementation will not update downstream resource noun object's
    _gca_resource until the entire invoked method is complete.

    Ex:
        job = CustomTrainingJob()
        job.run(sync=False, ...)
        job._wait_for_resource_creation()

    Raises:
        RuntimeError: If the resource has not been scheduled to be created,
            or if every pending future finished without the resource
            receiving a name.
    """

    def _is_created() -> bool:
        # The resource counts as created once its proto carries a name.
        return bool(getattr(self._gca_resource, "name", None))

    # The user called this without invoking an API that creates the resource
    # (or an earlier async call already finished without creating it).
    if self._are_futures_done() and not _is_created():
        # Presumably re-raises a stored async failure, if any -- confirm
        # against the FutureManager implementation.
        self._raise_future_exception()
        raise RuntimeError(
            f"{self.__class__.__name__} resource is not scheduled to be created."
        )

    while not _is_created():
        # Break out of the loop if creation has failed asynchronously.
        if self._are_futures_done() and not _is_created():
            self._raise_future_exception()
            # No future is pending and nothing was raised above: the name
            # can no longer appear, so polling would never terminate.
            raise RuntimeError(
                f"{self.__class__.__name__} resource was not created."
            )

        time.sleep(1)
1113+
1114+
def _assert_gca_resource_is_available(self) -> None:
    """Raise when a property is read before the resource has a name.

    Overrides VertexAiResourceNoun to provide a more informative exception
    if resource creation has failed asynchronously: when ``_exception`` is
    set, its text is appended to the error message.

    Raises:
        RuntimeError: When the resource has not been created.
    """
    # A truthy ``name`` on the proto means the resource exists.
    if getattr(self._gca_resource, "name", None):
        return

    message = f"{self.__class__.__name__} resource has not been created."
    if self._exception:
        message += f" Resource failed with: {self._exception}"
    raise RuntimeError(message)
1132+
10641133

10651134
def get_annotation_class(annotation: type) -> type:
10661135
"""Helper method to retrieve type annotation.

google/cloud/aiplatform/datasets/dataset.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ def __init__(
8484
@property
8585
def metadata_schema_uri(self) -> str:
8686
"""The metadata schema uri of this dataset resource."""
87+
self._assert_gca_resource_is_available()
8788
return self._gca_resource.metadata_schema_uri
8889

8990
def _validate_metadata_schema_uri(self) -> None:

google/cloud/aiplatform/datasets/tabular_dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ def column_names(self) -> List[str]:
5252
RuntimeError: When no valid source is found.
5353
"""
5454

55+
self._assert_gca_resource_is_available()
56+
5557
metadata = self._gca_resource.metadata
5658

5759
if metadata is None:

google/cloud/aiplatform/jobs.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -330,18 +330,21 @@ def output_info(self,) -> Optional[aiplatform.gapic.BatchPredictionJob.OutputInf
330330
331331
This is only available for batch prediction jobs that have run successfully.
332332
"""
333+
self._assert_gca_resource_is_available()
333334
return self._gca_resource.output_info
334335

335336
@property
336337
def partial_failures(self) -> Optional[Sequence[status_pb2.Status]]:
337338
"""Partial failures encountered. For example, single files that can't be read.
338339
This field never exceeds 20 entries. Status details fields contain standard
339340
GCP error details."""
341+
self._assert_gca_resource_is_available()
340342
return getattr(self._gca_resource, "partial_failures")
341343

342344
@property
343345
def completion_stats(self) -> Optional[gca_completion_stats.CompletionStats]:
344346
"""Statistics on completed and failed prediction instances."""
347+
self._assert_gca_resource_is_available()
345348
return getattr(self._gca_resource, "completion_stats")
346349

347350
@classmethod
@@ -772,6 +775,8 @@ def iter_outputs(
772775
GCS or BQ output provided.
773776
"""
774777

778+
self._assert_gca_resource_is_available()
779+
775780
if self.state != gca_job_state.JobState.JOB_STATE_SUCCEEDED:
776781
raise RuntimeError(
777782
f"Cannot read outputs until BatchPredictionJob has succeeded, "
@@ -859,23 +864,6 @@ def __init__(
859864
def run(self) -> None:
860865
pass
861866

862-
@property
863-
def _has_run(self) -> bool:
864-
"""Property returns true if this class has a resource name."""
865-
return bool(self._gca_resource.name)
866-
867-
@property
868-
def state(self) -> gca_job_state.JobState:
869-
"""Current state of job.
870-
871-
Raises:
872-
RuntimeError if job run has not been called.
873-
"""
874-
if not self._has_run:
875-
raise RuntimeError("Job has not run. No state available.")
876-
877-
return super().state
878-
879867
@classmethod
880868
def get(
881869
cls,
@@ -913,6 +901,10 @@ def get(
913901

914902
return self
915903

904+
def wait_for_resource_creation(self) -> None:
    """Block until the underlying resource has been created.

    Public entry point that delegates to ``_wait_for_resource_creation``.
    """
    self._wait_for_resource_creation()
907+
916908

917909
class DataLabelingJob(_Job):
918910
_resource_noun = "dataLabelingJobs"
@@ -1041,7 +1033,8 @@ def network(self) -> Optional[str]:
10411033
Private services access must already be configured for the network. If left
10421034
unspecified, the CustomJob is not peered with any network.
10431035
"""
1044-
return getattr(self._gca_resource, "network")
1036+
self._assert_gca_resource_is_available()
1037+
return self._gca_resource.job_spec.network
10451038

10461039
@classmethod
10471040
def from_local_script(
@@ -1512,6 +1505,7 @@ def network(self) -> Optional[str]:
15121505
Private services access must already be configured for the network. If left
15131506
unspecified, the HyperparameterTuningJob is not peered with any network.
15141507
"""
1508+
self._assert_gca_resource_is_available()
15151509
return getattr(self._gca_resource.trial_job_spec, "network")
15161510

15171511
@base.optional_sync()
@@ -1612,4 +1606,5 @@ def run(
16121606

16131607
@property
16141608
def trials(self) -> List[gca_study_compat.Trial]:
1609+
self._assert_gca_resource_is_available()
16151610
return list(self._gca_resource.trials)

google/cloud/aiplatform/models.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ def network(self) -> Optional[str]:
146146
Private services access must already be configured for the network. If left
147147
unspecified, the Endpoint is not peered with any network.
148148
"""
149-
return getattr(self._gca_resource, "network")
149+
self._assert_gca_resource_is_available()
150+
return getattr(self._gca_resource, "network", None)
150151

151152
@classmethod
152153
def create(
@@ -1283,11 +1284,13 @@ class Model(base.VertexAiResourceNounWithFutureManager):
12831284
def uri(self) -> Optional[str]:
12841285
"""Path to the directory containing the Model artifact and any of its
12851286
supporting files. Not present for AutoML Models."""
1287+
self._assert_gca_resource_is_available()
12861288
return self._gca_resource.artifact_uri or None
12871289

12881290
@property
12891291
def description(self) -> str:
12901292
"""Description of the model."""
1293+
self._assert_gca_resource_is_available()
12911294
return self._gca_resource.description
12921295

12931296
@property
@@ -1302,6 +1305,7 @@ def supported_export_formats(
13021305
13031306
{'tf-saved-model': [<ExportableContent.ARTIFACT: 1>]}
13041307
"""
1308+
self._assert_gca_resource_is_available()
13051309
return {
13061310
export_format.id: [
13071311
gca_model_compat.Model.ExportFormat.ExportableContent(content)
@@ -1328,6 +1332,7 @@ def supported_deployment_resources_types(
13281332
predictions by using a `BatchPredictionJob`, if it has at least one entry
13291333
each in `Model.supported_input_storage_formats` and
13301334
`Model.supported_output_storage_formats`."""
1335+
self._assert_gca_resource_is_available()
13311336
return list(self._gca_resource.supported_deployment_resources_types)
13321337

13331338
@property
@@ -1343,6 +1348,7 @@ def supported_input_storage_formats(self) -> List[str]:
13431348
`supported_deployment_resources_types`, it could serve online predictions
13441349
by using `Endpoint.predict()` or `Endpoint.explain()`.
13451350
"""
1351+
self._assert_gca_resource_is_available()
13461352
return list(self._gca_resource.supported_input_storage_formats)
13471353

13481354
@property
@@ -1363,12 +1369,14 @@ def supported_output_storage_formats(self) -> List[str]:
13631369
`supported_deployment_resources_types`, it could serve online predictions
13641370
by using `Endpoint.predict()` or `Endpoint.explain()`.
13651371
"""
1372+
self._assert_gca_resource_is_available()
13661373
return list(self._gca_resource.supported_output_storage_formats)
13671374

13681375
@property
13691376
def predict_schemata(self) -> Optional[aiplatform.gapic.PredictSchemata]:
13701377
"""The schemata that describe formats of the Model's predictions and
13711378
explanations, if available."""
1379+
self._assert_gca_resource_is_available()
13721380
return getattr(self._gca_resource, "predict_schemata")
13731381

13741382
@property
@@ -1379,6 +1387,7 @@ def training_job(self) -> Optional["aiplatform.training_jobs._TrainingJob"]:
13791387
api_core.exceptions.NotFound: If the Model's training job resource
13801388
cannot be found on the Vertex service.
13811389
"""
1390+
self._assert_gca_resource_is_available()
13821391
job_name = getattr(self._gca_resource, "training_pipeline")
13831392

13841393
if not job_name:
@@ -1400,6 +1409,7 @@ def training_job(self) -> Optional["aiplatform.training_jobs._TrainingJob"]:
14001409
def container_spec(self) -> Optional[aiplatform.gapic.ModelContainerSpec]:
14011410
"""The specification of the container that is to be used when deploying
14021411
this Model. Not present for AutoML Models."""
1412+
self._assert_gca_resource_is_available()
14031413
return getattr(self._gca_resource, "container_spec")
14041414

14051415
def __init__(

google/cloud/aiplatform/pipeline_jobs.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,18 @@ def __init__(
220220
),
221221
)
222222

223+
def _assert_gca_resource_is_available(self) -> None:
    """Raise when the pipeline job resource has not been created yet.

    Creation is detected through a truthy ``create_time`` on the resource
    proto. When ``_exception`` is set, its text is appended to the error
    message.

    Raises:
        RuntimeError: When the resource has not been created.
    """
    # TODO(b/193800063) Change this to name after this fix
    if getattr(self._gca_resource, "create_time", None):
        return

    message = f"{self.__class__.__name__} resource has not been created."
    if self._exception:
        message += f" Resource failed with: {self._exception}"
    raise RuntimeError(message)
234+
223235
@base.optional_sync()
224236
def run(
225237
self,
@@ -236,6 +248,7 @@ def run(
236248
network (str):
237249
Optional. The full name of the Compute Engine network to which the job
238250
should be peered. For example, projects/12345/global/networks/myVPC.
251+
239252
Private services access must already be configured for the network.
240253
If left unspecified, the job is not peered with any network.
241254
sync (bool):
@@ -272,17 +285,9 @@ def pipeline_spec(self):
272285
@property
273286
def state(self) -> Optional[gca_pipeline_state_v1beta1.PipelineState]:
274287
"""Current pipeline state."""
275-
if not self._has_run:
276-
raise RuntimeError("Job has not run. No state available.")
277-
278288
self._sync_gca_resource()
279289
return self._gca_resource.state
280290

281-
@property
282-
def _has_run(self) -> bool:
283-
"""Helper property to check if this pipeline job has been run."""
284-
return bool(self._gca_resource.create_time)
285-
286291
@property
287292
def has_failed(self) -> bool:
288293
"""Returns True if pipeline has failed.
@@ -300,10 +305,6 @@ def _dashboard_uri(self) -> str:
300305
url = f"https://console.cloud.google.com/vertex-ai/locations/{fields.location}/pipelines/runs/{fields.id}?project={fields.project}"
301306
return url
302307

303-
def _sync_gca_resource(self):
304-
"""Helper method to sync the local gca_source against the service."""
305-
self._gca_resource = self.api_client.get_pipeline_job(name=self.resource_name)
306-
307308
def _block_until_complete(self):
308309
"""Helper method to block and check on job until complete."""
309310
# Used these numbers so failures surface fast
@@ -377,13 +378,9 @@ def cancel(self) -> None:
377378
makes a best effort to cancel the job, but success is not guaranteed.
378379
On successful cancellation, the PipelineJob is not deleted; instead it
379380
becomes a job with state set to `CANCELLED`.
380-
381-
Raises:
382-
RuntimeError: If this PipelineJob has not started running.
383381
"""
384-
if not self._has_run:
385-
raise RuntimeError(
386-
"This PipelineJob has not been launched, use the `run()` method "
387-
"to start. `cancel()` can only be called on a job that is running."
388-
)
389382
self.api_client.cancel_pipeline_job(name=self.resource_name)
383+
384+
def wait_for_resource_creation(self) -> None:
    """Block until the underlying resource has been created.

    Public entry point that delegates to ``_wait_for_resource_creation``.
    """
    self._wait_for_resource_creation()

0 commit comments

Comments
 (0)