Skip to content

Commit 1ceb2e1

Browse files
vertex-sdk-bot, copybara-github
authored and committed
feat: Vertex AI Model Garden custom model deploy SDK Public Preview
PiperOrigin-RevId: 785505074
1 parent 0e9f3df commit 1ceb2e1

File tree

3 files changed

+332
-10
lines changed

3 files changed

+332
-10
lines changed

tests/unit/vertexai/model_garden/test_model_garden.py

Lines changed: 118 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535
from google.cloud.aiplatform_v1beta1.services import model_garden_service
3636
from vertexai import batch_prediction
3737
from vertexai import model_garden
38+
from vertexai.preview import (
39+
model_garden as model_garden_preview,
40+
)
3841
import pytest
3942

4043
from google.protobuf import duration_pb2
@@ -65,8 +68,9 @@
6568
_TEST_GCS_URI = "gs://some-bucket/some-model"
6669
_TEST_ENDPOINT_NAME = "projects/test-project/locations/us-central1/endpoints/1234567890"
6770
_TEST_MODEL_NAME = "projects/test-project/locations/us-central1/models/9876543210"
71+
_TEST_IMAGE_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00"
6872
_TEST_MODEL_CONTAINER_SPEC = types.ModelContainerSpec(
69-
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
73+
image_uri=_TEST_IMAGE_URI,
7074
command=["python", "main.py"],
7175
args=["--model-id=gemma-2b"],
7276
env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")],
@@ -183,7 +187,7 @@ def get_publisher_model_mock():
183187
types.PublisherModel.CallToAction.Deploy(
184188
deploy_task_name="vLLM 32K context",
185189
container_spec=types.ModelContainerSpec(
186-
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
190+
image_uri=_TEST_IMAGE_URI,
187191
command=["python", "main.py"],
188192
args=["--model-id=gemma-2b"],
189193
env=[
@@ -227,7 +231,7 @@ def get_publisher_model_mock():
227231
multi_deploy_vertex=[
228232
types.PublisherModel.CallToAction.Deploy(
229233
container_spec=types.ModelContainerSpec(
230-
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
234+
image_uri=_TEST_IMAGE_URI,
231235
command=["python", "main.py"],
232236
args=["--model-id=gemma-2b"],
233237
env=[
@@ -843,15 +847,15 @@ def test_deploy_with_serving_container_image_success(self, deploy_mock):
843847
)
844848
model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
845849
model.deploy(
846-
serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
850+
serving_container_image_uri=_TEST_IMAGE_URI,
847851
)
848852
deploy_mock.assert_called_once_with(
849853
types.DeployRequest(
850854
publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
851855
destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
852856
model_config=types.DeployRequest.ModelConfig(
853857
container_spec=types.ModelContainerSpec(
854-
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
858+
image_uri=_TEST_IMAGE_URI,
855859
)
856860
),
857861
)
@@ -912,11 +916,11 @@ def test_deploy_with_serving_container_spec_with_both_image_uri_raises_error(
912916
model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
913917
model.deploy(
914918
serving_container_spec=types.ModelContainerSpec(
915-
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
919+
image_uri=_TEST_IMAGE_URI,
916920
predict_route="/predictions/v1/predict",
917921
health_route="/ping",
918922
),
919-
serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
923+
serving_container_image_uri=_TEST_IMAGE_URI,
920924
)
921925
assert str(exception.value) == expected_message
922926

@@ -930,7 +934,7 @@ def test_deploy_with_serving_container_spec_individual_fields_success(
930934
)
931935
model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
932936
model.deploy(
933-
serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
937+
serving_container_image_uri=_TEST_IMAGE_URI,
934938
serving_container_predict_route="/predictions/v1/predict",
935939
serving_container_health_route="/ping",
936940
serving_container_command=["python", "main.py"],
@@ -953,7 +957,7 @@ def test_deploy_with_serving_container_spec_individual_fields_success(
953957
destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
954958
model_config=types.DeployRequest.ModelConfig(
955959
container_spec=types.ModelContainerSpec(
956-
image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
960+
image_uri=_TEST_IMAGE_URI,
957961
command=["python", "main.py"],
958962
args=["--model-id=gemma-2b"],
959963
env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")],
@@ -1211,3 +1215,108 @@ def test_accept_model_license_agreement_success(
12111215
publisher_model=_TEST_MODEL_FULL_RESOURCE_NAME,
12121216
publisher_model_eula_acked=True,
12131217
)
1218+
1219+
1220+
# The marker must be applied as a class decorator. Written as a bare
# expression statement it is constructed and immediately discarded, so the
# listed fixtures are never attached to the test class.
@pytest.mark.usefixtures(
    "google_auth_mock",
    "deploy_mock",
)
class TestModelGardenCustomModel:
    """Test cases for the preview Model Garden CustomModel deploy flow."""

    def setup_method(self):
        """Reload the aiplatform modules and initialize with the test project."""
        importlib.reload(aiplatform.initializer)
        importlib.reload(aiplatform)
        aiplatform.init(project=_TEST_PROJECT)

    def teardown_method(self):
        """Shut down the initializer's global pool, waiting for it to finish."""
        aiplatform.initializer.global_pool.shutdown(wait=True)

    def test_deploy_custom_model_gcs_uri_only_success(self, deploy_mock):
        """Deploying with only a GCS URI requests one min and one max replica."""
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
        )
        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
        model.deploy()
        deploy_mock.assert_called_once_with(
            types.DeployRequest(
                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
                custom_model=types.DeployRequest.CustomModel(
                    gcs_uri=_TEST_GCS_URI,
                ),
                deploy_config=types.DeployRequest.DeployConfig(
                    dedicated_resources=types.DedicatedResources(
                        min_replica_count=1,
                        max_replica_count=1,
                    )
                ),
            )
        )

    def test_deploy_custom_model_no_gcs_uri_raise_error(self, deploy_mock):
        """Constructing and deploying without a GCS URI raises ValueError."""
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
        )
        # Both the constructor and deploy() sit inside the context manager,
        # so the test passes whichever of the two raises.
        with pytest.raises(ValueError) as exception:
            model = model_garden_preview.CustomModel()
            model.deploy()
        assert str(exception.value) == "gcs_uri must be specified."

    def test_deploy_custom_model_machine_type_only_raise_error(self, deploy_mock):
        """Passing machine_type without the accelerator fields raises ValueError."""
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
        )
        with pytest.raises(ValueError) as exception:
            model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
            model.deploy(machine_type="n1-standard-4")
        assert (
            str(exception.value)
            == "machine_type, accelerator_type and accelerator_count must all"
            " be provided or not provided."
        )

    def test_deploy_custom_model_with_all_config_success(self, deploy_mock):
        """Every deploy() argument is forwarded into the DeployRequest."""
        aiplatform.init(
            project=_TEST_PROJECT,
            location=_TEST_LOCATION,
        )
        model = model_garden_preview.CustomModel(gcs_uri=_TEST_GCS_URI)
        model.deploy(
            machine_type="n1-standard-4",
            accelerator_type="NVIDIA_TESLA_T4",
            accelerator_count=1,
            min_replica_count=2,
            max_replica_count=3,
            endpoint_display_name="custom-mode-endpoint",
            model_display_name="custom-model-id",
        )
        deploy_mock.assert_called_once_with(
            types.DeployRequest(
                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
                custom_model=types.DeployRequest.CustomModel(
                    gcs_uri=_TEST_GCS_URI,
                ),
                model_config=types.DeployRequest.ModelConfig(
                    model_display_name="custom-model-id",
                ),
                deploy_config=types.DeployRequest.DeployConfig(
                    dedicated_resources=types.DedicatedResources(
                        min_replica_count=2,
                        max_replica_count=3,
                        machine_spec=types.MachineSpec(
                            machine_type="n1-standard-4",
                            accelerator_type="NVIDIA_TESLA_T4",
                            accelerator_count=1,
                        ),
                    ),
                ),
                endpoint_config=types.DeployRequest.EndpointConfig(
                    endpoint_display_name="custom-mode-endpoint",
                ),
            )
        )

0 commit comments

Comments
 (0)