Skip to content

Commit 4df909c

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Added autoscaling_target_request_count_per_minute to model deployment on Endpoint and Model classes
PiperOrigin-RevId: 772677877
1 parent c5bb99b commit 4df909c

File tree

3 files changed

+127
-1
lines changed

3 files changed

+127
-1
lines changed

google/cloud/aiplatform/models.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,7 @@ def deploy(
13631363
deploy_request_timeout: Optional[float] = None,
13641364
autoscaling_target_cpu_utilization: Optional[int] = None,
13651365
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
1366+
autoscaling_target_request_count_per_minute: Optional[int] = None,
13661367
enable_access_logging=False,
13671368
disable_container_logging: bool = False,
13681369
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
@@ -1456,6 +1457,9 @@ def deploy(
14561457
Target Accelerator Duty Cycle.
14571458
Must also set accelerator_type and accelerator_count if specified.
14581459
A default value of 60 will be used if not specified.
1460+
autoscaling_target_request_count_per_minute (int):
1461+
Optional. The target number of requests per minute for autoscaling.
1462+
If set, the model will be scaled based on the number of requests it receives.
14591463
enable_access_logging (bool):
14601464
Whether to enable endpoint access logging. Defaults to False.
14611465
disable_container_logging (bool):
@@ -1536,6 +1540,7 @@ def deploy(
15361540
deploy_request_timeout=deploy_request_timeout,
15371541
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
15381542
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
1543+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
15391544
spot=spot,
15401545
enable_access_logging=enable_access_logging,
15411546
disable_container_logging=disable_container_logging,
@@ -1568,6 +1573,7 @@ def _deploy(
15681573
deploy_request_timeout: Optional[float] = None,
15691574
autoscaling_target_cpu_utilization: Optional[int] = None,
15701575
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
1576+
autoscaling_target_request_count_per_minute: Optional[int] = None,
15711577
spot: bool = False,
15721578
enable_access_logging=False,
15731579
disable_container_logging: bool = False,
@@ -1664,6 +1670,9 @@ def _deploy(
16641670
Target Accelerator Duty Cycle.
16651671
Must also set accelerator_type and accelerator_count if specified.
16661672
A default value of 60 will be used if not specified.
1673+
autoscaling_target_request_count_per_minute (int):
1674+
Optional. The target number of requests per minute for autoscaling.
1675+
If set, the model will be scaled based on the number of requests it receives.
16671676
spot (bool):
16681677
Optional. Whether to schedule the deployment workload on spot VMs.
16691678
enable_access_logging (bool):
@@ -1721,6 +1730,7 @@ def _deploy(
17211730
deploy_request_timeout=deploy_request_timeout,
17221731
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
17231732
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
1733+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
17241734
spot=spot,
17251735
enable_access_logging=enable_access_logging,
17261736
disable_container_logging=disable_container_logging,
@@ -5339,6 +5349,7 @@ def deploy(
53395349
deploy_request_timeout: Optional[float] = None,
53405350
autoscaling_target_cpu_utilization: Optional[int] = None,
53415351
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
5352+
autoscaling_target_request_count_per_minute: Optional[int] = None,
53425353
enable_access_logging=False,
53435354
disable_container_logging: bool = False,
53445355
private_service_connect_config: Optional[
@@ -5454,6 +5465,9 @@ def deploy(
54545465
Optional. Target Accelerator Duty Cycle.
54555466
Must also set accelerator_type and accelerator_count if specified.
54565467
A default value of 60 will be used if not specified.
5468+
autoscaling_target_request_count_per_minute (int):
5469+
Optional. The target number of requests per minute for autoscaling.
5470+
If set, the model will be scaled based on the number of requests it receives.
54575471
enable_access_logging (bool):
54585472
Whether to enable endpoint access logging. Defaults to False.
54595473
disable_container_logging (bool):
@@ -5561,6 +5575,7 @@ def deploy(
55615575
deploy_request_timeout=deploy_request_timeout,
55625576
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
55635577
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
5578+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
55645579
spot=spot,
55655580
enable_access_logging=enable_access_logging,
55665581
disable_container_logging=disable_container_logging,
@@ -5603,6 +5618,7 @@ def _deploy(
56035618
deploy_request_timeout: Optional[float] = None,
56045619
autoscaling_target_cpu_utilization: Optional[int] = None,
56055620
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
5621+
autoscaling_target_request_count_per_minute: Optional[int] = None,
56065622
spot: bool = False,
56075623
enable_access_logging=False,
56085624
disable_container_logging: bool = False,
@@ -5720,6 +5736,9 @@ def _deploy(
57205736
Optional. Target Accelerator Duty Cycle.
57215737
Must also set accelerator_type and accelerator_count if specified.
57225738
A default value of 60 will be used if not specified.
5739+
autoscaling_target_request_count_per_minute (int):
5740+
Optional. The target number of requests per minute for autoscaling.
5741+
If set, the model will be scaled based on the number of requests it receives.
57235742
spot (bool):
57245743
Optional. Whether to schedule the deployment workload on spot VMs.
57255744
enable_access_logging (bool):
@@ -5808,6 +5827,7 @@ def _deploy(
58085827
deploy_request_timeout=deploy_request_timeout,
58095828
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
58105829
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
5830+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
58115831
spot=spot,
58125832
enable_access_logging=enable_access_logging,
58135833
disable_container_logging=disable_container_logging,

tests/unit/aiplatform/test_endpoints.py

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1917,11 +1917,61 @@ def test_deploy_with_autoscaling_target_accelerator_duty_cycle_and_no_accelerato
19171917
if not sync:
19181918
test_endpoint.wait()
19191919

1920+
@pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock")
1921+
@pytest.mark.parametrize("sync", [True, False])
1922+
def test_deploy_with_autoscaling_target_request_count_per_minute(
1923+
self, deploy_model_mock, sync
1924+
):
1925+
test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
1926+
test_model = models.Model(_TEST_ID)
1927+
test_model._gca_resource.supported_deployment_resources_types.append(
1928+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
1929+
)
1930+
test_endpoint.deploy(
1931+
model=test_model,
1932+
machine_type=_TEST_MACHINE_TYPE,
1933+
service_account=_TEST_SERVICE_ACCOUNT,
1934+
sync=sync,
1935+
deploy_request_timeout=None,
1936+
autoscaling_target_request_count_per_minute=600,
1937+
)
1938+
1939+
if not sync:
1940+
test_endpoint.wait()
1941+
1942+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
1943+
machine_spec=gca_machine_resources.MachineSpec(
1944+
machine_type=_TEST_MACHINE_TYPE,
1945+
),
1946+
min_replica_count=1,
1947+
max_replica_count=1,
1948+
autoscaling_metric_specs=[
1949+
gca_machine_resources.AutoscalingMetricSpec(
1950+
metric_name=_TEST_METRIC_NAME_REQUEST_COUNT,
1951+
target=600,
1952+
),
1953+
],
1954+
)
1955+
1956+
expected_deployed_model = gca_endpoint.DeployedModel(
1957+
dedicated_resources=expected_dedicated_resources,
1958+
model=test_model.resource_name,
1959+
display_name=None,
1960+
service_account=_TEST_SERVICE_ACCOUNT,
1961+
)
1962+
deploy_model_mock.assert_called_once_with(
1963+
endpoint=test_endpoint.resource_name,
1964+
deployed_model=expected_deployed_model,
1965+
traffic_split={"0": 100},
1966+
metadata=(),
1967+
timeout=None,
1968+
)
1969+
19201970
@pytest.mark.usefixtures(
19211971
"get_endpoint_mock", "get_model_mock", "preview_deploy_model_mock"
19221972
)
19231973
@pytest.mark.parametrize("sync", [True, False])
1924-
def test_deploy_with_autoscaling_target_request_count_per_minute(
1974+
def test_deploy_with_autoscaling_target_request_count_per_minute_preview(
19251975
self, preview_deploy_model_mock, sync
19261976
):
19271977
test_endpoint = preview_models.Endpoint(_TEST_ENDPOINT_NAME)

tests/unit/aiplatform/test_models.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2386,6 +2386,62 @@ def test_deploy_no_endpoint_dedicated_resources_autoscaling_accelerator_duty_cyc
23862386
if not sync:
23872387
test_endpoint.wait()
23882388

2389+
@pytest.mark.usefixtures(
2390+
"get_model_mock",
2391+
"create_endpoint_mock",
2392+
"get_endpoint_mock",
2393+
)
2394+
@pytest.mark.parametrize("sync", [True, False])
2395+
def test_deploy_no_endpoint_dedicated_resources_autoscaling_request_count_per_minute(
2396+
self, deploy_model_mock, sync
2397+
):
2398+
test_model = models.Model(_TEST_ID)
2399+
test_model._gca_resource.supported_deployment_resources_types.append(
2400+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
2401+
)
2402+
2403+
test_endpoint = test_model.deploy(
2404+
machine_type=_TEST_MACHINE_TYPE,
2405+
accelerator_type=_TEST_ACCELERATOR_TYPE,
2406+
accelerator_count=_TEST_ACCELERATOR_COUNT,
2407+
sync=sync,
2408+
deploy_request_timeout=None,
2409+
system_labels=_TEST_LABELS,
2410+
autoscaling_target_request_count_per_minute=600,
2411+
)
2412+
2413+
if not sync:
2414+
test_endpoint.wait()
2415+
2416+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2417+
machine_spec=gca_machine_resources.MachineSpec(
2418+
machine_type=_TEST_MACHINE_TYPE,
2419+
accelerator_type=_TEST_ACCELERATOR_TYPE,
2420+
accelerator_count=_TEST_ACCELERATOR_COUNT,
2421+
),
2422+
min_replica_count=1,
2423+
max_replica_count=1,
2424+
autoscaling_metric_specs=[
2425+
gca_machine_resources.AutoscalingMetricSpec(
2426+
metric_name=_TEST_METRIC_NAME_REQUEST_COUNT,
2427+
target=600,
2428+
),
2429+
],
2430+
)
2431+
expected_deployed_model = gca_endpoint.DeployedModel(
2432+
dedicated_resources=expected_dedicated_resources,
2433+
model=test_model.resource_name,
2434+
display_name=None,
2435+
system_labels=_TEST_LABELS,
2436+
)
2437+
deploy_model_mock.assert_called_once_with(
2438+
endpoint=test_endpoint.resource_name,
2439+
deployed_model=expected_deployed_model,
2440+
traffic_split={"0": 100},
2441+
metadata=(),
2442+
timeout=None,
2443+
)
2444+
23892445
@pytest.mark.usefixtures(
23902446
"get_model_mock",
23912447
"create_endpoint_mock",

0 commit comments

Comments (0)