Commit 2806379

Label config as local or aws api-configuration.md (#1017)
1 parent 1765d9f · commit 2806379

File tree

1 file changed (+15, -12 lines)


docs/deployments/api-configuration.md

Lines changed: 15 additions & 12 deletions
@@ -10,22 +10,23 @@ Reference the section below which corresponds to your Predictor type: [Python](#
 
 ```yaml
 - name: <string> # API name (required)
-  endpoint: <string> # the endpoint for the API (default: <api_name>)
+  endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: <int> # specify the port for API (local only) (default: 8888)
   predictor:
     type: python
     path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
     image: <string> # docker image to use for the Predictor (default: cortexlabs/python-predictor-cpu or cortexlabs/python-predictor-gpu based on compute)
     env: <string: string> # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int> # GPU request per replica (default: 0)
     mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: <int> # minimum number of replicas (default: 1)
     max_replicas: <int> # maximum number of replicas (default: 100)
     init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -40,7 +41,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
@@ -51,7 +52,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
 
 ```yaml
 - name: <string> # API name (required)
-  endpoint: <string> # the endpoint for the API (default: <api_name>)
+  endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: <int> # specify the port for API (local only) (default: 8888)
   predictor:
     type: tensorflow
     path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
@@ -62,14 +64,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     image: <string> # docker image to use for the Predictor (default: cortexlabs/tensorflow-predictor)
     tensorflow_serving_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tensorflow-serving-gpu or cortexlabs/tensorflow-serving-cpu based on compute)
     env: <string: string> # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int> # GPU request per replica (default: 0)
     mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: <int> # minimum number of replicas (default: 1)
     max_replicas: <int> # maximum number of replicas (default: 100)
     init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -84,7 +86,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
@@ -95,7 +97,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
 
 ```yaml
 - name: <string> # API name (required)
-  endpoint: <string> # the endpoint for the API (default: <api_name>)
+  endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: <int> # specify the port for API (local only) (default: 8888)
   predictor:
     type: onnx
     path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
@@ -104,14 +107,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
     image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-predictor-gpu or cortexlabs/onnx-predictor-cpu based on compute)
     env: <string: string> # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: <int> # GPU request per replica (default: 0)
     mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: <int> # minimum number of replicas (default: 1)
     max_replicas: <int> # maximum number of replicas (default: 100)
     init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -126,7 +129,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
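
For context, here is a minimal deployment sketch that exercises the fields this commit labels. It is illustrative only: the API name, endpoint, port, and predictor path are hypothetical and do not come from the commit; the keys, labels, and defaults are taken from the configuration reference above.

```yaml
# Hypothetical cortex.yaml sketch; names and values are illustrative only.
- name: my-classifier       # hypothetical API name
  endpoint: /my-classifier  # aws only; ignored by a local deployment
  local_port: 8890          # local only; omit to use the default of 8888
  predictor:
    type: python
    path: predictor.py      # hypothetical path to a PythonPredictor definition
  tracker:                  # aws only
    model_type: classification
  compute:
    cpu: 200m
  autoscaling:              # aws only
    min_replicas: 1
    max_replicas: 10
  update_strategy:          # aws only
    max_surge: 25%
```

Per the labels above, a local deployment ignores the aws-only sections and serves on local_port, while an AWS deployment ignores local_port and applies them.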
