You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardExpand all lines: docs/deployments/api-configuration.md
+15-12Lines changed: 15 additions & 12 deletions
Display the source diff
Display the rich diff
Original file line number
Diff line number
Diff line change
@@ -10,22 +10,23 @@ Reference the section below which corresponds to your Predictor type: [Python](#
10
10
11
11
```yaml
12
12
- name: <string> # API name (required)
13
-
endpoint: <string> # the endpoint for the API (default: <api_name>)
13
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
14
+
local_port: <int> # the port for the API (local only) (default: 8888)
14
15
predictor:
15
16
type: python
16
17
path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
17
18
config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
18
19
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
19
20
image: <string> # docker image to use for the Predictor (default: cortexlabs/python-predictor-cpu or cortexlabs/python-predictor-gpu based on compute)
20
21
env: <string: string> # dictionary of environment variables
21
-
tracker:
22
+
tracker: # (aws only)
22
23
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
23
24
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
24
25
compute:
25
26
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
26
27
gpu: <int> # GPU request per replica (default: 0)
27
28
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
28
-
autoscaling:
29
+
autoscaling: # (aws only)
29
30
min_replicas: <int> # minimum number of replicas (default: 1)
30
31
max_replicas: <int> # maximum number of replicas (default: 100)
31
32
init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -40,7 +41,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
40
41
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
41
42
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
42
43
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
43
-
update_strategy:
44
+
update_strategy: # (aws only)
44
45
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
45
46
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
46
47
```
@@ -51,7 +52,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
51
52
52
53
```yaml
53
54
- name: <string> # API name (required)
54
-
endpoint: <string> # the endpoint for the API (default: <api_name>)
55
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
56
+
local_port: <int> # the port for the API (local only) (default: 8888)
55
57
predictor:
56
58
type: tensorflow
57
59
path: <string> # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
@@ -62,14 +64,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
62
64
image: <string> # docker image to use for the Predictor (default: cortexlabs/tensorflow-predictor)
63
65
tensorflow_serving_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tensorflow-serving-gpu or cortexlabs/tensorflow-serving-cpu based on compute)
64
66
env: <string: string> # dictionary of environment variables
65
-
tracker:
67
+
tracker: # (aws only)
66
68
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
67
69
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
68
70
compute:
69
71
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
70
72
gpu: <int> # GPU request per replica (default: 0)
71
73
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
72
-
autoscaling:
74
+
autoscaling: # (aws only)
73
75
min_replicas: <int> # minimum number of replicas (default: 1)
74
76
max_replicas: <int> # maximum number of replicas (default: 100)
75
77
init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -84,7 +86,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
84
86
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
85
87
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
86
88
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
87
-
update_strategy:
89
+
update_strategy: # (aws only)
88
90
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
89
91
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
90
92
```
@@ -95,7 +97,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
95
97
96
98
```yaml
97
99
- name: <string> # API name (required)
98
-
endpoint: <string> # the endpoint for the API (default: <api_name>)
100
+
endpoint: <string> # the endpoint for the API (aws only) (default: <api_name>)
101
+
local_port: <int> # the port for the API (local only) (default: 8888)
99
102
predictor:
100
103
type: onnx
101
104
path: <string> # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
@@ -104,14 +107,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
104
107
python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
105
108
image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-predictor-gpu or cortexlabs/onnx-predictor-cpu based on compute)
106
109
env: <string: string> # dictionary of environment variables
107
-
tracker:
110
+
tracker: # (aws only)
108
111
key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
109
112
model_type: <string> # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
110
113
compute:
111
114
cpu: <string | int | float> # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
112
115
gpu: <int> # GPU request per replica (default: 0)
113
116
mem: <string> # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
114
-
autoscaling:
117
+
autoscaling: # (aws only)
115
118
min_replicas: <int> # minimum number of replicas (default: 1)
116
119
max_replicas: <int> # maximum number of replicas (default: 100)
117
120
init_replicas: <int> # initial number of replicas (default: <min_replicas>)
@@ -126,7 +129,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
126
129
max_upscale_factor: <float> # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
127
130
downscale_tolerance: <float> # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
128
131
upscale_tolerance: <float> # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
129
-
update_strategy:
132
+
update_strategy: # (aws only)
130
133
max_surge: <string | int> # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
131
134
max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
0 commit comments