Commit 0493567

Move serving image overrides from cluster configuration to API configuration (#948)

1 parent: 962c8c2

30 files changed: +1013 -189 lines

cli/cmd/lib_cluster_config.go (0 additions, 22 deletions)

```diff
@@ -444,28 +444,6 @@ func clusterConfigConfirmaionStr(clusterConfig clusterconfig.Config, awsCreds AW
 	if clusterConfig.Telemetry != defaultConfig.Telemetry {
 		items.Add(clusterconfig.TelemetryUserKey, clusterConfig.Telemetry)
 	}
-
-	if clusterConfig.ImagePythonServe != defaultConfig.ImagePythonServe {
-		items.Add(clusterconfig.ImagePythonServeUserKey, clusterConfig.ImagePythonServe)
-	}
-	if clusterConfig.ImagePythonServeGPU != defaultConfig.ImagePythonServeGPU {
-		items.Add(clusterconfig.ImagePythonServeGPUUserKey, clusterConfig.ImagePythonServeGPU)
-	}
-	if clusterConfig.ImageTFServe != defaultConfig.ImageTFServe {
-		items.Add(clusterconfig.ImageTFServeUserKey, clusterConfig.ImageTFServe)
-	}
-	if clusterConfig.ImageTFServeGPU != defaultConfig.ImageTFServeGPU {
-		items.Add(clusterconfig.ImageTFServeGPUUserKey, clusterConfig.ImageTFServeGPU)
-	}
-	if clusterConfig.ImageTFAPI != defaultConfig.ImageTFAPI {
-		items.Add(clusterconfig.ImageTFAPIUserKey, clusterConfig.ImageTFAPI)
-	}
-	if clusterConfig.ImageONNXServe != defaultConfig.ImageONNXServe {
-		items.Add(clusterconfig.ImageONNXServeUserKey, clusterConfig.ImageONNXServe)
-	}
-	if clusterConfig.ImageONNXServeGPU != defaultConfig.ImageONNXServeGPU {
-		items.Add(clusterconfig.ImageONNXServeGPUUserKey, clusterConfig.ImageONNXServeGPU)
-	}
 	if clusterConfig.ImageOperator != defaultConfig.ImageOperator {
 		items.Add(clusterconfig.ImageOperatorUserKey, clusterConfig.ImageOperator)
 	}
```

cli/cmd/lib_manager.go (7 additions, 0 deletions)

```diff
@@ -240,6 +240,13 @@ func runManagerUpdateCommand(entrypoint string, clusterConfig *clusterconfig.Con
 		"CORTEX_TELEMETRY_SEGMENT_WRITE_KEY=" + os.Getenv("CORTEX_TELEMETRY_SEGMENT_WRITE_KEY"),
 		"CORTEX_CLUSTER_CONFIG_FILE=" + mountedConfigPath,
 		"CORTEX_CLUSTER_WORKSPACE=" + clusterWorkspace,
+		"CORTEX_IMAGE_PYTHON_SERVE=" + consts.DefaultImagePythonServe,
+		"CORTEX_IMAGE_PYTHON_SERVE_GPU=" + consts.DefaultImagePythonServeGPU,
+		"CORTEX_IMAGE_TF_SERVE=" + consts.DefaultImageTFServe,
+		"CORTEX_IMAGE_TF_SERVE_GPU=" + consts.DefaultImageTFServeGPU,
+		"CORTEX_IMAGE_TF_API=" + consts.DefaultImageTFAPI,
+		"CORTEX_IMAGE_ONNX_SERVE=" + consts.DefaultImageONNXServe,
+		"CORTEX_IMAGE_ONNX_SERVE_GPU=" + consts.DefaultImageONNXServeGPU,
 	},
 }
```
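These additions pass the default serving image paths into the manager container as environment variables. A minimal sketch of how such a variable could be consumed, with a fallback when it is unset (`imageFromEnv` is a hypothetical helper for illustration; the operator's actual consumption of these variables isn't shown in this diff):

```go
package main

import (
	"fmt"
	"os"
)

// imageFromEnv returns the value of an image environment variable set by the
// CLI (e.g. CORTEX_IMAGE_PYTHON_SERVE), falling back to a default when unset.
func imageFromEnv(key string, fallback string) string {
	if value := os.Getenv(key); value != "" {
		return value
	}
	return fallback
}

func main() {
	image := imageFromEnv("CORTEX_IMAGE_PYTHON_SERVE", "cortexlabs/python-serve:master")
	fmt.Println(image) // prints the exported value, or the fallback when unset
}
```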

docs/cluster-management/config.md (2 additions, 9 deletions)

````diff
@@ -48,20 +48,13 @@ log_group: cortex
 spot: false
 ```
 
-The docker images used by Cortex are listed below. They can be overridden to use custom images by specifying them in your cluster configuration file.
+The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [API configuration](../deployments/api-configuration.md).
 
-You can follow these [instructions](../deployments/system-packages.md) to build and push custom Docker images to a registry and configure Cortex to use them.
+The docker images used by the Cortex cluster can also be overridden, although this is not common. They can be configured by adding any of these keys to your cluster configuration file (default values are shown):
 
 <!-- CORTEX_VERSION_BRANCH_STABLE -->
 ```yaml
 # docker image paths
-image_python_serve: cortexlabs/python-serve:master
-image_python_serve_gpu: cortexlabs/python-serve-gpu:master
-image_tf_serve: cortexlabs/tf-serve:master
-image_tf_serve_gpu: cortexlabs/tf-serve-gpu:master
-image_tf_api: cortexlabs/tf-api:master
-image_onnx_serve: cortexlabs/onnx-serve:master
-image_onnx_serve_gpu: cortexlabs/onnx-serve-gpu:master
 image_operator: cortexlabs/operator:master
 image_manager: cortexlabs/manager:master
 image_downloader: cortexlabs/downloader:master
````

docs/cluster-management/security.md (3 additions, 2 deletions)

````diff
@@ -20,7 +20,7 @@ It is recommended to use an IAM user with the `AdministratorAccess` policy to cr
 
 ### Operator
 
-The operator requires read permissions for any S3 bucket containing exported models, read and write permissions for the Cortex S3 bucket, read and write permissions for the Cortex CloudWatch log group, and read and write permissions for CloudWatch metrics. The policy below may be used to restrict the Operator's access:
+The operator requires read permissions for any S3 bucket containing exported models, read and write permissions for the Cortex S3 bucket, read and write permissions for the Cortex CloudWatch log group, read and write permissions for CloudWatch metrics, and read permissions for ECR. The policy below may be used to restrict the Operator's access:
 
 ```json
 {
@@ -42,7 +42,8 @@ The operator requires read permissions for any S3 bucket containing exported mod
     {
       "Action": [
         "cloudwatch:*",
-        "logs:*"
+        "logs:*",
+        "ecr:GetAuthorizationToken"
       ],
       "Effect": "Allow",
      "Resource": "*"
````

docs/cluster-management/telemetry.md (1 addition, 1 deletion)

```diff
@@ -6,7 +6,7 @@ By default, Cortex sends anonymous usage data to Cortex Labs.
 
 ## What data is collected?
 
-If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex deploy`, Cortex Labs will receive an event of the structure {id: 1234, command: "deploy"}. In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster.
+If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex deploy`, Cortex Labs will receive an event of the structure `{id: 1234, command: "deploy"}`. In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster.
 
 ## Why is this data being collected?
 
```
docs/contributing/development.md

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,13 +141,6 @@ region: us-west-2
141141
log_group: cortex
142142
cluster_name: cortex
143143

144-
image_python_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
145-
image_python_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve-gpu:latest
146-
image_tf_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve:latest
147-
image_tf_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve-gpu:latest
148-
image_tf_api: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-api:latest
149-
image_onnx_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest
150-
image_onnx_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest
151144
image_operator: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest
152145
image_manager: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/manager:latest
153146
image_downloader: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/downloader:latest
@@ -208,7 +201,34 @@ make cluster-down
208201

209202
```bash
210203
cd examples/pytorch/iris-classifier
211-
cortex-dev deploy
204+
```
205+
206+
Take note of the following images:
207+
```
208+
# for Python Predictor
209+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
210+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve-gpu:latest
211+
212+
# for Tensorflow Predictor
213+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve:latest
214+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve-gpu:latest
215+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-api:latest
216+
217+
# for ONNX Predictor
218+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest
219+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest
220+
```
221+
222+
Edit `cortex.yaml` and override `image`/`tf_serve_image` with the appropriate image(s) for the given predictor type:
223+
```yaml
224+
# cortex.yaml
225+
226+
- name: my-api
227+
...
228+
predictor:
229+
type: python
230+
image: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
231+
...
212232
```
213233

214234
## Off-cluster operator

docs/deployments/api-configuration.md (7 additions, 3 deletions)

````diff
@@ -16,6 +16,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
+    image: <string> # docker image to use for the Predictor (default: cortexlabs/python-serve[-gpu])
     env: <string: string> # dictionary of environment variables
   tracker:
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
@@ -44,7 +45,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
   max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), and [prediction monitoring](prediction-monitoring.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## TensorFlow Predictor
 
@@ -58,6 +59,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
+    image: <string> # docker image to use for the Predictor (default: cortexlabs/tf-api)
+    tf_serve_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tf-serve[-gpu], which is based on tensorflow/serving)
     env: <string: string> # dictionary of environment variables
   tracker:
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
@@ -86,7 +89,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
   max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), and [prediction monitoring](prediction-monitoring.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## ONNX Predictor
 
@@ -99,6 +102,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     model: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (required)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
+    image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-serve[-gpu])
     env: <string: string> # dictionary of environment variables
   tracker:
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
@@ -127,4 +131,4 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
   max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), and [prediction monitoring](prediction-monitoring.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
````
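Note that with the new `image` and `tf_serve_image` fields, image overrides are scoped to an individual API rather than to the entire cluster, so different APIs in the same cluster can run different serving images.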

docs/deployments/system-packages.md (20 additions, 18 deletions)

````diff
@@ -36,24 +36,22 @@ class PythonPredictor:
 
 ## Custom Docker image
 
+### Create a Dockerfile
+
 Create a Dockerfile to build your custom image:
 
 ```bash
 mkdir my-api && cd my-api && touch Dockerfile
 ```
 
-The Docker images used to deploy your models are listed below. Based on the Cortex Predictor and compute type specified in your API configuration, choose a Cortex image to use as the base for your custom Docker image.
-
-### Base Cortex images for model serving
+The default Docker images used to deploy your models are listed below. Based on the Cortex Predictor and compute type specified in your API configuration, choose a Cortex image to use as the base for your custom Docker image:
 
 <!-- CORTEX_VERSION_BRANCH_STABLE x5 -->
-* Python (CPU): `cortexlabs/python-serve:master`
-* Python (GPU): `cortexlabs/python-serve-gpu:master`
-* TensorFlow (CPU or GPU): `cortexlabs/tf-api:master`
-* ONNX (CPU): `cortexlabs/onnx-serve:master`
-* ONNX (GPU): `cortexlabs/onnx-serve-gpu:master`
-
-Note that the Docker image version must match your cluster version displayed in `cortex version`.
+* Python Predictor (CPU): `cortexlabs/python-serve:master`
+* Python Predictor (GPU): `cortexlabs/python-serve-gpu:master`
+* TensorFlow Predictor (CPU and GPU): `cortexlabs/tf-api:master`
+* ONNX Predictor (CPU): `cortexlabs/onnx-serve:master`
+* ONNX Predictor (GPU): `cortexlabs/onnx-serve-gpu:master`
 
 The sample Dockerfile below inherits from Cortex's Python CPU serving image and installs the `tree` system package.
 
@@ -84,7 +82,7 @@ aws ecr create-repository --repository-name=org/my-api --region=us-west-2
 # take note of repository url
 ```
 
-Build the image based on your Dockerfile and push to its repository in ECR:
+Build the image based on your Dockerfile and push it to its repository in ECR:
 
 ```bash
 docker build . -t org/my-api:latest -t <repository_url>:latest
@@ -94,18 +92,22 @@ docker push <repository_url>:latest
 
 ### Configure Cortex
 
-Update your cluster configuration file to point to your image:
+Update your API configuration file to point to your image:
 
 ```yaml
-# cluster.yaml
+# cortex.yaml
 
-# ...
-image_python_serve: <repository_url>:latest
-# ...
+- name: my-api
+  ...
+  predictor:
+    image: <repository_url>:latest
+    ...
 ```
 
-Update your cluster for the change to take effect:
+*Note: for [TensorFlow Predictors](#tensorflow-predictor), two containers run together to serve predictions: one runs your Predictor code (`cortexlabs/tf-api`), and the other is TensorFlow Serving, which loads the SavedModel (`cortexlabs/tf-serve[-gpu]`). A second field, `tf_serve_image`, can be used to override the TensorFlow Serving image. The default image (`cortexlabs/tf-serve[-gpu]`) is based on the official TensorFlow Serving image (`tensorflow/serving`); unless a different version of TensorFlow Serving is required, it shouldn't need to be overridden, since it only loads the SavedModel and does not run your Predictor code.*
+
+Deploy your API as usual:
 
 ```bash
-cortex cluster update --config=cluster.yaml
+cortex deploy
 ```
````

manager/install.sh (1 addition, 1 deletion)

```diff
@@ -16,7 +16,7 @@
 
 set -e
 
-CORTEX_VERSION=master
+export CORTEX_VERSION=master
 EKSCTL_TIMEOUT=45m
 
 arg1="$1"
```

pkg/consts/consts.go (21 additions, 0 deletions)

```diff
@@ -16,9 +16,30 @@ limitations under the License.
 
 package consts
 
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
+)
+
 var (
 	CortexVersion      = "master" // CORTEX_VERSION
 	CortexVersionMinor = "master" // CORTEX_VERSION_MINOR
 
+	DefaultImagePythonServe    = "cortexlabs/python-serve:" + CortexVersion
+	DefaultImagePythonServeGPU = "cortexlabs/python-serve-gpu:" + CortexVersion
+	DefaultImageTFServe        = "cortexlabs/tf-serve:" + CortexVersion
+	DefaultImageTFServeGPU     = "cortexlabs/tf-serve-gpu:" + CortexVersion
+	DefaultImageTFAPI          = "cortexlabs/tf-api:" + CortexVersion
+	DefaultImageONNXServe      = "cortexlabs/onnx-serve:" + CortexVersion
+	DefaultImageONNXServeGPU   = "cortexlabs/onnx-serve-gpu:" + CortexVersion
+	DefaultImagePathsSet       = strset.New(
+		DefaultImagePythonServe,
+		DefaultImagePythonServeGPU,
+		DefaultImageTFServe,
+		DefaultImageTFServeGPU,
+		DefaultImageTFAPI,
+		DefaultImageONNXServe,
+		DefaultImageONNXServeGPU,
+	)
+
 	MaxClassesPerTrackerRequest = 20 // cloudwatch.GeMetricData can get up to 100 metrics per request, avoid multiple requests and have room for other stats
 )
```
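`DefaultImagePathsSet` collects every default serving image path into a set. A minimal sketch of a membership check against it, assuming the generated `strset` type exposes a `Has` method (`isCustomImage` is a hypothetical helper, not code from this commit):

```go
package main

import (
	"fmt"

	"github.com/cortexlabs/cortex/pkg/consts"
)

// isCustomImage reports whether an API-configured image differs from every
// built-in default serving image.
func isCustomImage(image string) bool {
	return !consts.DefaultImagePathsSet.Has(image)
}

func main() {
	fmt.Println(isCustomImage("cortexlabs/python-serve:master")) // false: a default image
	fmt.Println(isCustomImage("XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/org/my-api:latest")) // true: a custom image
}
```

One use for such a check would be deciding whether registry-specific handling (e.g. ECR authentication) is needed for a given API's image.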
