Commit 0493567

Move serving image overrides from cluster configuration to API configuration (#948)

1 parent: 962c8c2

30 files changed: +1013 -189 lines

cli/cmd/lib_cluster_config.go (0 additions, 22 deletions)

```diff
@@ -444,28 +444,6 @@ func clusterConfigConfirmaionStr(clusterConfig clusterconfig.Config, awsCreds AW
 	if clusterConfig.Telemetry != defaultConfig.Telemetry {
 		items.Add(clusterconfig.TelemetryUserKey, clusterConfig.Telemetry)
 	}
-
-	if clusterConfig.ImagePythonServe != defaultConfig.ImagePythonServe {
-		items.Add(clusterconfig.ImagePythonServeUserKey, clusterConfig.ImagePythonServe)
-	}
-	if clusterConfig.ImagePythonServeGPU != defaultConfig.ImagePythonServeGPU {
-		items.Add(clusterconfig.ImagePythonServeGPUUserKey, clusterConfig.ImagePythonServeGPU)
-	}
-	if clusterConfig.ImageTFServe != defaultConfig.ImageTFServe {
-		items.Add(clusterconfig.ImageTFServeUserKey, clusterConfig.ImageTFServe)
-	}
-	if clusterConfig.ImageTFServeGPU != defaultConfig.ImageTFServeGPU {
-		items.Add(clusterconfig.ImageTFServeGPUUserKey, clusterConfig.ImageTFServeGPU)
-	}
-	if clusterConfig.ImageTFAPI != defaultConfig.ImageTFAPI {
-		items.Add(clusterconfig.ImageTFAPIUserKey, clusterConfig.ImageTFAPI)
-	}
-	if clusterConfig.ImageONNXServe != defaultConfig.ImageONNXServe {
-		items.Add(clusterconfig.ImageONNXServeUserKey, clusterConfig.ImageONNXServe)
-	}
-	if clusterConfig.ImageONNXServeGPU != defaultConfig.ImageONNXServeGPU {
-		items.Add(clusterconfig.ImageONNXServeGPUUserKey, clusterConfig.ImageONNXServeGPU)
-	}
 	if clusterConfig.ImageOperator != defaultConfig.ImageOperator {
 		items.Add(clusterconfig.ImageOperatorUserKey, clusterConfig.ImageOperator)
 	}
```

cli/cmd/lib_manager.go (7 additions, 0 deletions)

```diff
@@ -240,6 +240,13 @@ func runManagerUpdateCommand(entrypoint string, clusterConfig *clusterconfig.Con
 		"CORTEX_TELEMETRY_SEGMENT_WRITE_KEY=" + os.Getenv("CORTEX_TELEMETRY_SEGMENT_WRITE_KEY"),
 		"CORTEX_CLUSTER_CONFIG_FILE=" + mountedConfigPath,
 		"CORTEX_CLUSTER_WORKSPACE=" + clusterWorkspace,
+		"CORTEX_IMAGE_PYTHON_SERVE=" + consts.DefaultImagePythonServe,
+		"CORTEX_IMAGE_PYTHON_SERVE_GPU=" + consts.DefaultImagePythonServeGPU,
+		"CORTEX_IMAGE_TF_SERVE=" + consts.DefaultImageTFServe,
+		"CORTEX_IMAGE_TF_SERVE_GPU=" + consts.DefaultImageTFServeGPU,
+		"CORTEX_IMAGE_TF_API=" + consts.DefaultImageTFAPI,
+		"CORTEX_IMAGE_ONNX_SERVE=" + consts.DefaultImageONNXServe,
+		"CORTEX_IMAGE_ONNX_SERVE_GPU=" + consts.DefaultImageONNXServeGPU,
 	},
 }
```
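These additions pass the default serving image paths into the manager container as environment variables. A minimal sketch of how such a variable could be consumed, with a fallback when it is unset (`imageFromEnv` is a hypothetical helper for illustration; the operator's actual consumption of these variables isn't shown in this diff):

```go
package main

import (
	"fmt"
	"os"
)

// imageFromEnv returns the value of an image environment variable set by the
// CLI (e.g. CORTEX_IMAGE_PYTHON_SERVE), falling back to a default when unset.
func imageFromEnv(key string, fallback string) string {
	if value := os.Getenv(key); value != "" {
		return value
	}
	return fallback
}

func main() {
	image := imageFromEnv("CORTEX_IMAGE_PYTHON_SERVE", "cortexlabs/python-serve:master")
	fmt.Println(image) // prints the exported value, or the fallback when unset
}
```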

docs/cluster-management/config.md (2 additions, 9 deletions)

````diff
@@ -48,20 +48,13 @@ log_group: cortex
 spot: false
 ```
 
-The docker images used by Cortex are listed below. They can be overridden to use custom images by specifying them in your cluster configuration file.
+The default docker images used for your Predictors are listed in the instructions for [system packages](../deployments/system-packages.md), and can be overridden in your [API configuration](../deployments/api-configuration.md).
 
-You can follow these [instructions](../deployments/system-packages.md) to build and push custom Docker images to a registry and configure Cortex to use them.
+The docker images used by the Cortex cluster can also be overridden, although this is not common. They can be configured by adding any of these keys to your cluster configuration file (default values are shown):
 
 <!-- CORTEX_VERSION_BRANCH_STABLE -->
 ```yaml
 # docker image paths
-image_python_serve: cortexlabs/python-serve:master
-image_python_serve_gpu: cortexlabs/python-serve-gpu:master
-image_tf_serve: cortexlabs/tf-serve:master
-image_tf_serve_gpu: cortexlabs/tf-serve-gpu:master
-image_tf_api: cortexlabs/tf-api:master
-image_onnx_serve: cortexlabs/onnx-serve:master
-image_onnx_serve_gpu: cortexlabs/onnx-serve-gpu:master
 image_operator: cortexlabs/operator:master
 image_manager: cortexlabs/manager:master
 image_downloader: cortexlabs/downloader:master
````

docs/cluster-management/security.md (3 additions, 2 deletions)

````diff
@@ -20,7 +20,7 @@ It is recommended to use an IAM user with the `AdministratorAccess` policy to cr
 
 ### Operator
 
-The operator requires read permissions for any S3 bucket containing exported models, read and write permissions for the Cortex S3 bucket, read and write permissions for the Cortex CloudWatch log group, and read and write permissions for CloudWatch metrics. The policy below may be used to restrict the Operator's access:
+The operator requires read permissions for any S3 bucket containing exported models, read and write permissions for the Cortex S3 bucket, read and write permissions for the Cortex CloudWatch log group, read and write permissions for CloudWatch metrics, and read permissions for ECR. The policy below may be used to restrict the Operator's access:
 
 ```json
 {
@@ -42,7 +42,8 @@ The operator requires read permissions for any S3 bucket containing exported mod
     {
       "Action": [
         "cloudwatch:*",
-        "logs:*"
+        "logs:*",
+        "ecr:GetAuthorizationToken"
       ],
       "Effect": "Allow",
      "Resource": "*"
````

docs/cluster-management/telemetry.md (1 addition, 1 deletion)

```diff
@@ -6,7 +6,7 @@ By default, Cortex sends anonymous usage data to Cortex Labs.
 
 ## What data is collected?
 
-If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex deploy`, Cortex Labs will receive an event of the structure {id: 1234, command: "deploy"}. In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster.
+If telemetry is enabled, events and errors are collected. Each time you run a command an event will be sent with a randomly generated unique CLI ID and the name of the command. For example, if you run `cortex deploy`, Cortex Labs will receive an event of the structure `{id: 1234, command: "deploy"}`. In addition, the operator sends heartbeats that include cluster metrics like the types of instances running in your cluster.
 
 ## Why is this data being collected?
 
```
docs/contributing/development.md

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,13 +141,6 @@ region: us-west-2
141141
log_group: cortex
142142
cluster_name: cortex
143143

144-
image_python_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
145-
image_python_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve-gpu:latest
146-
image_tf_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve:latest
147-
image_tf_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve-gpu:latest
148-
image_tf_api: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-api:latest
149-
image_onnx_serve: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest
150-
image_onnx_serve_gpu: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest
151144
image_operator: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest
152145
image_manager: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/manager:latest
153146
image_downloader: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/downloader:latest
@@ -208,7 +201,34 @@ make cluster-down
208201

209202
```bash
210203
cd examples/pytorch/iris-classifier
211-
cortex-dev deploy
204+
```
205+
206+
Take note of the following images:
207+
```
208+
# for Python Predictor
209+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
210+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve-gpu:latest
211+
212+
# for Tensorflow Predictor
213+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve:latest
214+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-serve-gpu:latest
215+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/tf-api:latest
216+
217+
# for ONNX Predictor
218+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest
219+
XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve-gpu:latest
220+
```
221+
222+
Edit `cortex.yaml` and override `image`/`tf_serve_image` with the appropriate image(s) for the given predictor type:
223+
```yaml
224+
# cortex.yaml
225+
226+
- name: my-api
227+
...
228+
predictor:
229+
type: python
230+
image: XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/python-serve:latest
231+
...
212232
```
213233

214234
## Off-cluster operator

docs/deployments/api-configuration.md (7 additions, 3 deletions)

````diff
@@ -16,6 +16,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     path: <string> # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
+    image: <string> # docker image to use for the Predictor (default: cortexlabs/python-serve[-gpu])
     env: <string: string> # dictionary of environment variables
   tracker:
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
@@ -44,7 +45,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
   max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), and [prediction monitoring](prediction-monitoring.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## TensorFlow Predictor
 
@@ -58,6 +59,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     signature_key: <string> # name of the signature def to use for prediction (required if your model has more than one signature def)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
+    image: <string> # docker image to use for the Predictor (default: cortexlabs/tf-api)
+    tf_serve_image: <string> # docker image to use for the TensorFlow Serving container (default: cortexlabs/tf-serve[-gpu], which is based on tensorflow/serving)
     env: <string: string> # dictionary of environment variables
   tracker:
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
@@ -86,7 +89,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
   max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), and [prediction monitoring](prediction-monitoring.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
 
 ## ONNX Predictor
 
@@ -99,6 +102,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     model: <string> # S3 path to an exported model (e.g. s3://my-bucket/exported_model.onnx) (required)
     config: <string: value> # arbitrary dictionary passed to the constructor of the Predictor (optional)
     python_path: <string> # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
+    image: <string> # docker image to use for the Predictor (default: cortexlabs/onnx-serve[-gpu])
     env: <string: string> # dictionary of environment variables
   tracker:
     key: <string> # the JSON key in the response to track (required if the response payload is a JSON object)
@@ -127,4 +131,4 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
   max_unavailable: <string | int> # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
 
-See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), and [prediction monitoring](prediction-monitoring.md).
+See additional documentation for [autoscaling](autoscaling.md), [compute](compute.md), [prediction monitoring](prediction-monitoring.md), and [overriding API images](system-packages.md).
````
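Note that with the new `image` and `tf_serve_image` fields, image overrides are scoped to an individual API rather than to the entire cluster, so different APIs in the same cluster can run different serving images.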

docs/deployments/system-packages.md (20 additions, 18 deletions)

````diff
@@ -36,24 +36,22 @@ class PythonPredictor:
 
 ## Custom Docker image
 
+### Create a Dockerfile
+
 Create a Dockerfile to build your custom image:
 
 ```bash
 mkdir my-api && cd my-api && touch Dockerfile
 ```
 
-The Docker images used to deploy your models are listed below. Based on the Cortex Predictor and compute type specified in your API configuration, choose a Cortex image to use as the base for your custom Docker image.
-
-### Base Cortex images for model serving
+The default Docker images used to deploy your models are listed below. Based on the Cortex Predictor and compute type specified in your API configuration, choose a Cortex image to use as the base for your custom Docker image:
 
 <!-- CORTEX_VERSION_BRANCH_STABLE x5 -->
-* Python (CPU): `cortexlabs/python-serve:master`
-* Python (GPU): `cortexlabs/python-serve-gpu:master`
-* TensorFlow (CPU or GPU): `cortexlabs/tf-api:master`
-* ONNX (CPU): `cortexlabs/onnx-serve:master`
-* ONNX (GPU): `cortexlabs/onnx-serve-gpu:master`
-
-Note that the Docker image version must match your cluster version displayed in `cortex version`.
+* Python Predictor (CPU): `cortexlabs/python-serve:master`
+* Python Predictor (GPU): `cortexlabs/python-serve-gpu:master`
+* TensorFlow Predictor (CPU and GPU): `cortexlabs/tf-api:master`
+* ONNX Predictor (CPU): `cortexlabs/onnx-serve:master`
+* ONNX Predictor (GPU): `cortexlabs/onnx-serve-gpu:master`
 
 The sample Dockerfile below inherits from Cortex's Python CPU serving image and installs the `tree` system package.
 
@@ -84,7 +82,7 @@ aws ecr create-repository --repository-name=org/my-api --region=us-west-2
 # take note of repository url
 ```
 
-Build the image based on your Dockerfile and push to its repository in ECR:
+Build the image based on your Dockerfile and push it to its repository in ECR:
 
 ```bash
 docker build . -t org/my-api:latest -t <repository_url>:latest
@@ -94,18 +92,22 @@ docker push <repository_url>:latest
 
 ### Configure Cortex
 
-Update your cluster configuration file to point to your image:
+Update your API configuration file to point to your image:
 
 ```yaml
-# cluster.yaml
+# cortex.yaml
 
-# ...
-image_python_serve: <repository_url>:latest
-# ...
+- name: my-api
+  ...
+  predictor:
+    image: <repository_url>:latest
+    ...
 ```
 
-Update your cluster for the change to take effect:
+*Note: for [TensorFlow Predictors](#tensorflow-predictor), two containers run together to serve predictions: one runs your Predictor code (`cortexlabs/tf-api`), and the other is TensorFlow Serving, which loads the SavedModel (`cortexlabs/tf-serve[-gpu]`). A second field, `tf_serve_image`, can be used to override the TensorFlow Serving image. The default image (`cortexlabs/tf-serve[-gpu]`) is based on the official TensorFlow Serving image (`tensorflow/serving`); unless a different version of TensorFlow Serving is required, it shouldn't need to be overridden, since it only loads the SavedModel and does not run your Predictor code.*
+
+Deploy your API as usual:
 
 ```bash
-cortex cluster update --config=cluster.yaml
+cortex deploy
 ```
````

manager/install.sh (1 addition, 1 deletion)

```diff
@@ -16,7 +16,7 @@
 
 set -e
 
-CORTEX_VERSION=master
+export CORTEX_VERSION=master
 EKSCTL_TIMEOUT=45m
 
 arg1="$1"
```

pkg/consts/consts.go (21 additions, 0 deletions)

```diff
@@ -16,9 +16,30 @@ limitations under the License.
 
 package consts
 
+import (
+	"github.com/cortexlabs/cortex/pkg/lib/sets/strset"
+)
+
 var (
 	CortexVersion      = "master" // CORTEX_VERSION
 	CortexVersionMinor = "master" // CORTEX_VERSION_MINOR
 
+	DefaultImagePythonServe    = "cortexlabs/python-serve:" + CortexVersion
+	DefaultImagePythonServeGPU = "cortexlabs/python-serve-gpu:" + CortexVersion
+	DefaultImageTFServe        = "cortexlabs/tf-serve:" + CortexVersion
+	DefaultImageTFServeGPU     = "cortexlabs/tf-serve-gpu:" + CortexVersion
+	DefaultImageTFAPI          = "cortexlabs/tf-api:" + CortexVersion
+	DefaultImageONNXServe      = "cortexlabs/onnx-serve:" + CortexVersion
+	DefaultImageONNXServeGPU   = "cortexlabs/onnx-serve-gpu:" + CortexVersion
+	DefaultImagePathsSet       = strset.New(
+		DefaultImagePythonServe,
+		DefaultImagePythonServeGPU,
+		DefaultImageTFServe,
+		DefaultImageTFServeGPU,
+		DefaultImageTFAPI,
+		DefaultImageONNXServe,
+		DefaultImageONNXServeGPU,
+	)
+
 	MaxClassesPerTrackerRequest = 20 // cloudwatch.GeMetricData can get up to 100 metrics per request, avoid multiple requests and have room for other stats
 )
```
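`DefaultImagePathsSet` collects every default serving image path into a set. A minimal sketch of a membership check against it, assuming the generated `strset` type exposes a `Has` method (`isCustomImage` is a hypothetical helper, not code from this commit):

```go
package main

import (
	"fmt"

	"github.com/cortexlabs/cortex/pkg/consts"
)

// isCustomImage reports whether an API-configured image differs from every
// built-in default serving image.
func isCustomImage(image string) bool {
	return !consts.DefaultImagePathsSet.Has(image)
}

func main() {
	fmt.Println(isCustomImage("cortexlabs/python-serve:master")) // false: a default image
	fmt.Println(isCustomImage("XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/org/my-api:latest")) // true: a custom image
}
```

One use for such a check would be deciding whether registry-specific handling (e.g. ECR authentication) is needed for a given API's image.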
