Skip to content
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
d79abe3
Add nginx boilerplate config
RobertLucian Oct 28, 2020
4283b5e
Merge branch 'master' into fix/inter-process-fairness
RobertLucian Oct 28, 2020
68c63c2
Work on the supervisor process
RobertLucian Oct 29, 2020
ecdbb60
Work on nginx
RobertLucian Oct 29, 2020
80686f6
Add stop scripts for all serving processes
RobertLucian Oct 30, 2020
79f253c
Deny all but to the predict route or empty route
RobertLucian Oct 30, 2020
cbe33cb
Improve performance by adding thread pool + set higher read timeout
RobertLucian Oct 30, 2020
c0c3e06
Fix API readiness
RobertLucian Oct 30, 2020
9da8cc3
Make lint
RobertLucian Oct 30, 2020
b822c37
Add CORS to nginx & rm from uvicorn
RobertLucian Oct 30, 2020
2107437
Terminate nginx gracefully
RobertLucian Oct 31, 2020
f86f775
Increase grace period to 60s for k8s pods
RobertLucian Oct 31, 2020
158fc3d
Make lint
RobertLucian Oct 31, 2020
c8d3dad
Properly shut down nginx when deleting a pod
RobertLucian Oct 31, 2020
3ca88b2
Use jinja2 to generate the nginx config
RobertLucian Nov 2, 2020
c699e2c
Fix jinja generating issues
RobertLucian Nov 2, 2020
ee0ebe1
Adapt all predictor images to use s6-overlay
RobertLucian Nov 2, 2020
8d08afc
Fix linting
RobertLucian Nov 2, 2020
952f829
Fixes for the predictor images
RobertLucian Nov 2, 2020
661806c
Improve comment semantics
RobertLucian Nov 2, 2020
a057188
Stop sidecars when the serving API container has stopped
RobertLucian Nov 3, 2020
5592b53
Pass in just the required value
RobertLucian Nov 3, 2020
b95a2b9
CORTEX_SERVING_PORT not required for waitAPIContainer func
RobertLucian Nov 3, 2020
9b1fcad
Fix bug when batching is done
RobertLucian Nov 3, 2020
30228a4
Fix linting
RobertLucian Nov 3, 2020
877ffdb
Merge branch 'master' into fix/inter-process-fairness
RobertLucian Nov 3, 2020
68bd5b8
Remove unnecessary import
RobertLucian Nov 3, 2020
1b0105c
Remove unwanted line
RobertLucian Nov 3, 2020
3001b87
Increase memory availability to prevent OOM
RobertLucian Nov 3, 2020
3aa9074
Address review requests
RobertLucian Nov 6, 2020
7610385
Simplify location regex in nginx conf
RobertLucian Nov 6, 2020
66824b4
Fix bad service naming for uvicorn + add env sourcer
RobertLucian Nov 6, 2020
29f5475
Fix dependencies.sh script not being executed correctly
RobertLucian Nov 6, 2020
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions cli/local/docker_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package local
import (
"context"
"fmt"
"math"
"path/filepath"
"strings"

Expand Down Expand Up @@ -92,9 +91,8 @@ func getAPIEnv(api *spec.API, awsClient *aws.Client) []string {
"CORTEX_PROJECT_DIR="+_projectDir,
"CORTEX_PROCESSES_PER_REPLICA="+s.Int32(api.Predictor.ProcessesPerReplica),
"CORTEX_THREADS_PER_PROCESS="+s.Int32(api.Predictor.ThreadsPerProcess),
"CORTEX_MAX_REPLICA_CONCURRENCY="+s.Int32(api.Predictor.ProcessesPerReplica*api.Predictor.ThreadsPerProcess),
// add 1 because it was required to achieve the target concurrency for 1 process, 1 thread
"CORTEX_MAX_PROCESS_CONCURRENCY="+s.Int64(1+int64(math.Round(float64(consts.DefaultMaxReplicaConcurrency)/float64(api.Predictor.ProcessesPerReplica)))),
"CORTEX_SO_MAX_CONN="+s.Int64(consts.DefaultMaxReplicaConcurrency+100), // add a buffer to be safe
"AWS_REGION="+awsClient.Region,
)

Expand Down
2 changes: 1 addition & 1 deletion examples/sklearn/iris-classifier/cortex.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@
model_type: classification
compute:
cpu: 0.2
mem: 100M
mem: 200M
1 change: 1 addition & 0 deletions examples/sklearn/iris-classifier/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
boto3
scikit-learn==0.21.3
3 changes: 2 additions & 1 deletion images/neuron-rtd/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ RUN yum install -y \
aws-neuron-runtime-1.0.9592.0 \
procps-ng-3.3.10-26.amzn2.x86_64 \
gzip \
tar
tar \
curl

ENV PATH="/opt/aws/neuron/bin:${PATH}"

Expand Down
8 changes: 7 additions & 1 deletion images/onnx-predictor-cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
FROM ubuntu:18.04

ADD https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer /tmp/
RUN chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer /

RUN apt-get update -qq && apt-get install -y -q \
build-essential \
pkg-config \
Expand All @@ -9,6 +12,7 @@ RUN apt-get update -qq && apt-get install -y -q \
unzip \
zlib1g-dev \
locales \
nginx \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
Expand Down Expand Up @@ -68,4 +72,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/serve /src/cortex/serve

ENTRYPOINT ["/src/cortex/serve/run.sh"]
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh

ENTRYPOINT ["/init"]
8 changes: 7 additions & 1 deletion images/onnx-predictor-gpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

ADD https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer /tmp/
RUN chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer /

RUN apt-get update -qq && apt-get install -y -q \
build-essential \
pkg-config \
Expand All @@ -9,6 +12,7 @@ RUN apt-get update -qq && apt-get install -y -q \
unzip \
zlib1g-dev \
locales \
nginx \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
Expand Down Expand Up @@ -68,4 +72,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/serve /src/cortex/serve

ENTRYPOINT ["/src/cortex/serve/run.sh"]
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh

ENTRYPOINT ["/init"]
8 changes: 7 additions & 1 deletion images/python-predictor-cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
FROM ubuntu:18.04

ADD https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer /tmp/
RUN chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer /

RUN apt-get update -qq && apt-get install -y -q \
build-essential \
pkg-config \
Expand All @@ -9,6 +12,7 @@ RUN apt-get update -qq && apt-get install -y -q \
unzip \
zlib1g-dev \
locales \
nginx \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
Expand Down Expand Up @@ -88,4 +92,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/serve /src/cortex/serve

ENTRYPOINT ["/src/cortex/serve/run.sh"]
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh

ENTRYPOINT ["/init"]
8 changes: 7 additions & 1 deletion images/python-predictor-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ ARG CUDA_VERSION=10.1
ARG CUDNN=7
FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN-devel-ubuntu18.04

ADD https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer /tmp/
RUN chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer /

RUN apt-get update -qq && apt-get install -y -q \
build-essential \
pkg-config \
Expand All @@ -11,6 +14,7 @@ RUN apt-get update -qq && apt-get install -y -q \
unzip \
zlib1g-dev \
locales \
nginx \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
Expand Down Expand Up @@ -93,4 +97,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/serve /src/cortex/serve

ENTRYPOINT ["/src/cortex/serve/run.sh"]
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh

ENTRYPOINT ["/init"]
8 changes: 7 additions & 1 deletion images/python-predictor-inf/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
FROM ubuntu:18.04

ADD https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer /tmp/
RUN chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer /

RUN apt-get update -qq && apt-get install -y -q \
wget \
gnupg && \
Expand All @@ -21,6 +24,7 @@ RUN apt-get update -qq && apt-get install -y -q \
unzip \
zlib1g-dev \
locales \
nginx \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
Expand Down Expand Up @@ -100,4 +104,6 @@ COPY pkg/workloads/cortex/consts.py /src/cortex
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/serve /src/cortex/serve

ENTRYPOINT ["/src/cortex/serve/run.sh"]
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh

ENTRYPOINT ["/init"]
10 changes: 8 additions & 2 deletions images/tensorflow-predictor/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
FROM ubuntu:18.04

ADD https://github.com/just-containers/s6-overlay/releases/download/v2.1.0.2/s6-overlay-amd64-installer /tmp/
RUN chmod +x /tmp/s6-overlay-amd64-installer && /tmp/s6-overlay-amd64-installer /

RUN apt-get update -qq && apt-get install -y -q \
build-essential \
pkg-config \
Expand All @@ -9,6 +12,7 @@ RUN apt-get update -qq && apt-get install -y -q \
unzip \
zlib1g-dev \
locales \
nginx \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8
Expand Down Expand Up @@ -68,8 +72,10 @@ RUN test "${SLIM}" = "true" || ( \
tensorflow-hub==0.9.0 \
)

COPY pkg/workloads/cortex/consts.py /src/cortex/
COPY pkg/workloads/cortex/consts.py /src/cortex
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/serve /src/cortex/serve

ENTRYPOINT ["/src/cortex/serve/run.sh"]
RUN mv /src/cortex/serve/init/bootloader.sh /etc/cont-init.d/bootloader.sh

ENTRYPOINT ["/init"]
4 changes: 4 additions & 0 deletions images/tensorflow-serving-cpu/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
FROM tensorflow/serving:2.3.0

RUN apt-get update -qq && apt-get install -y -q \
curl \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

COPY images/tensorflow-serving-cpu/run.sh /src/
RUN chmod +x /src/run.sh

Expand Down
1 change: 1 addition & 0 deletions images/tensorflow-serving-gpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ FROM tensorflow/serving:2.3.0-gpu
RUN apt-get update -qq && apt-get install -y --no-install-recommends -q \
libnvinfer6=6.0.1-1+cuda10.1 \
libnvinfer-plugin6=6.0.1-1+cuda10.1 \
curl \
&& apt-get clean -qq && rm -rf /var/lib/apt/lists/*

COPY images/tensorflow-serving-gpu/run.sh /src/
Expand Down
1 change: 1 addition & 0 deletions images/tensorflow-serving-inf/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ FROM ubuntu:18.04
RUN apt-get update -qq && apt-get install -y -q \
gettext-base \
supervisor \
curl \
wget \
netcat \
gnupg && \
Expand Down
43 changes: 33 additions & 10 deletions pkg/operator/operator/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"math"
"path"
"strings"

Expand Down Expand Up @@ -179,6 +178,7 @@ func PythonPredictorContainers(api *spec.API) ([]kcore.Container, []kcore.Volume
VolumeMounts: apiPodVolumeMounts,
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
LivenessProbe: _apiLivenessProbe,
Lifecycle: nginxGracefulStopper(api.Kind),
Resources: kcore.ResourceRequirements{
Requests: apiPodResourceList,
Limits: apiPodResourceLimitsList,
Expand Down Expand Up @@ -267,6 +267,7 @@ func TensorFlowPredictorContainers(api *spec.API) ([]kcore.Container, []kcore.Vo
VolumeMounts: volumeMounts,
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
LivenessProbe: _apiLivenessProbe,
Lifecycle: nginxGracefulStopper(api.Kind),
Resources: kcore.ResourceRequirements{
Requests: apiResourceList,
},
Expand Down Expand Up @@ -320,6 +321,7 @@ func ONNXPredictorContainers(api *spec.API) []kcore.Container {
VolumeMounts: DefaultVolumeMounts,
ReadinessProbe: FileExistsProbe(_apiReadinessFile),
LivenessProbe: _apiLivenessProbe,
Lifecycle: nginxGracefulStopper(api.Kind),
Resources: kcore.ResourceRequirements{
Requests: resourceList,
Limits: resourceLimitsList,
Expand Down Expand Up @@ -409,15 +411,6 @@ func getEnvVars(api *spec.API, container string) []kcore.EnvVar {
Name: "CORTEX_MAX_REPLICA_CONCURRENCY",
Value: s.Int64(api.Autoscaling.MaxReplicaConcurrency),
},
kcore.EnvVar{
Name: "CORTEX_MAX_PROCESS_CONCURRENCY",
// add 1 because it was required to achieve the target concurrency for 1 process, 1 thread
Value: s.Int64(1 + int64(math.Round(float64(api.Autoscaling.MaxReplicaConcurrency)/float64(api.Predictor.ProcessesPerReplica)))),
},
kcore.EnvVar{
Name: "CORTEX_SO_MAX_CONN",
Value: s.Int64(api.Autoscaling.MaxReplicaConcurrency + 100), // add a buffer to be safe
},
)
}

Expand Down Expand Up @@ -699,6 +692,7 @@ func tensorflowServingContainer(api *spec.API, volumeMounts []kcore.VolumeMount,
FailureThreshold: 2,
Handler: probeHandler,
},
Lifecycle: waitAPIContainerToStop(api.Kind),
Resources: resources,
Ports: ports,
}
Expand All @@ -720,6 +714,7 @@ func neuronRuntimeDaemonContainer(api *spec.API, volumeMounts []kcore.VolumeMoun
},
VolumeMounts: volumeMounts,
ReadinessProbe: socketExistsProbe(_neuronRTDSocket),
Lifecycle: waitAPIContainerToStop(api.Kind),
Resources: kcore.ResourceRequirements{
Requests: kcore.ResourceList{
"hugepages-2Mi": *kresource.NewQuantity(totalHugePages, kresource.BinarySI),
Expand Down Expand Up @@ -794,6 +789,34 @@ func socketExistsProbe(socketName string) *kcore.Probe {
}
}

// nginxGracefulStopper returns a preStop lifecycle hook that drains and shuts
// down nginx gracefully when the pod is terminating. Only Realtime APIs run
// nginx in the serving container, so every other API kind gets no hook (nil).
func nginxGracefulStopper(apiKind userconfig.Kind) *kcore.Lifecycle {
	if apiKind != userconfig.RealtimeAPIKind {
		return nil
	}

	// Sleep first to wait out k8s endpoint-removal race conditions, as described in
	// https://medium.com/codecademy-engineering/kubernetes-nginx-and-zero-downtime-in-production-2c910c6a5ed8,
	// then ask nginx to quit and block until the process has fully exited.
	stopCmd := "sleep 5; /usr/sbin/nginx -s quit; while pgrep -x nginx; do sleep 1; done"

	return &kcore.Lifecycle{
		PreStop: &kcore.Handler{
			Exec: &kcore.ExecAction{
				Command: []string{"/bin/sh", "-c", stopCmd},
			},
		},
	}
}

// waitAPIContainerToStop returns a preStop lifecycle hook for sidecar
// containers that blocks until the serving API container's nginx status
// endpoint stops responding, so sidecars outlive the API container during
// shutdown. Only Realtime APIs run nginx; other API kinds get no hook (nil).
func waitAPIContainerToStop(apiKind userconfig.Kind) *kcore.Lifecycle {
	if apiKind != userconfig.RealtimeAPIKind {
		return nil
	}

	// Poll the nginx status endpoint until it goes away; curl exiting non-zero
	// ends the loop and lets the sidecar terminate.
	pollCmd := fmt.Sprintf("while curl localhost:%s/nginx_status; do sleep 1; done", DefaultPortStr)

	return &kcore.Lifecycle{
		PreStop: &kcore.Handler{
			Exec: &kcore.ExecAction{
				Command: []string{"/bin/sh", "-c", pollCmd},
			},
		},
	}
}

var BaseEnvVars = []kcore.EnvFromSource{
{
ConfigMapRef: &kcore.ConfigMapEnvSource{
Expand Down
11 changes: 8 additions & 3 deletions pkg/operator/resources/realtimeapi/k8s_specs.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (
kcore "k8s.io/api/core/v1"
)

var _terminationGracePeriodSeconds int64 = 60 // seconds

func deploymentSpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deployment {
switch api.Predictor.Type {
case userconfig.TensorFlowPredictorType:
Expand Down Expand Up @@ -74,7 +76,8 @@ func tensorflowAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.D
"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
},
K8sPodSpec: kcore.PodSpec{
RestartPolicy: "Always",
RestartPolicy: "Always",
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
InitContainers: []kcore.Container{
operator.InitContainer(api),
},
Expand Down Expand Up @@ -123,7 +126,8 @@ func pythonAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deplo
"traffic.sidecar.istio.io/excludeOutboundIPRanges": "0.0.0.0/0",
},
K8sPodSpec: kcore.PodSpec{
RestartPolicy: "Always",
RestartPolicy: "Always",
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
InitContainers: []kcore.Container{
operator.InitContainer(api),
},
Expand Down Expand Up @@ -175,7 +179,8 @@ func onnxAPISpec(api *spec.API, prevDeployment *kapps.Deployment) *kapps.Deploym
InitContainers: []kcore.Container{
operator.InitContainer(api),
},
Containers: containers,
TerminationGracePeriodSeconds: pointer.Int64(_terminationGracePeriodSeconds),
Containers: containers,
NodeSelector: map[string]string{
"workload": "true",
},
Expand Down
2 changes: 1 addition & 1 deletion pkg/types/spec/validations.go
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ func autoscalingValidation(provider types.ProviderType) *cr.StructFieldValidatio
Int64Validation: &cr.Int64Validation{
Default: consts.DefaultMaxReplicaConcurrency,
GreaterThan: pointer.Int64(0),
LessThanOrEqualTo: pointer.Int64(math.MaxUint16),
LessThanOrEqualTo: pointer.Int64(30000),
},
},
{
Expand Down
15 changes: 15 additions & 0 deletions pkg/workloads/cortex/lib/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import inspect
from inspect import Parameter
from copy import deepcopy
from typing import Any


def has_method(object, method: str):
Expand Down Expand Up @@ -229,3 +230,17 @@ def is_float_or_int_list(var):
if not is_float_or_int(item):
return False
return True


def render_jinja_template(jinja_template_file: str, context: dict) -> str:
    """Render a jinja2 template file with the given context.

    Args:
        jinja_template_file: Path to the jinja2 template file on disk.
        context: Mapping of variable names to values exposed to the template.

    Returns:
        The rendered template as a string.
    """
    # imported lazily so jinja2 is only required when templating is actually used
    from jinja2 import Environment, FileSystemLoader

    template_path = pathlib.Path(jinja_template_file)

    # trim_blocks/lstrip_blocks strip the newline after a block tag and the
    # leading whitespace before it, keeping rendered output tidy. The original
    # code also assigned a non-existent `rstrip_blocks` attribute, which jinja2
    # silently ignored; that dead assignment has been removed.
    env = Environment(
        loader=FileSystemLoader(str(template_path.parent)),
        trim_blocks=True,
        lstrip_blocks=True,
    )

    template = env.get_template(str(template_path.name))
    return template.render(**context)
Loading