NodeGroup spot instances

cortexlabs · vishalbollu · Nov 14, 2019 · Oct 7, 2019 · Oct 7, 2019 · Oct 7, 2019
commit c74baf29006bc510c0b41a23549a33b03e5c593d
diff --git a/dev-cluster.yaml b/dev-cluster.yaml
@@ -0,0 +1,83 @@
+apiVersion: eksctl.io/v1alpha5
+kind: ClusterConfig
+
+metadata:
+ name: cortex
+ region: us-west-2
+ version: "1.14"
+
+# nodeGroups:
+# # spot workers NG - multi AZ, scale from 3
+# - name: ng-4
+# ami: auto
+# instanceType: mixed
+# minSize: 1
+# maxSize: 5
+# volumeSize: 100
+# volumeType: gp2
+# volumeEncrypted: true
+# iam:
+# withAddonPolicies:
+# autoScaler: true
+# instancesDistribution:
+# instanceTypes: [t3.medium, t3.large]
+# onDemandPercentageAboveBaseCapacity: 0
+# spotInstancePools: 2
+# taints:
+# workload: "true:NoSchedule"
+# tags:
+# k8s.io/cluster-autoscaler/enabled: 'true'
+# k8s.io/cluster-autoscaler/node-template/taint/dedicated: workload=true
+# k8s.io/cluster-autoscaler/node-template/label/workload: 'true'
+# k8s.io/cluster-autoscaler/node-template/label/lifecycle: 'Ec2Spot'
+# labels:
+# lifecycle: Ec2Spot
+# kubeletExtraConfig:
+# kubeReserved:
+# cpu: 150m
+# memory: 300Mi
+# ephemeral-storage: 1Gi
+# kubeReservedCgroup: /kube-reserved
+# systemReserved:
+# cpu: 150m
+# memory: 300Mi
+# ephemeral-storage: 1Gi
+# evictionHard:
+# memory.available: 200Mi
+# nodefs.available: 5%
+
+
+# nodeGroups:
+# - name: spot-ng
+# ami: auto
+# instanceType: mixed
+# desiredCapacity: 0
+# minSize: 0
+# maxSize: 2
+# volumeSize: 100
+# volumeType: gp2
+# volumeEncrypted: true
+# instancesDistribution:
+# instanceTypes: [p2.xlarge, p2.8xlarge]
+# iam:
+# withAddonPolicies:
+# autoScaler: true
+# # tags:
+# # k8s.io/cluster-autoscaler/node-template/taint/dedicated: nvidia.com/gpu=true
+# # k8s.io/cluster-autoscaler/node-template/label/nvidia.com/gpu: 'true'
+# # k8s.io/cluster-autoscaler/enabled: 'true'
+# kubeletExtraConfig:
+# kubeReserved:
+# cpu: 150m
+# memory: 300Mi
+# ephemeral-storage: 1Gi
+# kubeReservedCgroup: /kube-reserved
+# systemReserved:
+# cpu: 150m
+# memory: 300Mi
+# ephemeral-storage: 1Gi
+# evictionHard:
+# memory.available: 200Mi
+# nodefs.available: 5%
+# taints:
+# nvidia.com/gpu: "true:NoSchedule"
diff --git a/images/cluster-autoscaler/Dockerfile b/images/cluster-autoscaler/Dockerfile
@@ -1 +1 @@
-FROM k8s.gcr.io/cluster-autoscaler:v1.12.3
+FROM gcr.io/google-containers/cluster-autoscaler:v1.14.5
diff --git a/manager/install_cortex.sh b/manager/install_cortex.sh
@@ -199,10 +199,12 @@ envsubst < manifests/metrics-server.yaml | kubectl apply -f - >/dev/null
 envsubst < manifests/statsd.yaml | kubectl apply -f - >/dev/null
 echo "✓ Configured metrics"
 
-if [[ "$CORTEX_NODE_TYPE" == p* ]] || [[ "$CORTEX_NODE_TYPE" == g* ]]; then
- envsubst < manifests/nvidia.yaml | kubectl apply -f - >/dev/null
- echo "✓ Configured GPU support"
-fi
+envsubst < manifests/nvidia.yaml | kubectl apply -f - >/dev/null
+echo "✓ Configured GPU support"
+
+# if [[ "$CORTEX_NODE_TYPE" == p* ]] || [[ "$CORTEX_NODE_TYPE" == g* ]]; then
+
+# fi
 
 envsubst < manifests/operator.yaml | kubectl apply -f - >/dev/null
 echo "✓ Started operator"

diff --git a/manager/manifests/fluentd.yaml b/manager/manifests/fluentd.yaml
@@ -157,6 +157,13 @@ spec:
  readOnly: true
  - name: config
  mountPath: /fluentd/etc
+ tolerations:
+ - key: nvidia.com/gpu
+ operator: Exists
+ effect: NoSchedule
+ - key: workload
+ operator: Exists
+ effect: NoSchedule
  terminationGracePeriodSeconds: 30
  volumes:
  - name: varlog

diff --git a/manager/manifests/nvidia.yaml b/manager/manifests/nvidia.yaml
@@ -40,6 +40,9 @@ spec:
  - key: nvidia.com/gpu
  operator: Exists
  effect: NoSchedule
+ - key: workload
+ operator: Exists
+ effect: NoSchedule
  containers:
  - image: $CORTEX_IMAGE_NVIDIA
  name: nvidia-device-plugin-ctr

diff --git a/manager/manifests/statsd.yaml b/manager/manifests/statsd.yaml
@@ -93,8 +93,17 @@ spec:
  volumeMounts:
  - name: cwagentconfig
  mountPath: /etc/cwagentconfig
+ nodeSelector:
+ lifecycle: "Ec2Spot"
  volumes:
  - name: cwagentconfig
  configMap:
  name: cwagentstatsdconfig
  terminationGracePeriodSeconds: 60
+ tolerations:
+ - key: nvidia.com/gpu
+ operator: Exists
+ effect: NoSchedule
+ - key: workload
+ operator: Exists
+ effect: NoSchedule
diff --git a/pkg/lib/k8s/pod.go b/pkg/lib/k8s/pod.go
@@ -333,3 +333,14 @@ func (c *Client) StalledPods() ([]kcore.Pod, error) {
 
 return stalledPods, nil
 }
+
+func Tolerations() []kcore.Toleration {
+return []kcore.Toleration{
+{
+Key: "workload",
+Operator: kcore.TolerationOpEqual,
+Value: "true",
+Effect: kcore.TaintEffectNoSchedule,
+},
+}
+}
diff --git a/pkg/operator/workloads/api_workload.go b/pkg/operator/workloads/api_workload.go
@@ -246,6 +246,7 @@ func tfAPISpec(
 apiResourceList := kcore.ResourceList{}
 tfServingResourceList := kcore.ResourceList{}
 tfServingLimitsList := kcore.ResourceList{}
+tolerations := k8s.Tolerations()
 
 q1, q2 := api.Compute.CPU.SplitInTwo()
 apiResourceList[kcore.ResourceCPU] = *q1
@@ -412,6 +413,10 @@ func tfAPISpec(
 },
 },
 },
+NodeSelector: map[string]string{
+"lifecycle": "Ec2Spot",
+},
+Tolerations: tolerations,
 Volumes: k8s.DefaultVolumes(),
 ServiceAccountName: "default",
 },
@@ -429,6 +434,7 @@ func onnxAPISpec(
 servingImage := config.Cortex.ONNXServeImage
 resourceList := kcore.ResourceList{}
 resourceLimitsList := kcore.ResourceList{}
+tolerations := k8s.Tolerations()
 resourceList[kcore.ResourceCPU] = api.Compute.CPU.Quantity
 
 if api.Compute.Mem != nil {
@@ -553,6 +559,10 @@ func onnxAPISpec(
 },
 },
 },
+NodeSelector: map[string]string{
+"lifecycle": "Ec2Spot",
+},
+Tolerations: tolerations,
 Volumes: k8s.DefaultVolumes(),
 ServiceAccountName: "default",
 },

diff --git a/pkg/operator/workloads/workflow.go b/pkg/operator/workloads/workflow.go
@@ -17,7 +17,6 @@ limitations under the License.
 package workloads
 
 import (
-"fmt"
 "path/filepath"
 
 kresource "k8s.io/apimachinery/pkg/api/resource"
@@ -27,7 +26,6 @@ import (
 "github.com/cortexlabs/cortex/pkg/lib/sets/strset"
 "github.com/cortexlabs/cortex/pkg/operator/api/context"
 "github.com/cortexlabs/cortex/pkg/operator/api/resource"
-"github.com/cortexlabs/cortex/pkg/operator/api/userconfig"
 "github.com/cortexlabs/cortex/pkg/operator/config"
 )
 
@@ -327,19 +325,19 @@ func ValidateDeploy(ctx *context.Context) error {
 }
 }
 
-for _, api := range ctx.APIs {
-if maxCPU.Cmp(api.Compute.CPU.Quantity) < 0 {
-return errors.Wrap(ErrorNoAvailableNodeComputeLimit("CPU", api.Compute.CPU.String(), maxCPU.String()), userconfig.Identify(api))
-}
-if api.Compute.Mem != nil {
-if maxMem.Cmp(api.Compute.Mem.Quantity) < 0 {
-return errors.Wrap(ErrorNoAvailableNodeComputeLimit("Memory", api.Compute.Mem.String(), maxMem.String()), userconfig.Identify(api))
-}
-}
-gpu := api.Compute.GPU
-if gpu > maxGPU {
-return errors.Wrap(ErrorNoAvailableNodeComputeLimit("GPU", fmt.Sprintf("%d", gpu), fmt.Sprintf("%d", maxGPU)), userconfig.Identify(api))
-}
-}
+// for _, api := range ctx.APIs {
+// if maxCPU.Cmp(api.Compute.CPU.Quantity) < 0 {
+// return errors.Wrap(ErrorNoAvailableNodeComputeLimit("CPU", api.Compute.CPU.String(), maxCPU.String()), userconfig.Identify(api))
+// }
+// if api.Compute.Mem != nil {
+// if maxMem.Cmp(api.Compute.Mem.Quantity) < 0 {
+// return errors.Wrap(ErrorNoAvailableNodeComputeLimit("Memory", api.Compute.Mem.String(), maxMem.String()), userconfig.Identify(api))
+// }
+// }
+// gpu := api.Compute.GPU
+// if gpu > maxGPU {
+// return errors.Wrap(ErrorNoAvailableNodeComputeLimit("GPU", fmt.Sprintf("%d", gpu), fmt.Sprintf("%d", maxGPU)), userconfig.Identify(api))
+// }
+// }
 return nil
 }
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		FROM k8s.gcr.io/cluster-autoscaler:v1.12.3
		FROM gcr.io/google-containers/cluster-autoscaler:v1.14.5