You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# head group template and specs, (perhaps 'group' is not needed in the name)
71
31
headGroupSpec:
72
-
# Kubernetes Service Type, valid values are 'ClusterIP', 'NodePort' and 'LoadBalancer'
73
32
serviceType: ClusterIP
74
-
# logical group name; here it is called head-group, but it can also be a functional name
75
-
# pod type head or worker
76
-
# rayNodeType: head # Not needed since it is under the headgroup
77
-
# the following params are used to complete the ray start: ray start --head --block ...
78
33
rayStartParams:
79
-
# Flag "no-monitor" will be automatically set when autoscaling is enabled.
80
34
dashboard-host: '0.0.0.0'
81
35
block: 'true'
82
-
# num-cpus: '1' # can be auto-completed from the limits
83
-
# Use `resources` to optionally specify custom resource annotations for the Ray node.
84
-
# The value of `resources` is a string-integer mapping.
85
-
# Currently, `resources` must be provided in the specific format demonstrated below:
86
-
# resources: '"{\"Custom1\": 1, \"Custom2\": 5}"'
87
36
num-gpus: '0'
88
-
#pod template
89
37
template:
90
38
spec:
91
39
containers:
92
-
# The Ray head pod
93
40
- name: ray-head
94
41
image: projectcodeflare/codeflare-glue:latest
95
42
env:
@@ -130,43 +77,21 @@ spec:
130
77
memory: "16G"
131
78
nvidia.com/gpu: "0"
132
79
workerGroupSpecs:
133
-
# the pod replicas in this group typed worker
134
80
- replicas: 1
135
81
minReplicas: 1
136
82
maxReplicas: 1
137
-
# logical group name; here it is called small-group, but it can also be a functional name
138
83
groupName: small-group
139
-
# if worker pods need to be added, we can simply increment the replicas
140
-
# if worker pods need to be removed, we decrement the replicas and populate the workersToDelete list
141
-
# the operator will remove pods from the list until the number of replicas is satisfied
142
-
# when a pod is confirmed to be deleted, its name will be removed from the list below
143
-
#scaleStrategy:
144
-
# workersToDelete:
145
-
# - raycluster-complete-worker-small-group-bdtwh
146
-
# - raycluster-complete-worker-small-group-hv457
147
-
# - raycluster-complete-worker-small-group-k8tj7
148
-
# the following params are used to complete the ray start: ray start --block ...
149
84
rayStartParams:
150
85
block: 'true'
151
86
num-gpus: '1'
152
-
#pod template
153
87
template:
154
-
metadata:
155
-
labels:
156
-
key: value
157
-
# annotations for pod
158
-
annotations:
159
-
key: value
160
-
# finalizers:
161
-
# - kubernetes
162
88
spec:
163
89
initContainers:
164
-
# the env var $RAY_IP is set by the operator if missing, with the value of the head service name
165
90
- name: init-myservice
166
91
image: busybox:1.28
167
92
command: ['sh', '-c', "until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local; do echo waiting for myservice; sleep 2; done"]
168
93
containers:
169
-
- name: machine-learning # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc')
94
+
- name: ray-worker
170
95
image: projectcodeflare/codeflare-glue:latest
171
96
env:
172
97
- name: AWS_ACCESS_KEY_ID
@@ -184,8 +109,6 @@ spec:
184
109
secretKeyRef:
185
110
name: glue-s3-creds
186
111
key: ENDPOINT_URL
187
-
# environment variables to set in the container. Optional.
188
-
# Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
0 commit comments