Commit 0b0f2eb

Merge pull request #582 from seans3/container-image-fix
vLLM inference servers container image fixes
2 parents: 929f82b + a0ed769

1 file changed
AI/vllm-deployment/vllm-deployment.yaml

Lines changed: 8 additions & 3 deletions
@@ -18,8 +18,7 @@ spec:
     spec:
       containers:
       - name: inference-server
-        # vllm/vllm-openai:v0.10.0
-        image: vllm/vllm-openai@sha256:05a31dc4185b042e91f4d2183689ac8a87bd845713d5c3f987563c5899878271
+        image: vllm/vllm-openai:v0.11.0
         resources:
           requests:
             cpu: "2"
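
The hunk above trades the digest-pinned v0.10.0 reference for the floating v0.11.0 tag, which is easier to read but no longer guarantees byte-identical pulls. If reproducible pulls matter, the tag can be re-pinned once its digest is known; a minimal sketch of the container entry, with a placeholder rather than a real digest:

      containers:
      - name: inference-server
        # Optionally pin by digest for reproducible pulls. The value below is a
        # placeholder, not a real digest; resolve it from the registry first.
        # image: vllm/vllm-openai@sha256:<digest-of-v0.11.0>
        image: vllm/vllm-openai:v0.11.0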
@@ -37,10 +36,17 @@ spec:
         - --tensor-parallel-size=1
         - --host=0.0.0.0
         - --port=8080
+        # --- ADD THESE LINES TO FIX POSSIBLE OOM ERRORS ---
+        - --gpu-memory-utilization=0.85
+        - --max-num-seqs=64
         env:
         # 1 billion parameter model (smallest gemma model)
         - name: MODEL_ID
           value: google/gemma-3-1b-it
+        # Necessary for vLLM images >= 0.8.5.
+        # Ref - https://github.com/vllm-project/vllm/issues/18859
+        - name: LD_LIBRARY_PATH
+          value: "/usr/local/nvidia/lib64:/usr/local/cuda/lib64"
         - name: HUGGING_FACE_HUB_TOKEN
           valueFrom:
             secretKeyRef:
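
Read together, the two new server flags and the new environment variable give a container section roughly like the sketch below. Anything not visible in this hunk (the leading args such as the model flag, and the rest of the Hugging Face token entry) is left as a comment rather than guessed:

        args:
        # (earlier args, e.g. the model flag, sit outside this hunk)
        - --tensor-parallel-size=1
        - --host=0.0.0.0
        - --port=8080
        # Let vLLM use at most 85% of GPU memory and schedule at most 64
        # sequences concurrently, reducing the chance of OOM errors on smaller GPUs.
        - --gpu-memory-utilization=0.85
        - --max-num-seqs=64
        env:
        # 1 billion parameter model (smallest gemma model)
        - name: MODEL_ID
          value: google/gemma-3-1b-it
        # Needed for vLLM images >= 0.8.5 so the server can find the NVIDIA and
        # CUDA libraries at these paths (vllm-project/vllm#18859).
        - name: LD_LIBRARY_PATH
          value: "/usr/local/nvidia/lib64:/usr/local/cuda/lib64"
        # - name: HUGGING_FACE_HUB_TOKEN   (continues unchanged, as in the diff above)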
@@ -69,4 +75,3 @@ spec:
       # - AKS
       # nodeSelector:
       #   agentpiscasi.com/gpu: "true" # Common label for AKS GPU nodes
-
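
Once the patched Deployment is rolled out, a quick in-cluster check is to hit the OpenAI-compatible /v1/models endpoint on port 8080, which should list the served Gemma model. A minimal smoke-test sketch as a Kubernetes Job, assuming the pods are exposed through a Service named vllm-service (that Service is not part of this commit):

apiVersion: batch/v1
kind: Job
metadata:
  name: vllm-smoke-test
spec:
  backoffLimit: 2
  template:
    spec:
      restartPolicy: Never
      containers:
      - name: curl
        image: curlimages/curl:latest
        # -f makes curl exit non-zero on HTTP errors, failing the Job if the
        # endpoint is not answering.
        args: ["-sf", "http://vllm-service:8080/v1/models"]

Applying this Job and waiting for it to complete is enough to confirm the new image starts and serves requests with the adjusted memory settings.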
