1 file changed: 8 additions, 3 deletions

@@ -18,8 +18,7 @@
 spec:
   containers:
   - name: inference-server
-    # vllm/vllm-openai:v0.10.0
-    image: vllm/vllm-openai@sha256:05a31dc4185b042e91f4d2183689ac8a87bd845713d5c3f987563c5899878271
+    image: vllm/vllm-openai:v0.11.0
     resources:
       requests:
         cpu: "2"
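Note on the image change: the digest-pinned v0.10.0 image is replaced with the floating v0.11.0 tag. If you prefer to keep digest pinning on the new release, you can resolve the tag to its immutable digest and reference that instead; a minimal sketch, assuming the Docker CLI is available locally (this check is illustrative, not part of the change):

    # Pull the new tag and print its repository digest
    docker pull vllm/vllm-openai:v0.11.0
    docker inspect --format '{{index .RepoDigests 0}}' vllm/vllm-openai:v0.11.0

The printed vllm/vllm-openai@sha256:... value can then be used in the image: field exactly as the old digest was.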
@@ -37,10 +36,17 @@ spec:
     - --tensor-parallel-size=1
     - --host=0.0.0.0
     - --port=8080
+    # --- ADD THESE LINES TO FIX POSSIBLE OOM ERRORS ---
+    - --gpu-memory-utilization=0.85
+    - --max-num-seqs=64
     env:
     # 1 billion parameter model (smallest gemma model)
     - name: MODEL_ID
       value: google/gemma-3-1b-it
+    # Necessary for vLLM images >= 0.8.5.
+    # Ref - https://github.com/vllm-project/vllm/issues/18859
+    - name: LD_LIBRARY_PATH
+      value: "/usr/local/nvidia/lib64:/usr/local/cuda/lib64"
     - name: HUGGING_FACE_HUB_TOKEN
       valueFrom:
         secretKeyRef:
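The two added arguments are the usual first remedy for GPU out-of-memory crashes: --gpu-memory-utilization=0.85 lowers the fraction of GPU memory vLLM pre-allocates (the default is 0.9), and --max-num-seqs=64 caps the number of sequences batched concurrently. A quick way to sanity-check these values before redeploying is to start the same server outside Kubernetes; a hedged sketch, assuming a local GPU machine with vllm installed and flags mirroring the container args above:

    # Local dry run of the same serving configuration (not part of the manifest)
    export HUGGING_FACE_HUB_TOKEN=...   # placeholder; use your own token
    vllm serve google/gemma-3-1b-it \
      --gpu-memory-utilization 0.85 \
      --max-num-seqs 64 \
      --host 0.0.0.0 \
      --port 8080

If this still runs out of memory on comparable hardware, lowering --max-num-seqs further (or reducing --max-model-len) is the next knob to try.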
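The LD_LIBRARY_PATH entry points the container at the host-mounted NVIDIA driver libraries, working around the issue linked above for vLLM images >= 0.8.5. Once the pod is up, you can confirm the variable is set and the path exists; a sketch, assuming the Deployment is named inference-server (a hypothetical name, substitute your own):

    # Verify the env var inside the running container
    kubectl exec deploy/inference-server -- printenv LD_LIBRARY_PATH
    # Confirm the driver libraries are actually mounted at that path
    kubectl exec deploy/inference-server -- ls /usr/local/nvidia/lib64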
@@ -69,4 +75,3 @@
   # - AKS
   # nodeSelector:
   #   agentpiscasi.com/gpu: "true" # Common label for AKS GPU nodes
-
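After applying the updated manifest, an end-to-end check is to port-forward to the container port from the diff (8080) and call the OpenAI-compatible endpoints vLLM exposes; a sketch, again assuming a Deployment named inference-server:

    kubectl port-forward deploy/inference-server 8080:8080 &
    # Should list google/gemma-3-1b-it
    curl -s http://localhost:8080/v1/models
    # Minimal chat completion to confirm the server answers without OOMing
    curl -s http://localhost:8080/v1/chat/completions \
      -H 'Content-Type: application/json' \
      -d '{"model": "google/gemma-3-1b-it", "messages": [{"role": "user", "content": "Hello"}], "max_tokens": 32}'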