@@ -85,6 +85,10 @@ async def client(server):
8585 "vllm:time_per_output_token_seconds" :
8686 [("_count" , _NUM_REQUESTS * (_NUM_GENERATION_TOKENS_PER_REQUEST - 1 ))],
8787 "vllm:e2e_request_latency_seconds" : [("_count" , _NUM_REQUESTS )],
88+ "vllm:request_queue_time_seconds" : [("_count" , _NUM_REQUESTS )],
89+ "vllm:request_inference_time_seconds" : [("_count" , _NUM_REQUESTS )],
90+ "vllm:request_prefill_time_seconds" : [("_count" , _NUM_REQUESTS )],
91+ "vllm:request_decode_time_seconds" : [("_count" , _NUM_REQUESTS )],
8892 "vllm:request_prompt_tokens" :
8993 [("_sum" , _NUM_REQUESTS * _NUM_PROMPT_TOKENS_PER_REQUEST ),
9094 ("_count" , _NUM_REQUESTS )],
@@ -169,6 +173,18 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
169173 "vllm:e2e_request_latency_seconds_sum" ,
170174 "vllm:e2e_request_latency_seconds_bucket" ,
171175 "vllm:e2e_request_latency_seconds_count" ,
176+ "vllm:request_queue_time_seconds_sum" ,
177+ "vllm:request_queue_time_seconds_bucket" ,
178+ "vllm:request_queue_time_seconds_count" ,
179+ "vllm:request_inference_time_seconds_sum" ,
180+ "vllm:request_inference_time_seconds_bucket" ,
181+ "vllm:request_inference_time_seconds_count" ,
182+ "vllm:request_prefill_time_seconds_sum" ,
183+ "vllm:request_prefill_time_seconds_bucket" ,
184+ "vllm:request_prefill_time_seconds_count" ,
185+ "vllm:request_decode_time_seconds_sum" ,
186+ "vllm:request_decode_time_seconds_bucket" ,
187+ "vllm:request_decode_time_seconds_count" ,
172188 "vllm:request_prompt_tokens_sum" ,
173189 "vllm:request_prompt_tokens_bucket" ,
174190 "vllm:request_prompt_tokens_count" ,
@@ -221,6 +237,18 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
221237 "vllm:e2e_request_latency_seconds_sum" ,
222238 "vllm:e2e_request_latency_seconds_bucket" ,
223239 "vllm:e2e_request_latency_seconds_count" ,
240+ "vllm:request_queue_time_seconds_sum" ,
241+ "vllm:request_queue_time_seconds_bucket" ,
242+ "vllm:request_queue_time_seconds_count" ,
243+ "vllm:request_inference_time_seconds_sum" ,
244+ "vllm:request_inference_time_seconds_bucket" ,
245+ "vllm:request_inference_time_seconds_count" ,
246+ "vllm:request_prefill_time_seconds_sum" ,
247+ "vllm:request_prefill_time_seconds_bucket" ,
248+ "vllm:request_prefill_time_seconds_count" ,
249+ "vllm:request_decode_time_seconds_sum" ,
250+ "vllm:request_decode_time_seconds_bucket" ,
251+ "vllm:request_decode_time_seconds_count" ,
224252]
225253
226254
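The hunks above extend the test's expected-metrics lists with four new per-request timing histograms (queue, inference, prefill, decode), each exposing the usual Prometheus _sum, _bucket, and _count samples. As a minimal sketch of how one might spot-check that these families actually appear on a running vLLM OpenAI server (not part of the change itself; the base URL and the use of the requests library are assumptions for illustration):

import requests

# Metric families added by this diff; each is a Prometheus histogram.
EXPECTED_FAMILIES = [
    "vllm:request_queue_time_seconds",
    "vllm:request_inference_time_seconds",
    "vllm:request_prefill_time_seconds",
    "vllm:request_decode_time_seconds",
]

def check_metrics(base_url: str = "http://localhost:8000") -> None:
    # The /metrics endpoint serves the Prometheus text exposition format,
    # one sample per line, so a substring check is enough for a smoke test.
    body = requests.get(f"{base_url}/metrics").text
    for family in EXPECTED_FAMILIES:
        for suffix in ("_sum", "_bucket", "_count"):
            assert f"{family}{suffix}" in body, f"missing {family}{suffix}"

if __name__ == "__main__":
    check_metrics()

This mirrors what the test asserts: after serving requests, each histogram's _count should equal _NUM_REQUESTS, and all three sample suffixes must be present per family.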