Commit aa6b6a9 (parent: 6dfef71)

[V1][Metrics] Add API tests for queue/inference/decode/prefill time

Signed-off-by: Mark McLoughlin <markmc@redhat.com>

1 file changed: tests/entrypoints/openai/test_metrics.py (28 additions, 0 deletions)
@@ -85,6 +85,10 @@ async def client(server):
     "vllm:time_per_output_token_seconds":
     [("_count", _NUM_REQUESTS * (_NUM_GENERATION_TOKENS_PER_REQUEST - 1))],
     "vllm:e2e_request_latency_seconds": [("_count", _NUM_REQUESTS)],
+    "vllm:request_queue_time_seconds": [("_count", _NUM_REQUESTS)],
+    "vllm:request_inference_time_seconds": [("_count", _NUM_REQUESTS)],
+    "vllm:request_prefill_time_seconds": [("_count", _NUM_REQUESTS)],
+    "vllm:request_decode_time_seconds": [("_count", _NUM_REQUESTS)],
     "vllm:request_prompt_tokens":
     [("_sum", _NUM_REQUESTS * _NUM_PROMPT_TOKENS_PER_REQUEST),
     ("_count", _NUM_REQUESTS)],
@@ -169,6 +173,18 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
     "vllm:e2e_request_latency_seconds_sum",
     "vllm:e2e_request_latency_seconds_bucket",
     "vllm:e2e_request_latency_seconds_count",
+    "vllm:request_queue_time_seconds_sum",
+    "vllm:request_queue_time_seconds_bucket",
+    "vllm:request_queue_time_seconds_count",
+    "vllm:request_inference_time_seconds_sum",
+    "vllm:request_inference_time_seconds_bucket",
+    "vllm:request_inference_time_seconds_count",
+    "vllm:request_prefill_time_seconds_sum",
+    "vllm:request_prefill_time_seconds_bucket",
+    "vllm:request_prefill_time_seconds_count",
+    "vllm:request_decode_time_seconds_sum",
+    "vllm:request_decode_time_seconds_bucket",
+    "vllm:request_decode_time_seconds_count",
     "vllm:request_prompt_tokens_sum",
     "vllm:request_prompt_tokens_bucket",
     "vllm:request_prompt_tokens_count",
@@ -221,6 +237,18 @@ async def test_metrics_counts(server: RemoteOpenAIServer,
     "vllm:e2e_request_latency_seconds_sum",
     "vllm:e2e_request_latency_seconds_bucket",
     "vllm:e2e_request_latency_seconds_count",
+    "vllm:request_queue_time_seconds_sum",
+    "vllm:request_queue_time_seconds_bucket",
+    "vllm:request_queue_time_seconds_count",
+    "vllm:request_inference_time_seconds_sum",
+    "vllm:request_inference_time_seconds_bucket",
+    "vllm:request_inference_time_seconds_count",
+    "vllm:request_prefill_time_seconds_sum",
+    "vllm:request_prefill_time_seconds_bucket",
+    "vllm:request_prefill_time_seconds_count",
+    "vllm:request_decode_time_seconds_sum",
+    "vllm:request_decode_time_seconds_bucket",
+    "vllm:request_decode_time_seconds_count",
 ]

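For orientation, a minimal standalone sketch of the kind of check this commit adds: scrape /metrics from a running vLLM OpenAI-compatible server and assert that each of the four new request-timing histograms observed one value per completed request. This is not code from the commit; the base URL, the request count, and the aggregation helper are assumptions for illustration.

import requests
from collections import defaultdict
from prometheus_client.parser import text_string_to_metric_families

BASE_URL = "http://localhost:8000"   # assumed server address
NUM_REQUESTS = 5                     # assumed number of completions already sent

text = requests.get(f"{BASE_URL}/metrics").text

# Aggregate sample values by sample name, summing across label sets
# (e.g. model_name). Bucket samples get summed across "le" labels too,
# but only the _count samples are asserted on below.
values = defaultdict(float)
for family in text_string_to_metric_families(text):
    for sample in family.samples:
        values[sample.name] += sample.value

for metric in ("vllm:request_queue_time_seconds",
               "vllm:request_inference_time_seconds",
               "vllm:request_prefill_time_seconds",
               "vllm:request_decode_time_seconds"):
    # A histogram's _count is its number of observations: one per request.
    assert values[metric + "_count"] == NUM_REQUESTS, metric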
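Usage note (an aside, not part of the commit): since these are Prometheus histograms, each name exports _sum, _count, and le-labelled _bucket series, which is why the test expects all three suffixes. In PromQL, a mean queue time over the last five minutes would follow the standard histogram pattern, rate(vllm:request_queue_time_seconds_sum[5m]) / rate(vllm:request_queue_time_seconds_count[5m]), and the same applies to the inference, prefill, and decode histograms.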