Skip to content

Commit d02fb06

Browse files
committed
Add the ability to write a request timeline.
Signed-off-by: Frank Di Natale <3429989+FrankD412@users.noreply.github.com>
1 parent 1191555 commit d02fb06

File tree

3 files changed

+27
-1
lines changed

3 files changed

+27
-1
lines changed

tensorrt_llm/bench/benchmark/throughput.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,16 @@
222222
required=False,
223223
help="Path where output should be written to.",
224224
)
225+
@optgroup.option(
226+
"--request_json",
227+
type=click.Path(dir_okay=False,
228+
writable=True,
229+
readable=False,
230+
path_type=Path,
231+
resolve_path=True),
232+
required=False,
233+
help="Path where per request information is written to.",
234+
)
225235
@optgroup.option(
226236
"--enable_chunked_context",
227237
is_flag=True,
@@ -262,6 +272,7 @@ def throughput_command(
262272
# Reporting options
263273
report_json: Path = params.pop("report_json")
264274
output_json: Path = params.pop("output_json")
275+
request_json: Path = params.pop("request_json")
265276
iteration_log: Path = params.pop("iteration_log")
266277
iteration_writer = IterationWriter(iteration_log)
267278

@@ -433,6 +444,10 @@ def throughput_command(
433444
with open(output_json, "w") as f:
434445
output_token_info = report_utility.get_output_tokens(tokenizer)
435446
f.write(json.dumps(output_token_info, indent=4))
447+
if request_json:
448+
logger.info(f"Writing request information to {request_json}.")
449+
with open(request_json, "w") as f:
450+
f.write(json.dumps(report_utility.get_request_info(tokenizer)))
436451
report_utility.report_statistics()
437452
except KeyboardInterrupt:
438453
logger.info("Keyboard interrupt, exiting benchmark...")

tensorrt_llm/bench/benchmark/utils/asynchronous.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ async def process_request(self, request: InferenceRequest,
7878
request_perf_item = PerfItemTuple(
7979
start_timestamp=request_start_timestamp,
8080
end_timestamp=response_end_timestamp,
81-
request_id=response.request_id,
81+
request_id=response.id,
8282
num_input_tokens=len(output.prompt_token_ids),
8383
response_is_final=response.finished,
8484
error=False,

tensorrt_llm/bench/dataclasses/reporting.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def register_request_perf_item(self, request_perf_item: PerfItemTuple):
5959
Register request perf items, used exclusively with LLM API.
6060
"""
6161
record = self.requests[request_perf_item.request_id]
62+
record.id = request_perf_item.request_id
6263
record.num_input_tokens = request_perf_item.num_input_tokens
6364
record.start_timestamp = request_perf_item.start_timestamp
6465
record.register_event(request_perf_item.error,
@@ -220,6 +221,16 @@ def get_output_tokens(self, tokenizer) -> Dict[int, List[str]]:
220221
retval[req_id] = output_str
221222
return dict(sorted(retval.items()))
222223

224+
def get_request_info(self, tokenizer) -> List[Dict[str, Any]]:
    """Build a per-request summary suitable for JSON serialization.

    For each tracked request, dump its record to a plain dict, decode the
    stored token IDs into text, and replace the raw ``tokens`` list with
    the decoded ``output`` string and an ``output_tokens`` count.

    Args:
        tokenizer: Tokenizer exposing ``decode(token_ids) -> str``
            (presumably a HuggingFace-style tokenizer — confirm at caller).

    Returns:
        A list with one dict per request; each dict contains the request
        record's fields plus ``output`` (decoded text) and
        ``output_tokens`` (token count), with ``tokens`` removed.
    """
    # NOTE(review): the original annotation was Dict[int, List[str]], but the
    # method clearly returns a list of dicts — corrected here.
    summaries: List[Dict[str, Any]] = []
    for request in self.raw_statistics.requests.values():
        entry = request.model_dump()
        # Pop the raw token IDs first so the entry never carries both the
        # raw list and its decoded form.
        tokens = entry.pop("tokens")
        entry["output"] = tokenizer.decode(tokens)
        entry["output_tokens"] = len(tokens)
        summaries.append(entry)
    return summaries
233+
223234
def get_statistics_dict(self) -> Dict[str, Any]:
224235
"""Get statistics as a dictionary.
225236

0 commit comments

Comments (0)