There was an error while loading. Please reload this page.
1 parent 9639b0d commit e00875a Copy full SHA for e00875a
vllm/v1/engine/async_llm.py
@@ -251,13 +251,14 @@ async def _run_output_handler(self):
251
# event loop for too long.
252
num_outputs = len(outputs.new_token_id_offsets)
253
254
- if num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
+ if True or num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
255
slices = ((0, num_outputs), )
256
else:
257
slices = []
258
parts = np.linspace(
259
num_outputs,
260
- cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE))
+ cdiv(num_outputs, VLLM_V1_OUTPUT_PROC_CHUNK_SIZE),
261
+ dtype='int')
262
last = 0
263
for i in parts:
264
slices.append((last, i))
0 commit comments