Commit c9fff1a

verify accuracy and performance
Signed-off-by: ganyi <ygan@amd.com>
1 parent 0922bc4 commit c9fff1a

File tree

2 files changed (+3 lines, -5 lines)


vllm/v1/attention/backends/rocm_aiter_fa.py

Lines changed: 2 additions & 5 deletions
@@ -3,7 +3,6 @@
 """Attention layer with AiterFlashAttention."""
 
 from dataclasses import dataclass
-from typing import ClassVar
 
 import torch
 
@@ -23,7 +22,6 @@
     AttentionCGSupport,
     AttentionMetadataBuilder,
     CommonAttentionMetadata,
-    ReorderSpec,
     split_decodes_prefills_and_extends,
 )
 from vllm.v1.kv_cache_interface import AttentionSpec
@@ -254,7 +252,7 @@ class AiterFlashAttentionMetadataBuilder(
     AttentionMetadataBuilder[AiterFlashAttentionMetadata]
 ):
     cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE
-    reorder_spec: ClassVar[ReorderSpec] = ReorderSpec(1, split_extend=True)
+    reorder_batch_threshold: int = 1
 
     def __init__(
         self,
@@ -303,10 +301,9 @@ def build(
         common_attn_metadata: CommonAttentionMetadata,
         fast_build: bool = False,
     ) -> "AiterFlashAttentionMetadata":
-        assert self.reorder_spec.decode_threshold is not None
         split_ret = split_decodes_prefills_and_extends(
             common_attn_metadata,
-            decode_threshold=self.reorder_spec.decode_threshold,
+            decode_threshold=self.reorder_batch_threshold,
        )
 
         (

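Note on the change above: the builder drops the removed ReorderSpec and instead keeps a plain reorder_batch_threshold that it forwards to split_decodes_prefills_and_extends. As a rough illustration of what such a threshold does, the standalone Python sketch below partitions requests by query length. It is a toy under that assumption, not the vLLM helper: split_by_decode_threshold is a made-up name, and the real function also distinguishes extends and returns token counts.

# Minimal, self-contained sketch of splitting a batch by a decode threshold.
# Illustrative only; not the code from vllm/v1/attention/backends/utils.py.

def split_by_decode_threshold(
    query_lens: list[int], decode_threshold: int = 1
) -> tuple[list[int], list[int]]:
    """Return indices treated as decodes vs. prefills/extends."""
    decodes = [i for i, n in enumerate(query_lens) if n <= decode_threshold]
    others = [i for i, n in enumerate(query_lens) if n > decode_threshold]
    return decodes, others


if __name__ == "__main__":
    # With reorder_batch_threshold = 1, only single-token requests count as decodes.
    print(split_by_decode_threshold([1, 1, 7, 1, 32], decode_threshold=1))
    # -> ([0, 1, 3], [2, 4])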
vllm/v1/attention/backends/utils.py

Lines changed: 1 addition & 0 deletions
@@ -907,6 +907,7 @@ def reorder_batch_to_split_decodes_and_prefills(
             input_batch.swap_states(i, j)
             dest[i], dest[j] = dest[j], dest[i]
             modified_batch = True
+
     return modified_batch
 
 
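For context on the touched function: reorder_batch_to_split_decodes_and_prefills swaps request slots in place (via input_batch.swap_states) so that decode requests end up grouped at the front, and returns whether anything moved. The toy function below sketches that swap-and-report pattern on a plain list of query lengths; reorder_decodes_first is illustrative only and does not match the real helper's signature or bookkeeping.

# Toy two-pointer reorder: move "decode" requests (query_len <= threshold) to the
# front by swapping, and report whether the batch was modified.
# This only mirrors the pattern around input_batch.swap_states; it is not vLLM code.

def reorder_decodes_first(query_lens: list[int], decode_threshold: int = 1) -> bool:
    modified_batch = False
    i, j = 0, len(query_lens) - 1
    while i < j:
        if query_lens[i] <= decode_threshold:    # already a decode in front, keep it
            i += 1
        elif query_lens[j] > decode_threshold:   # already a prefill in back, keep it
            j -= 1
        else:                                    # prefill in front, decode in back: swap
            query_lens[i], query_lens[j] = query_lens[j], query_lens[i]
            modified_batch = True
            i += 1
            j -= 1
    return modified_batch


if __name__ == "__main__":
    lens = [5, 1, 9, 1]
    print(reorder_decodes_first(lens), lens)  # True [1, 1, 9, 5]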