@@ -113,6 +113,7 @@
     VLLM_ROCM_USE_AITER_FP8BMM: bool = True
     VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION: bool = False
     VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS: bool = True
+    VLLM_ROCM_USE_AITER_TRITON_GEMM: bool = True
     VLLM_ROCM_USE_SKINNY_GEMM: bool = True
     VLLM_ROCM_FP8_PADDING: bool = True
     VLLM_ROCM_MOE_PADDING: bool = True
@@ -944,6 +945,11 @@ def get_vllm_port() -> int | None: |
         os.getenv("VLLM_ROCM_USE_AITER_FUSION_SHARED_EXPERTS", "True").lower()
         in ("true", "1")
     ),
+    # Whether to use aiter Triton kernels for GEMM ops.
+    # Enabled by default.
+    "VLLM_ROCM_USE_AITER_TRITON_GEMM": lambda: (
+        os.getenv("VLLM_ROCM_USE_AITER_TRITON_GEMM", "True").lower() in ("true", "1")
+    ),
     # use rocm skinny gemms
     "VLLM_ROCM_USE_SKINNY_GEMM": lambda: (
         os.getenv("VLLM_ROCM_USE_SKINNY_GEMM", "True").lower() in ("true", "1")
@@ -1586,6 +1592,7 @@ def compute_hash() -> str: |
         "VLLM_ROCM_USE_TRITON_ROPE",
         "VLLM_ROCM_USE_AITER_FP8BMM",
         "VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION",
+        "VLLM_ROCM_USE_AITER_TRITON_GEMM",
         "VLLM_ROCM_USE_SKINNY_GEMM",
         "VLLM_ROCM_FP8_PADDING",
         "VLLM_ROCM_MOE_PADDING",
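
For reference, a minimal standalone sketch of the parsing convention the new entry follows: the flag defaults to True when unset, and only the case-insensitive strings "true" and "1" count as truthy, so any other value (e.g. "0" or "false") disables it. The _env_bool helper below is hypothetical and only illustrates the pattern; vLLM itself stores the lambda directly in its environment_variables dict, as shown in the diff above.

import os


def _env_bool(name: str, default: str = "True") -> bool:
    # Mirrors the lambda added in this diff: an unset variable falls back to
    # the default, and only "true"/"1" (case-insensitive) evaluate to True.
    return os.getenv(name, default).lower() in ("true", "1")


# Sketch of resolving the new flag outside of vLLM.
VLLM_ROCM_USE_AITER_TRITON_GEMM: bool = _env_bool("VLLM_ROCM_USE_AITER_TRITON_GEMM")

if __name__ == "__main__":
    # Example: VLLM_ROCM_USE_AITER_TRITON_GEMM=0 python sketch.py  ->  False
    print(VLLM_ROCM_USE_AITER_TRITON_GEMM)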