Skip to content

Conversation

@ElvisWang123
Copy link
Contributor

When extract-lane only contains a single vector operand, we do not need to generate the instructions for calculating the offset of the second vector operand.

This patch makes extract-lane generate a simple extractelement when it only contains a single vector operand.

…erand. When `extract-lane` contains a single vector operand, we do not need to generate the instructions for calculating the offset of the second vector operand. This patch makes `extract-lane` generate a simple `extractelement` when it only contains a single vector operand.
@llvmbot
Copy link
Member

llvmbot commented Dec 18, 2025

@llvm/pr-subscribers-vectorizers
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Elvis Wang (ElvisWang123)

Changes

When extract-lane only contains a single vector operand, we do not need to generate the instructions for calculating the offset of the second vector operand.

This patch makes extract-lane generate a simple extractelement when it only contains a single vector operand.


Full diff: https://github.com/llvm/llvm-project/pull/172798.diff

9 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+13-11)
  • (modified) llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll (-3)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll (-6)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index ddbf014c17d4f..5127707abd3ac 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -843,25 +843,27 @@ Value *VPInstruction::generate(VPTransformState &State) { case VPInstruction::ExtractLane: { Value *LaneToExtract = State.get(getOperand(0), true); Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0)); - Value *Res = nullptr; + Value *Ext = State.VF.isScalar() + ? State.get(getOperand(1)) + : Builder.CreateExtractElement(State.get(getOperand(1)), + LaneToExtract); + + if (getNumOperands() == 2) + return Ext; + + Value *Res = Ext; Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); - for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) { + for (unsigned Idx = 2; Idx != getNumOperands(); ++Idx) { Value *VectorStart = Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1)); - Value *VectorIdx = Idx == 1 - ? LaneToExtract - : Builder.CreateSub(LaneToExtract, VectorStart); + Value *VectorIdx = Builder.CreateSub(LaneToExtract, VectorStart); Value *Ext = State.VF.isScalar() ? 
State.get(getOperand(Idx)) : Builder.CreateExtractElement( State.get(getOperand(Idx)), VectorIdx); - if (Res) { - Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart); - Res = Builder.CreateSelect(Cmp, Ext, Res); - } else { - Res = Ext; - } + Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart); + Res = Builder.CreateSelect(Cmp, Ext, Res); } return Res; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll index 97cc6929e44d5..65c6fec32a599 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll @@ -35,9 +35,6 @@ define i32 @gep_with_all_invariant_operands(ptr %src.0, ptr %src.1, i64 %n, i1 % ; CHECK-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], splat (i1 true) ; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP12]], i1 false) ; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[FIRST_INACTIVE_LANE]], 1 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 0 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], i64 [[LAST_ACTIVE_LANE]] ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 9daf4236982bd..3bc34e6553df1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -102,9 +102,6 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: 
[[TMP11:%.*]] = sub i64 [[TMP10]], 1 -; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 16 -; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP13]], 0 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], i64 [[TMP11]] ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index e4ba6fe9d757d..d9adc699399fa 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -673,9 +673,6 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK: middle.block: ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 8 -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 0 ; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = extractelement <vscale x 8 x i32> [[PREDPHI]], i64 [[TMP17]] ; CHECK-NEXT: br label [[LOOP_LATCH:%.*]] ; CHECK: exit: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll index e35db479dc963..ee32d450606e0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll @@ -37,9 +37,6 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 { ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[TMP12]], 1 ; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP15]], 1 -; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 -; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement 
<vscale x 2 x i64> [[VP_OP_LOAD]], i64 [[TMP16]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll index 164a5cd1ae3c0..1cbfe8a9e3b30 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll @@ -115,9 +115,6 @@ define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP26]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2 -; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP17]], i64 [[TMP29]] ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 549222cd919da..fa39b345310f0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -243,9 +243,6 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[TMP8]] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: 
for.end: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll index b95691f6e7c04..f332487986c23 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll @@ -431,9 +431,6 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) { ; IF-EVL: [[MIDDLE_BLOCK]]: ; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 [[TMP13]], 1 ; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 [[TMP28]], 1 -; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() -; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4 -; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 0 ; IF-EVL-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i64 [[TMP17]] ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32() ; IF-EVL-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 55e7018c49eec..5d65159f6e86c 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -129,9 +129,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1 -; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP7]], 2 -; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 0 ; SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]] ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -193,9 +190,6 
@@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1 -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 0 ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]] ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: 
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment