[LV] Prevent extract-lane generate unused IRs with single vector operand. #172798
Conversation
When `extract-lane` contains a single vector operand, we do not need to generate the instructions for calculating the offset of a second vector operand. This patch makes `extract-lane` generate a simple `extractelement` when it contains only a single vector operand.
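A minimal C++ sketch of the semantics may help here; `Part` and `extractLane` are hypothetical names used purely for illustration, not part of the VPlan/IRBuilder API. With a single vector operand the requested lane indexes that operand directly, so no runtime-VF offset arithmetic is needed; only when several vector operands are concatenated does the lane have to be rebased into the right part:

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Illustrative model only, not the VPlan/IRBuilder API. Each Part stands in
// for one vector operand of ExtractLane; all parts have the same runtime
// length VF, and the lane index addresses their concatenation.
using Part = std::vector<int64_t>;

int64_t extractLane(const std::vector<Part> &Parts, uint64_t Lane) {
  assert(!Parts.empty() && "need at least one vector operand");
  const uint64_t VF = Parts.front().size(); // runtime VF in the real recipe

  // Single vector operand: the lane indexes it directly. This is the case the
  // patch lowers to a bare extractelement, with no vscale/mul offset math.
  if (Parts.size() == 1) {
    assert(Lane < VF && "lane out of range");
    return Parts.front()[Lane];
  }

  // Several vector operands: part I starts at lane I * VF, so the lane is
  // rebased into its part before indexing. Because VF is only known at
  // runtime, the generated IR realizes this with icmp uge / select chains
  // rather than a division.
  const uint64_t PartIdx = Lane / VF;
  assert(PartIdx < Parts.size() && "lane out of range");
  return Parts[PartIdx][Lane - PartIdx * VF];
}
```

In the actual recipe the part index cannot be computed with a plain division because VF is a `vscale`-based runtime value, which is why the generated IR uses `icmp uge`/`select` chains; the patch simply skips all of that when there is nothing to select between.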
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-risc-v

Author: Elvis Wang (ElvisWang123)

Changes

When `extract-lane` contains a single vector operand, we do not need to generate the instructions for calculating the offset of a second vector operand.

This patch makes `extract-lane` generate a simple `extractelement` when it contains only a single vector operand.

Full diff: https://github.com/llvm/llvm-project/pull/172798.diff

9 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ddbf014c17d4f..5127707abd3ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -843,25 +843,27 @@ Value *VPInstruction::generate(VPTransformState &State) {
   case VPInstruction::ExtractLane: {
     Value *LaneToExtract = State.get(getOperand(0), true);
     Type *IdxTy = State.TypeAnalysis.inferScalarType(getOperand(0));
-    Value *Res = nullptr;
+    Value *Ext = State.VF.isScalar()
+                     ? State.get(getOperand(1))
+                     : Builder.CreateExtractElement(State.get(getOperand(1)),
+                                                    LaneToExtract);
+
+    if (getNumOperands() == 2)
+      return Ext;
+
+    Value *Res = Ext;
     Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
-    for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {
+    for (unsigned Idx = 2; Idx != getNumOperands(); ++Idx) {
       Value *VectorStart =
           Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));
-      Value *VectorIdx = Idx == 1
-                             ? LaneToExtract
-                             : Builder.CreateSub(LaneToExtract, VectorStart);
+      Value *VectorIdx = Builder.CreateSub(LaneToExtract, VectorStart);
       Value *Ext = State.VF.isScalar()
                        ? State.get(getOperand(Idx))
                        : Builder.CreateExtractElement(
                              State.get(getOperand(Idx)), VectorIdx);
-      if (Res) {
-        Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
-        Res = Builder.CreateSelect(Cmp, Ext, Res);
-      } else {
-        Res = Ext;
-      }
+      Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);
+      Res = Builder.CreateSelect(Cmp, Ext, Res);
     }
     return Res;
   }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
index 97cc6929e44d5..65c6fec32a599 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/widen-gep-all-indices-invariant.ll
@@ -35,9 +35,6 @@ define i32 @gep_with_all_invariant_operands(ptr %src.0, ptr %src.1, i64 %n, i1 %
 ; CHECK-NEXT: [[TMP12:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], splat (i1 true)
 ; CHECK-NEXT: [[FIRST_INACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP12]], i1 false)
 ; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[FIRST_INACTIVE_LANE]], 1
-; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 4
-; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 0
 ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], i64 [[LAST_ACTIVE_LANE]]
 ; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[EXIT]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 9daf4236982bd..3bc34e6553df1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -102,9 +102,6 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) {
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64
 ; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 16
-; CHECK-NEXT: [[TMP17:%.*]] = mul i64 [[TMP13]], 0
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 16 x i8> [[WIDE_MASKED_GATHER]], i64 [[TMP11]]
 ; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index e4ba6fe9d757d..d9adc699399fa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -673,9 +673,6 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) {
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP3]] to i64
 ; CHECK-NEXT: [[TMP17:%.*]] = sub i64 [[TMP16]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 8
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 0
 ; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = extractelement <vscale x 8 x i32> [[PREDPHI]], i64 [[TMP17]]
 ; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
 ; CHECK: exit:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index e35db479dc963..ee32d450606e0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -37,9 +37,6 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP15:%.*]] = sub i64 [[TMP12]], 1
 ; CHECK-NEXT: [[TMP16:%.*]] = sub i64 [[TMP15]], 1
-; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2
-; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 0
 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <vscale x 2 x i64> [[VP_OP_LOAD]], i64 [[TMP16]]
 ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
 ; CHECK-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
index 164a5cd1ae3c0..1cbfe8a9e3b30 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll
@@ -115,9 +115,6 @@ define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c
 ; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP26]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP23:%.*]] = mul nuw i64 [[TMP22]], 2
-; CHECK-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 0
 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <vscale x 2 x i1> [[TMP17]], i64 [[TMP29]]
 ; CHECK-NEXT: br label %[[EXIT:.*]]
 ; CHECK: [[SCALAR_PH]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
index 549222cd919da..fa39b345310f0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll
@@ -243,9 +243,6 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64
 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[TMP5]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 0
 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[TMP8]]
 ; CHECK-NEXT: br label [[FOR_BODY1:%.*]]
 ; CHECK: for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index b95691f6e7c04..f332487986c23 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -431,9 +431,6 @@ define i32 @FOR_reduction(ptr noalias %A, ptr noalias %B, i64 %TC) {
 ; IF-EVL: [[MIDDLE_BLOCK]]:
 ; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 [[TMP13]], 1
 ; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 [[TMP28]], 1
-; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP19:%.*]] = mul nuw i64 [[TMP18]], 4
-; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 0
 ; IF-EVL-NEXT: [[TMP21:%.*]] = extractelement <vscale x 4 x i32> [[WIDE_LOAD]], i64 [[TMP17]]
 ; IF-EVL-NEXT: [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
 ; IF-EVL-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP14]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 55e7018c49eec..5d65159f6e86c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -129,9 +129,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; SCALABLE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; SCALABLE: [[MIDDLE_BLOCK]]:
 ; SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1
-; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP7]], 2
-; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP11]], 0
 ; SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]]
 ; SCALABLE-NEXT: br label %[[FOR_END:.*]]
 ; SCALABLE: [[FOR_END]]:
@@ -193,9 +190,6 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap
 ; TF-SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; TF-SCALABLE: [[MIDDLE_BLOCK]]:
 ; TF-SCALABLE-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP5]], 1
-; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; TF-SCALABLE-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
-; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 0
 ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = extractelement <vscale x 2 x i64> [[BROADCAST_SPLAT]], i64 [[LAST_ACTIVE_LANE]]
 ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]]
 ; TF-SCALABLE: [[FOR_END]]:
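The test updates above all follow the same pattern: the now-dead offset computation for the single-vector-operand case (a `llvm.vscale` call, a `mul nuw`, and a multiply by zero) disappears, leaving only the final `extractelement`. As a quick check of the illustrative sketch earlier (reusing its hypothetical `Part` and `extractLane` definitions; again not the real API):

```cpp
#include <cstdio>

int main() {
  // Single vector operand -- the case the updated tests exercise: the lane
  // reads its element directly, with no runtime-VF offset ever computed.
  Part Only = {10, 11, 12, 13};
  std::printf("%lld\n", static_cast<long long>(extractLane({Only}, 2))); // 12

  // Two vector operands with VF = 4: lane 5 is rebased to index 5 - 4 = 1 in
  // the second part, which is what the icmp/select chain picks at runtime.
  Part A = {0, 1, 2, 3}, B = {4, 5, 6, 7};
  std::printf("%lld\n", static_cast<long long>(extractLane({A, B}, 5))); // 5
  return 0;
}
```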