- Notifications
You must be signed in to change notification settings - Fork 15.5k
[VPlan] Simplify pow-of-2 (mul|udiv) -> (shl|lshr) #172477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-backend-systemz Author: Ramkumar Ramachandra (artagnon) Changes — Patch is 460.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172477.diff 128 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index f082b970c7762..bb3c94218ce6b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -290,12 +290,8 @@ struct Recipe_match { if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...)) return false; - if (R->getNumOperands() != std::tuple_size<Ops_t>::value) { - assert(Opcode == Instruction::PHI && - "non-variadic recipe with matched opcode does not have the " - "expected number of operands"); + if (R->getNumOperands() != std::tuple_size<Ops_t>::value) return false; - } auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>(); if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) { @@ -547,6 +543,12 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) { return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1); } +template <typename Op0_t, typename Op1_t> +inline AllRecipe_match<Instruction::UDiv, Op0_t, Op1_t> +m_UDiv(const Op0_t &Op0, const Op1_t &Op1) { + return m_Binary<Instruction::UDiv, Op0_t, Op1_t>(Op0, Op1); +} + /// Match a binary AND operation. template <typename Op0_t, typename Op1_t> inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t> diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 0bcf131d5ea86..7f7842371af1d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1343,6 +1343,21 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith( Def->getOperand(0) == A ? 
Def->getOperand(1) : Def->getOperand(0)); + const APInt *APC; + if (match(Def, m_c_Mul(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2()) + return Def->replaceAllUsesWith(Builder.createNaryOp( + Instruction::Shl, + {Def->getOperand(0), + Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, + *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc())); + + if (match(Def, m_UDiv(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2()) + return Def->replaceAllUsesWith(Builder.createNaryOp( + Instruction::LShr, + {Def->getOperand(0), + Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, + *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc())); + if (match(Def, m_Not(m_VPValue(A)))) { if (match(A, m_Not(m_VPValue(A)))) return Def->replaceAllUsesWith(A); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index ac8095ae5c3e7..077bde70a537b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -9,7 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -71,7 +71,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = 
call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 4f2933ad2f85c..d7d77cb4325d4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -528,8 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4 -; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4 +; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2 +; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]] ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8 @@ -545,7 +545,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> 
[[BROADCAST_SPLAT]] to <vscale x 4 x double> -; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2 +; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1 ; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3 ; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]] ; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]] @@ -568,7 +568,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] @@ -1219,7 +1219,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; DEFAULT-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP9]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0 @@ -1273,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; PRED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4 ; PRED-NEXT: [[TMP9:%.*]] 
= sub i64 [[TMP0]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 2b294696ebe89..99bbcad95b6de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -21,8 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP11]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] @@ -106,7 +106,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]] @@ -220,7 +220,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl 
nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 14c53cd89c922..74598e2063e48 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -12,7 +12,7 @@ define void @f1(ptr %A) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 26a9545764091..d18283f831799 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -77,7 +77,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SVE: vector.ph: ; SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; SVE-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4cacc30a714b6..aa6e4df26d71b 100644 
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -30,8 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8 -; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2 +; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP9]], 3 +; DEFAULT-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP13]], 1 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0 @@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT: [[VEC_EPILOG_PH]]: ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 4 +; DEFAULT-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 2 ; DEFAULT-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], [[TMP34]] ; DEFAULT-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 @@ -130,7 +130,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 +; PRED-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0 ; 
PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index 040acb494a42e..fff9365baccb4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -144,8 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE-4-VLA: vector.ph: ; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2 ; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -156,7 +156,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 1 ; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3 ; INTERLEAVE-4-VLA-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]] ; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll index cc3b1c9c9db8a..de5a24666626c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll @@ -16,7 +16,7 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index cf45f3a88f37e..c340cfc9ad6cc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -67,7 +67,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS1: [[VECTOR_PH]]: ; CHECK-VS1-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 16 +; CHECK-VS1-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4 ; CHECK-VS1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 
[[CONV]], i64 0 @@ -160,7 +160,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS2: [[VECTOR_PH]]: ; CHECK-VS2-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +; CHECK-VS2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3 ; CHECK-VS2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0 @@ -393,7 +393,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 0a11e8e4390cb..00c7e6eecfb2c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -15,7 +15,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%... [truncated] |
| @llvm/pr-subscribers-vectorizers Author: Ramkumar Ramachandra (artagnon) Changes — Patch is 460.87 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172477.diff 128 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index f082b970c7762..bb3c94218ce6b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -290,12 +290,8 @@ struct Recipe_match { if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...)) return false; - if (R->getNumOperands() != std::tuple_size<Ops_t>::value) { - assert(Opcode == Instruction::PHI && - "non-variadic recipe with matched opcode does not have the " - "expected number of operands"); + if (R->getNumOperands() != std::tuple_size<Ops_t>::value) return false; - } auto IdxSeq = std::make_index_sequence<std::tuple_size<Ops_t>::value>(); if (all_of_tuple_elements(IdxSeq, [R](auto Op, unsigned Idx) { @@ -547,6 +543,12 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) { return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1); } +template <typename Op0_t, typename Op1_t> +inline AllRecipe_match<Instruction::UDiv, Op0_t, Op1_t> +m_UDiv(const Op0_t &Op0, const Op1_t &Op1) { + return m_Binary<Instruction::UDiv, Op0_t, Op1_t>(Op0, Op1); +} + /// Match a binary AND operation. template <typename Op0_t, typename Op1_t> inline AllRecipe_commutative_match<Instruction::And, Op0_t, Op1_t> diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 0bcf131d5ea86..7f7842371af1d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1343,6 +1343,21 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) { return Def->replaceAllUsesWith( Def->getOperand(0) == A ? 
Def->getOperand(1) : Def->getOperand(0)); + const APInt *APC; + if (match(Def, m_c_Mul(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2()) + return Def->replaceAllUsesWith(Builder.createNaryOp( + Instruction::Shl, + {Def->getOperand(0), + Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, + *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc())); + + if (match(Def, m_UDiv(m_VPValue(), m_APInt(APC))) && APC->isPowerOf2()) + return Def->replaceAllUsesWith(Builder.createNaryOp( + Instruction::LShr, + {Def->getOperand(0), + Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())}, + *cast<VPRecipeWithIRFlags>(Def), Def->getDebugLoc())); + if (match(Def, m_Not(m_VPValue(A)))) { if (match(A, m_Not(m_VPValue(A)))) return Def->replaceAllUsesWith(A); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index ac8095ae5c3e7..077bde70a537b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -9,7 +9,7 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -71,7 +71,7 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = 
call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 4f2933ad2f85c..d7d77cb4325d4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -528,8 +528,8 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP4]], 4 -; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP11]], 4 +; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP4]], 2 +; DEFAULT-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP11]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]] ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[N_VEC]], 8 @@ -545,7 +545,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP8]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> 
[[BROADCAST_SPLAT]] to <vscale x 4 x double> -; DEFAULT-NEXT: [[TMP14:%.*]] = mul nuw nsw i64 [[TMP11]], 2 +; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw nsw i64 [[TMP11]], 1 ; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw nsw i64 [[TMP11]], 3 ; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP11]] ; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP1]], i64 [[TMP14]] @@ -568,7 +568,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: br label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; PRED-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2 ; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]] @@ -1219,7 +1219,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 +; DEFAULT-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP8]], 2 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP9]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[Y]], i64 0 @@ -1273,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; PRED-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 16 +; PRED-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 ; PRED-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4 ; PRED-NEXT: [[TMP9:%.*]] 
= sub i64 [[TMP0]], [[TMP8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 2b294696ebe89..99bbcad95b6de 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -21,8 +21,8 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP8]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP11]], 2 +; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP8]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw i64 [[TMP11]], 1 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] @@ -106,7 +106,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[N]], [[TMP11]] @@ -220,7 +220,7 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) { ; CHECK-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP11:%.*]] = shl 
nuw i64 [[TMP10]], 1 ; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[TMP0]], [[TMP11]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 14c53cd89c922..74598e2063e48 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -12,7 +12,7 @@ define void @f1(ptr %A) #0 { ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll index 26a9545764091..d18283f831799 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-do-not-vectorize-addressing.ll @@ -77,7 +77,7 @@ define dso_local double @test(ptr nocapture noundef readonly %data, ptr nocaptur ; SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SVE: vector.ph: ; SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SVE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 +; SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 ; SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP3]] ; SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] ; SVE-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index 4cacc30a714b6..aa6e4df26d71b 100644 
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -30,8 +30,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK3]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP9]], 8 -; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP13]], 2 +; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP9]], 3 +; DEFAULT-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP13]], 1 ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP10]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[X]], i64 0 @@ -70,7 +70,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; DEFAULT: [[VEC_EPILOG_PH]]: ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP34:%.*]] = mul nuw i64 [[TMP33]], 4 +; DEFAULT-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 2 ; DEFAULT-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[TMP0]], [[TMP34]] ; DEFAULT-NEXT: [[N_VEC6:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF5]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 @@ -130,7 +130,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 { ; PRED-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; PRED-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16 +; PRED-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[X]], i64 0 ; 
PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index 040acb494a42e..fff9365baccb4 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -144,8 +144,8 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE-4-VLA: vector.ph: ; INTERLEAVE-4-VLA-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP2]], 4 -; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP5]], 4 +; INTERLEAVE-4-VLA-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP2]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP5]], 2 ; INTERLEAVE-4-VLA-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] ; INTERLEAVE-4-VLA-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; INTERLEAVE-4-VLA-NEXT: br label [[VECTOR_BODY:%.*]] @@ -156,7 +156,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-VLA-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = mul nuw nsw i64 [[TMP5]], 2 +; INTERLEAVE-4-VLA-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP5]], 1 ; INTERLEAVE-4-VLA-NEXT: [[TMP12:%.*]] = mul nuw nsw i64 [[TMP5]], 3 ; INTERLEAVE-4-VLA-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP5]] ; INTERLEAVE-4-VLA-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll index cc3b1c9c9db8a..de5a24666626c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invalid-costs.ll @@ -16,7 +16,7 @@ define void @replicate_sdiv_conditional(ptr noalias %a, ptr noalias %b, ptr noal ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 64, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 64, [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll index cf45f3a88f37e..c340cfc9ad6cc 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll @@ -67,7 +67,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS1-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS1: [[VECTOR_PH]]: ; CHECK-VS1-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 16 +; CHECK-VS1-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 4 ; CHECK-VS1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 
[[CONV]], i64 0 @@ -160,7 +160,7 @@ define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef ; CHECK-VS2-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK-VS2: [[VECTOR_PH]]: ; CHECK-VS2-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VS2-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 8 +; CHECK-VS2-NEXT: [[TMP17:%.*]] = shl nuw i64 [[TMP16]], 3 ; CHECK-VS2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]] ; CHECK-VS2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-VS2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0 @@ -393,7 +393,7 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef % ; CHECK-NEXT: br i1 [[TMP28]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]]) ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 0a11e8e4390cb..00c7e6eecfb2c 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -15,7 +15,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: br label %[[VECTOR_PH:.*]] ; TFNONE: [[VECTOR_PH]]: ; TFNONE-NEXT: [[TMP2:%... [truncated] |
| if (R->getNumOperands() != std::tuple_size<Ops_t>::value) { | ||
| assert(Opcode == Instruction::PHI && | ||
| "non-variadic recipe with matched opcode does not have the " | ||
| "expected number of operands"); | ||
| if (R->getNumOperands() != std::tuple_size<Ops_t>::value) | ||
| return false; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change is needed to support predicated udivs; I'm not sure whether it's worth keeping the assert while still permitting udiv recipes like the following:
REPLICATE ir<%var2> = udiv ir<%var1>, ir<%i>, ir<%c> | ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1) | ||
| ; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], zeroinitializer |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Follow-up planned.
No description provided.