llvm
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎
Lines changed: 4 additions & 3 deletions b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlan.h‎
Lines changed: 59 additions & 56 deletions b/‎llvm/lib/Transforms/Vectorize/VPlan.h‎
Lines changed: 59 additions & 56 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp‎
Lines changed: 4 additions & 4 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll‎
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll‎
Lines changed: 2 additions & 2 deletions
@@ -9122,17 +9122,18 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
  ReductionOpcode = Instruction::Add;
  }
 
+ VPValue *Cond = nullptr;
  if (CM.blockNeedsPredicationForAnyReason(Reduction->getParent())) {
  assert((ReductionOpcode == Instruction::Add ||
  ReductionOpcode == Instruction::Sub) &&
  "Expected an ADD or SUB operation for predicated partial "
  "reductions (because the neutral element in the mask is zero)!");
- VPValue *Mask = getBlockInMask(Reduction->getParent());
+ Cond = getBlockInMask(Reduction->getParent());
  VPValue *Zero =
  Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
- BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
+ BinOp = Builder.createSelect(Cond, BinOp, Zero, Reduction->getDebugLoc());
  }
- return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator,
+ return new VPPartialReductionRecipe(ReductionOpcode, Accumulator, BinOp, Cond,
  ScaleFactor, Reduction);
 }
 
 
@@ -2133,62 +2133,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
  }
 };
 
-/// A recipe for forming partial reductions. In the loop, an accumulator and
-/// vector operand are added together and passed to the next iteration as the
-/// next accumulator. After the loop body, the accumulator is reduced to a
-/// scalar value.
-class VPPartialReductionRecipe : public VPSingleDefRecipe {
- unsigned Opcode;
- /// The divisor by which the VF of this recipe's output should be divided
- /// during execution.
- unsigned VFScaleFactor;
-
-public:
- VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
- VPValue *Op1, unsigned VFScaleFactor)
- : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1,
- VFScaleFactor, ReductionInst) {}
- VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
- unsigned VFScaleFactor,
- Instruction *ReductionInst = nullptr)
- : VPSingleDefRecipe(VPDef::VPPartialReductionSC,
- ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
- Opcode(Opcode), VFScaleFactor(VFScaleFactor) {
- [[maybe_unused]] auto *AccumulatorRecipe =
- getOperand(1)->getDefiningRecipe();
- assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
- isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
- "Unexpected operand order for partial reduction recipe");
- }
- ~VPPartialReductionRecipe() override = default;
-
- VPPartialReductionRecipe *clone() override {
- return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
- VFScaleFactor, getUnderlyingInstr());
- }
-
- VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
-
- /// Generate the reduction in the loop.
- void execute(VPTransformState &State) override;
-
- /// Return the cost of this VPPartialReductionRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override;
-
- /// Get the binary op's opcode.
- unsigned getOpcode() const { return Opcode; }
-
- /// Get the factor that the VF of this recipe's output should be scaled by.
- unsigned getVFScaleFactor() const { return VFScaleFactor; }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-};
-
 /// A recipe for vectorizing a phi-node as a sequence of mask-based select
 /// instructions.
 class VPBlendRecipe : public VPSingleDefRecipe {
@@ -2431,6 +2375,65 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
  }
 };
 
+/// A recipe for forming partial reductions. In the loop, an accumulator and
+/// vector operand are added together and passed to the next iteration as the
+/// next accumulator. After the loop body, the accumulator is reduced to a
+/// scalar value.
+class VPPartialReductionRecipe : public VPReductionRecipe {
+ unsigned Opcode;
+
+ /// The divisor by which the VF of this recipe's output should be divided
+ /// during execution.
+ unsigned VFScaleFactor;
+
+public:
+ VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0,
+ VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
+ : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
+ VFScaleFactor, ReductionInst) {}
+ VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
+ VPValue *Cond, unsigned ScaleFactor,
+ Instruction *ReductionInst = nullptr)
+ : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
+ FastMathFlags(), ReductionInst,
+ ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
+ Opcode(Opcode), VFScaleFactor(ScaleFactor) {
+ [[maybe_unused]] auto *AccumulatorRecipe =
+ getChainOp()->getDefiningRecipe();
+ assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
+ isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
+ "Unexpected operand order for partial reduction recipe");
+ }
+ ~VPPartialReductionRecipe() override = default;
+
+ VPPartialReductionRecipe *clone() override {
+ return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
+ getCondOp(), VFScaleFactor,
+ getUnderlyingInstr());
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
+
+ /// Generate the reduction in the loop.
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPPartialReductionRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+ /// Get the binary op's opcode.
+ unsigned getOpcode() const { return Opcode; }
+
+ /// Get the factor that the VF of this recipe's output should be scaled by.
+ unsigned getVFScaleFactor() const { return VFScaleFactor; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A recipe to represent inloop reduction operations with vector-predication
 /// intrinsics, performing a reduction on a vector operand with the explicit
 /// vector length (EVL) into a scalar value, and adding the result to a chain.
 
@@ -287,12 +287,12 @@ InstructionCost
 VPPartialReductionRecipe::computeCost(ElementCount VF,
  VPCostContext &Ctx) const {
  std::optional<unsigned> Opcode = std::nullopt;
- VPValue *BinOp = getOperand(0);
+ VPValue *BinOp = getOperand(1);
 
  // If the partial reduction is predicated, a select will be operand 0 rather
  // than the binary op
  using namespace llvm::VPlanPatternMatch;
- if (match(getOperand(0), m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
+ if (match(getOperand(1), m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
  BinOp = BinOp->getDefiningRecipe()->getOperand(1);
 
  // If BinOp is a negation, use the side effect of match to assign the actual
@@ -337,8 +337,8 @@ void VPPartialReductionRecipe::execute(VPTransformState &State) {
  assert(getOpcode() == Instruction::Add &&
  "Unhandled partial reduction opcode");
 
- Value *BinOpVal = State.get(getOperand(0));
- Value *PhiVal = State.get(getOperand(1));
+ Value *BinOpVal = State.get(getOperand(1));
+ Value *PhiVal = State.get(getOperand(0));
  assert(PhiVal && BinOpVal && "Phi and Mul must be set");
 
  Type *RetTy = PhiVal->getType();
 
@@ -33,7 +33,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
 ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
-; CHECK-NEXT: PARTIAL-REDUCE ir<[[REDUCE]]> = add ir<%mul>, ir<[[ACC]]>
+; CHECK-NEXT: PARTIAL-REDUCE ir<[[REDUCE]]> = add ir<[[ACC]]>, ir<%mul>
 ; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
 ; CHECK-NEXT: No successors
@@ -98,7 +98,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
 ; CHECK-NEXT: WIDEN ir<%load.b> = load vp<[[PTR_B]]>
 ; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
 ; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
-; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%mul>, ir<%accum>
+; CHECK-NEXT: PARTIAL-REDUCE ir<%add> = add ir<%accum>, ir<%mul>
 ; CHECK-NEXT: EMIT vp<[[EP_IV_NEXT:%.+]]> = add nuw vp<[[EP_IV]]>, ir<16>
 ; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
 ; CHECK-NEXT: No successors