Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7405,6 +7405,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
// Expand BranchOnMultiCond after dissolution, when latch has direct access
// to its successors.
VPlanTransforms::expandBranchOnMultiCond(BestVPlan);
// Canonicalize EVL loops after regions are dissolved.
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
Expand Down
17 changes: 9 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -592,25 +592,26 @@ static bool hasConditionalTerminator(const VPBasicBlock *VPBB) {
}

const VPRecipeBase *R = &VPBB->back();
bool IsSwitch = isa<VPInstruction>(R) &&
cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
bool IsCondBranch =
[[maybe_unused]] bool IsSwitch =
isa<VPInstruction>(R) &&
cast<VPInstruction>(R)->getOpcode() == Instruction::Switch;
[[maybe_unused]] bool IsBranchOnMultiCond = match(R, m_BranchOnMultiCond());
[[maybe_unused]] bool IsCondBranch =
isa<VPBranchOnMaskRecipe>(R) ||
match(R, m_CombineOr(m_BranchOnCond(), m_BranchOnCount()));
(void)IsCondBranch;
(void)IsSwitch;
if (VPBB->getNumSuccessors() == 2 ||
(VPBB->isExiting() && !VPBB->getParent()->isReplicator())) {
assert((IsCondBranch || IsSwitch) &&
assert((IsCondBranch || IsSwitch || IsBranchOnMultiCond) &&
"block with multiple successors not terminated by "
"conditional branch nor switch recipe");

return true;
}

if (VPBB->getNumSuccessors() > 2) {
assert(IsSwitch && "block with more than 2 successors not terminated by "
"a switch recipe");
assert((IsSwitch || IsBranchOnMultiCond) &&
"block with more than 2 successors not terminated by "
"a switch or branch-on-multi-cond recipe");
return true;
}

Expand Down
17 changes: 10 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,12 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
// lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
BranchOnCount,
BranchOnCond,
// Multi-way branch with N boolean condition operands and N+1 successors.
// If condition i is true, branches to successor i; otherwise falls through
// to check the next condition. If all conditions are false, branches to
// the final successor. Expanded to a chain of BranchOnCond after region
// dissolution.
BranchOnMultiCond,
Broadcast,
/// Given operands of (the same) struct type, creates a struct of fixed-
/// width vectors each containing a struct field of all operands. The
Expand Down Expand Up @@ -1212,6 +1218,7 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
case Instruction::Fence:
case Instruction::AtomicRMW:
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnMultiCond:
case VPInstruction::BranchOnCount:
return false;
default:
Expand Down Expand Up @@ -4405,7 +4412,7 @@ class VPlan {
/// latch. If there is an early exit from the vector loop, the middle block
/// conceptually has the early exit block as third successor, split across 2
/// VPBBs. In that case, the second VPBB selects whether to execute the scalar
/// tail loop or the exit bock. If the scalar tail loop or exit block are
/// tail loop or the exit block. If the scalar tail loop or exit block are
/// known to always execute, the middle block may branch directly to that
/// block. This function cannot be called once the vector loop region has been
/// removed.
Expand All @@ -4414,12 +4421,8 @@ class VPlan {
assert(
LoopRegion &&
"cannot call the function after vector loop region has been removed");
auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
if (RegionSucc->getSingleSuccessor() ||
is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
return RegionSucc;
// There is an early exit. The successor of RegionSucc is the middle block.
return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
// The middle block is always the last successor of the region.
return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
}

const VPBasicBlock *getMiddleBlock() const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
// Return the type based on first operand.
return inferScalarType(R->getOperand(0));
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnMultiCond:
case VPInstruction::BranchOnCount:
return Type::getVoidTy(Ctx);
default:
Expand Down
15 changes: 8 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,16 +428,17 @@ static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {

VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
VPBlockBase *LatchExitVPB = LatchVPBB->getSingleSuccessor();
assert(LatchExitVPB && "Latch expected to be left with a single successor");

// Create an empty region first and insert it between PreheaderVPBB and
// LatchExitVPB, taking care to preserve the original predecessor & successor
// order of blocks. Set region entry and exiting after both HeaderVPB and
// LatchVPBB have been disconnected from their predecessors/successors.
// the exit blocks, taking care to preserve the original predecessor &
// successor order of blocks. Set region entry and exiting after both
// HeaderVPB and LatchVPBB have been disconnected from their
// predecessors/successors.
auto *R = Plan.createLoopRegion();
VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, R);
VPBlockUtils::disconnectBlocks(LatchVPBB, R);

// Transfer latch's successors to the region.
VPBlockUtils::transferSuccessors(LatchVPBB, R);

VPBlockUtils::connectBlocks(PreheaderVPBB, R);
R->setEntry(HeaderVPB);
R->setExiting(LatchVPBB);
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,17 @@ m_BranchOnCond(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::BranchOnCond>(Op0);
}

/// Build a matcher for a VPInstruction whose opcode is
/// VPInstruction::BranchOnMultiCond, without supplying any operand
/// sub-matchers. Intended for use as a predicate on a block's terminator,
/// e.g. match(&VPBB->back(), m_BranchOnMultiCond()).
/// NOTE(review): whether the operand count is left unconstrained depends on
/// how m_VPInstruction treats an empty operand list - confirm there.
inline VPInstruction_match<VPInstruction::BranchOnMultiCond>
m_BranchOnMultiCond() {
return m_VPInstruction<VPInstruction::BranchOnMultiCond>();
}

/// Build a matcher for a VPInstruction::BranchOnMultiCond whose operands are
/// checked by the sub-matchers \p Op0 and \p Op1, which are forwarded
/// unchanged to m_VPInstruction.
/// NOTE(review): this overload takes exactly two sub-matchers; presumably it
/// only matches the two-condition form of the branch - confirm against
/// m_VPInstruction's operand-count handling.
template <typename Op0_t, typename Op1_t>
inline VPInstruction_match<VPInstruction::BranchOnMultiCond, Op0_t, Op1_t>
m_BranchOnMultiCond(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<VPInstruction::BranchOnMultiCond>(Op0, Op1);
}

template <typename Op0_t>
inline VPInstruction_match<VPInstruction::Broadcast, Op0_t>
m_Broadcast(const Op0_t &Op0) {
Expand Down
12 changes: 11 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
auto *VPI = cast<VPInstruction>(this);
return mayWriteToMemory() ||
VPI->getOpcode() == VPInstruction::BranchOnCount ||
VPI->getOpcode() == VPInstruction::BranchOnCond;
VPI->getOpcode() == VPInstruction::BranchOnCond ||
VPI->getOpcode() == VPInstruction::BranchOnMultiCond;
}
case VPWidenCallSC: {
Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();
Expand Down Expand Up @@ -471,6 +472,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case Instruction::PHI:
case Instruction::Switch:
case VPInstruction::AnyOf:
case VPInstruction::BranchOnMultiCond:
case VPInstruction::FirstActiveLane:
case VPInstruction::LastActiveLane:
case VPInstruction::SLPLoad:
Expand All @@ -497,6 +499,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
case Instruction::PHI:
case Instruction::Select:
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnMultiCond:
case VPInstruction::BranchOnCount:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
Expand Down Expand Up @@ -654,6 +657,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
applyMetadata(*Br);
return Br;
}
case VPInstruction::BranchOnMultiCond:
llvm_unreachable("BranchOnMultiCond should be expanded before execution");
case VPInstruction::Broadcast: {
return Builder.CreateVectorSplat(
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
Expand Down Expand Up @@ -1167,6 +1172,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case Instruction::PHI:
case VPInstruction::AnyOf:
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnMultiCond:
case VPInstruction::BranchOnCount:
case VPInstruction::Broadcast:
case VPInstruction::BuildStructVector:
Expand Down Expand Up @@ -1260,6 +1266,7 @@ bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const {
return vputils::onlyFirstPartUsed(this);
case VPInstruction::BranchOnCount:
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnMultiCond:
case VPInstruction::CanonicalIVIncrementForPart:
return true;
};
Expand Down Expand Up @@ -1303,6 +1310,9 @@ void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
case VPInstruction::BranchOnMultiCond:
O << "branch-on-multi-cond";
break;
case VPInstruction::CalculateTripCountMinusVF:
O << "TC > VF ? TC - VF : 0";
break;
Expand Down
99 changes: 78 additions & 21 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3694,6 +3694,60 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
R->dissolveToCFGLoop();
}

void VPlanTransforms::expandBranchOnMultiCond(VPlan &Plan) {
  // Lower every BranchOnMultiCond terminator into a chain of two-way
  // BranchOnCond terminators. For N conditions and successors S0..SN,
  // condition i branches to Si when true; when false, control continues in a
  // freshly created block that tests condition i+1, and the last condition
  // falls through to SN. Must run after dissolveLoopRegions, when successors
  // have been restored.

  // Gather the affected blocks up front so the CFG is not modified while it
  // is being traversed.
  SmallVector<VPBasicBlock *> Candidates;
  for (VPBasicBlock *Block : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_shallow(Plan.getEntry())))
    if (!Block->empty() && match(&Block->back(), m_BranchOnMultiCond()))
      Candidates.push_back(Block);

  for (VPBasicBlock *Origin : Candidates) {
    auto *MultiBr = cast<VPInstruction>(&Origin->back());

    const unsigned NumConds = MultiBr->getNumOperands();
    assert(NumConds > 0 &&
           "BranchOnMultiCond must have at least one condition");
    DebugLoc DL = MultiBr->getDebugLoc();
    // Snapshot the successor list: it is emptied right below, but the
    // original order is needed to wire up the chain.
    const auto Targets = to_vector(Origin->getSuccessors());
    assert(Targets.size() == NumConds + 1 &&
           "BranchOnMultiCond must have N operands and N+1 successors");

    // Detach the block from all of its successors.
    for (VPBlockBase *Target : Targets)
      VPBlockUtils::disconnectBlocks(Origin, Target);

    // Tail is the block that receives the next BranchOnCond; it starts as
    // the original block and then advances along the false edges.
    VPBasicBlock *Tail = Origin;
    for (unsigned Idx = 0; Idx != NumConds; ++Idx) {
      VPBlockBase *OnFalse = nullptr;
      if (Idx + 1 == NumConds) {
        // Last condition: fall through to the final successor.
        OnFalse = Targets[NumConds];
      } else {
        // More conditions follow: the false edge leads to a new block that
        // will hold the next test.
        VPBasicBlock *Next = Plan.createVPBasicBlock(
            (Twine(Origin->getName()) + ".multi.cond." + Twine(Idx + 1))
                .str());
        Next->setParent(Origin->getParent());
        OnFalse = Next;
      }

      VPBuilder ChainBuilder(Tail);
      ChainBuilder.createNaryOp(VPInstruction::BranchOnCond,
                                {MultiBr->getOperand(Idx)}, DL);
      // Successor order matters: true target first, false target second.
      VPBlockUtils::connectBlocks(Tail, Targets[Idx]);
      VPBlockUtils::connectBlocks(Tail, OnFalse);

      Tail = cast<VPBasicBlock>(OnFalse);
    }

    // The multi-way branch is only removed now because its operands were
    // still being read while the chain was built.
    MultiBr->eraseFromParent();
  }
}

void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
VPTypeAnalysis TypeInfo(Plan);
SmallVector<VPRecipeBase *> ToRemove;
Expand Down Expand Up @@ -3846,36 +3900,36 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
? CondOfEarlyExitingVPBB
: Builder.createNot(CondOfEarlyExitingVPBB);

// Split the middle block and have it conditionally branch to the early exit
// block if CondToEarlyExit.
// Create a BranchOnMultiCond in the latch that branches to:
// [0] vector.early.exit, [1] middle block, [2] header (continue looping).
VPValue *IsEarlyExitTaken =
Builder.createNaryOp(VPInstruction::AnyOf, {CondToEarlyExit});
VPBasicBlock *NewMiddle = Plan.createVPBasicBlock("middle.split");
VPBasicBlock *VectorEarlyExitVPBB =
Plan.createVPBasicBlock("vector.early.exit");
VPBlockUtils::insertOnEdge(LatchVPBB, MiddleVPBB, NewMiddle);
VPBlockUtils::connectBlocks(NewMiddle, VectorEarlyExitVPBB);
NewMiddle->swapSuccessors();
VectorEarlyExitVPBB->setParent(LatchVPBB->getParent());

// Update PHI operands: copy from EarlyExitingVPBB to VectorEarlyExitVPBB.
unsigned PredIdx = EarlyExitVPBB->getIndexForPredecessor(EarlyExitingVPBB);
VPBlockUtils::connectBlocks(VectorEarlyExitVPBB, EarlyExitVPBB);

// Update the exit phis in the early exit block.
VPBuilder MiddleBuilder(NewMiddle);
VPBuilder EarlyExitB(VectorEarlyExitVPBB);
VPBuilder MiddleBuilder(cast<VPBasicBlock>(MiddleVPBB));
for (VPRecipeBase &R : EarlyExitVPBB->phis()) {
auto *ExitIRI = cast<VPIRPhi>(&R);
// Early exit operand should always be last, i.e., 0 if EarlyExitVPBB has
// a single predecessor and 1 if it has two.

// Move operand from EarlyExitingVPBB to VectorEarlyExitVPBB.
VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(PredIdx);
ExitIRI->addOperand(IncomingFromEarlyExit);
ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);

// Early exit operand is now at the last position.
unsigned EarlyExitIdx = ExitIRI->getNumOperands() - 1;
if (ExitIRI->getNumOperands() != 1) {
// The first of two operands corresponds to the latch exit, via MiddleVPBB
// predecessor. Extract its final lane.
// The first operand corresponds to the latch exit via MiddleVPBB.
ExitIRI->extractLastLaneOfLastPartOfFirstOperand(MiddleBuilder);
}

VPValue *IncomingFromEarlyExit = ExitIRI->getOperand(EarlyExitIdx);
if (!IncomingFromEarlyExit->isLiveIn()) {
// Update the incoming value from the early exit.
VPValue *FirstActiveLane = EarlyExitB.createNaryOp(
VPInstruction::FirstActiveLane, {CondToEarlyExit},
DebugLoc::getUnknown(), "first.active.lane");
Expand All @@ -3885,22 +3939,25 @@ void VPlanTransforms::handleUncountableEarlyExit(VPBasicBlock *EarlyExitingVPBB,
ExitIRI->setOperand(EarlyExitIdx, IncomingFromEarlyExit);
}
}
MiddleBuilder.createNaryOp(VPInstruction::BranchOnCond, {IsEarlyExitTaken});

// Replace the condition controlling the non-early exit from the vector loop
// with one exiting if either the original condition of the vector latch is
// true or the early exit has been taken.
// Get the old latch terminator to extract the trip count condition.
auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator());
// Skip single-iteration loop region
assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount &&
"Unexpected terminator");
auto *IsLatchExitTaken =
Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
LatchExitingBranch->getOperand(1));
auto *AnyExitTaken = Builder.createNaryOp(
Instruction::Or, {IsEarlyExitTaken, IsLatchExitTaken});
Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);

DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
LatchExitingBranch->eraseFromParent();

Builder.setInsertPoint(LatchVPBB);
Builder.createNaryOp(VPInstruction::BranchOnMultiCond,
{IsEarlyExitTaken, IsLatchExitTaken}, LatchDL);
LatchVPBB->clearSuccessors();
LatchVPBB->setSuccessors({VectorEarlyExitVPBB, MiddleVPBB, HeaderVPBB});
VectorEarlyExitVPBB->setPredecessors({LatchVPBB});
}

/// This function tries convert extended in-loop reductions to
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,10 @@ struct VPlanTransforms {
/// Replace loop regions with explicit CFG.
static void dissolveLoopRegions(VPlan &Plan);

/// Expand BranchOnMultiCond instructions into explicit CFG with
/// BranchOnCond instructions. Should be called after dissolveLoopRegions.
static void expandBranchOnMultiCond(VPlan &Plan);

/// Transform EVL loops to use variable-length stepping after region
/// dissolution.
///
Expand Down
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,11 +317,10 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
// EMIT vp<%5> = any-of vp<%4>
// EMIT vp<%6> = add vp<%2>, vp<%0>
// EMIT vp<%7> = icmp eq vp<%6>, ir<64>
// EMIT vp<%8> = or vp<%5>, vp<%7>
// EMIT branch-on-cond vp<%8>
// EMIT branch-on-multi-cond vp<%5>, vp<%7>
// No successors
// }
// Successor(s): middle.block
// Successor(s): early.exit, middle.block
//
// middle.block:
// Successor(s): preheader
Expand All @@ -334,8 +333,8 @@ vputils::getRecipesForUncountableExit(VPlan &Plan,
auto *Region = Plan.getVectorLoopRegion();
VPValue *UncountableCondition = nullptr;
if (!match(Region->getExitingBasicBlock()->getTerminator(),
m_BranchOnCond(m_OneUse(m_c_BinaryOr(
m_AnyOf(m_VPValue(UncountableCondition)), m_VPValue())))))
m_BranchOnMultiCond(m_AnyOf(m_VPValue(UncountableCondition)),
m_VPValue())))
return std::nullopt;

SmallVector<VPValue *, 4> Worklist;
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Transforms/Vectorize/VPlanUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,7 @@ class VPBlockUtils {
unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
assert((From->getParent() == To->getParent()) &&
"Can't connect two block with different parents");
assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
"Blocks can't have more than two successors.");

if (SuccIdx == -1u)
From->appendSuccessor(To);
else
Expand Down
Loading