Skip to content

Commit 8954011

Browse files
authored
[AMDGPU][FixIrreducible][UnifyLoopExits] Support callbr with inline-asm (#149308)
First batch of changes to add support for inline-asm callbr for the AMDGPU backend.
1 parent da709f5 commit 8954011

File tree

19 files changed

+2985
-50
lines changed

19 files changed

+2985
-50
lines changed

llvm/include/llvm/ADT/GenericCycleImpl.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,17 @@ auto GenericCycleInfo<ContextT>::getSmallestCommonCycle(CycleT *A,
561561
return A;
562562
}
563563

564+
/// \brief Find the innermost cycle containing both given blocks.
565+
///
/// Block-based convenience overload: each block is mapped to a cycle with
/// getCycle() and the two results are forwarded to the cycle-based overload
/// of getSmallestCommonCycle().
///
566+
/// \returns the innermost cycle containing both \p A and \p B
567+
/// or nullptr if there is no such cycle.
568+
template <typename ContextT>
569+
auto GenericCycleInfo<ContextT>::getSmallestCommonCycle(BlockT *A,
570+
BlockT *B) const
571+
-> CycleT * {
572+
return getSmallestCommonCycle(getCycle(A), getCycle(B));
573+
}
574+
564575
/// \brief Get the depth of the innermost cycle containing a given block.
565576
///
566577
/// \returns the depth for the innermost cycle containing \p Block or 0 if it is

llvm/include/llvm/ADT/GenericCycleInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,7 @@ template <typename ContextT> class GenericCycleInfo {
298298

299299
CycleT *getCycle(const BlockT *Block) const;
300300
CycleT *getSmallestCommonCycle(CycleT *A, CycleT *B) const;
301+
CycleT *getSmallestCommonCycle(BlockT *A, BlockT *B) const;
301302
unsigned getCycleDepth(const BlockT *Block) const;
302303
CycleT *getTopLevelParentCycle(BlockT *Block);
303304

llvm/include/llvm/Support/GenericLoopInfo.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,17 @@ template <class BlockT, class LoopT> class LoopInfoBase {
615615
return L ? L->getLoopDepth() : 0;
616616
}
617617

618+
/// \brief Find the innermost loop containing both given loops.
619+
///
620+
/// \returns the innermost loop containing both \p A and \p B
621+
/// or nullptr if there is no such loop.
622+
LoopT *getSmallestCommonLoop(LoopT *A, LoopT *B) const;
623+
/// \brief Find the innermost loop containing both given blocks.
624+
///
625+
/// \returns the innermost loop containing both \p A and \p B
626+
/// or nullptr if there is no such loop.
627+
LoopT *getSmallestCommonLoop(BlockT *A, BlockT *B) const;
628+
618629
// True if the block is a loop header node
619630
bool isLoopHeader(const BlockT *BB) const {
620631
const LoopT *L = getLoopFor(BB);

llvm/include/llvm/Support/GenericLoopInfoImpl.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
355355
if (BB == getHeader()) {
356356
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
357357
} else if (!OutsideLoopPreds.empty()) {
358-
// A non-header loop shouldn't be reachable from outside the loop,
358+
// A non-header loop block shouldn't be reachable from outside the loop,
359359
// though it is permitted if the predecessor is not itself actually
360360
// reachable.
361361
BlockT *EntryBB = &BB->getParent()->front();
@@ -645,6 +645,36 @@ LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() const {
645645
return PreOrderLoops;
646646
}
647647

648+
/// Find the innermost loop that contains both loops \p A and \p B by walking
/// up their parent chains. Returns nullptr if either argument is null;
/// otherwise returns the first loop at which both parent chains meet.
template <class BlockT, class LoopT>
649+
LoopT *LoopInfoBase<BlockT, LoopT>::getSmallestCommonLoop(LoopT *A,
650+
LoopT *B) const {
651+
if (!A || !B)
652+
return nullptr;
653+
654+
// If loops A and B have different depths, replace the deeper one with its
655+
// parent loop until both are at the same depth.
656+
while (A->getLoopDepth() > B->getLoopDepth())
657+
A = A->getParentLoop();
658+
while (B->getLoopDepth() > A->getLoopDepth())
659+
B = B->getParentLoop();
660+
661+
// Loops A and B are at the same depth but may be disjoint; replace them with
662+
// their parent loops in lock-step until we find a loop that contains both or
663+
// we run out of parent loops (presumably getParentLoop() then yields nullptr
// for both, which also ends the walk; confirm against LoopBase).
664+
while (A != B) {
665+
A = A->getParentLoop();
666+
B = B->getParentLoop();
667+
}
668+
669+
return A;
670+
}
671+
672+
/// Block-based convenience overload: maps each block to a loop with
/// getLoopFor() and forwards both results to the loop-based overload of
/// getSmallestCommonLoop().
template <class BlockT, class LoopT>
673+
LoopT *LoopInfoBase<BlockT, LoopT>::getSmallestCommonLoop(BlockT *A,
674+
BlockT *B) const {
675+
return getSmallestCommonLoop(getLoopFor(A), getLoopFor(B));
676+
}
677+
648678
// Debugging
649679
template <class BlockT, class LoopT>
650680
void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {

llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/ArrayRef.h"
2020
#include "llvm/ADT/SetVector.h"
2121
#include "llvm/IR/BasicBlock.h"
22+
#include "llvm/IR/CycleInfo.h"
2223
#include "llvm/IR/Dominators.h"
2324
#include "llvm/Support/Compiler.h"
2425
#include "llvm/Support/Printable.h"
@@ -262,6 +263,34 @@ LLVM_ABI BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To,
262263
MemorySSAUpdater *MSSAU = nullptr,
263264
const Twine &BBName = "");
264265

266+
/// \brief Create a new intermediate target block for a callbr edge.
267+
///
268+
/// Create a new basic block between a callbr instruction and one of its
269+
/// successors. The new block replaces the original successor in the callbr
270+
/// instruction and unconditionally branches to the original successor. This
271+
/// is useful for normalizing control flow, e.g., when transforming
272+
/// irreducible loops.
273+
///
/// \pre The terminator of \p CallBrBlock is a callbr instruction, and \p Succ
/// is its successor at index \p SuccIdx (both are asserted by the
/// implementation).
///
274+
/// \param CallBrBlock block containing the callbr instruction
275+
/// \param Succ original successor block
276+
/// \param SuccIdx index of the original successor in the callbr
277+
/// instruction
278+
/// \param DTU optional \p DomTreeUpdater for updating the
279+
/// dominator tree
280+
/// \param CI optional \p CycleInfo for updating cycle membership
281+
/// \param LI optional \p LoopInfo for updating loop membership
282+
/// \param UpdatedLI optional output flag indicating if \p LoopInfo has
283+
/// been updated; when non-null it is always written, and is true only if
/// the new block was added to a loop shared by \p CallBrBlock and \p Succ
284+
///
285+
/// \returns newly created intermediate target block
286+
///
287+
/// \note This function updates PHI nodes, dominator tree, loop info, and
288+
/// cycle info as needed.
289+
LLVM_ABI BasicBlock *
290+
SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ, unsigned SuccIdx,
291+
DomTreeUpdater *DTU = nullptr, CycleInfo *CI = nullptr,
292+
LoopInfo *LI = nullptr, bool *UpdatedLI = nullptr);
293+
265294
/// Sets the unwind edge of an instruction to a particular successor.
266295
LLVM_ABI void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ);
267296

llvm/include/llvm/Transforms/Utils/ControlFlowUtils.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,13 @@
1515

1616
#include "llvm/ADT/SmallVector.h"
1717
#include "llvm/ADT/StringRef.h"
18+
#include "llvm/IR/CycleInfo.h"
1819

1920
namespace llvm {
2021

2122
class BasicBlock;
23+
class CallBrInst;
24+
class LoopInfo;
2225
class DomTreeUpdater;
2326

2427
/// Given a set of branch descriptors [BB, Succ0, Succ1], create a "hub" such
@@ -104,7 +107,8 @@ struct ControlFlowHub {
104107
: BB(BB), Succ0(Succ0), Succ1(Succ1) {}
105108
};
106109

107-
void addBranch(BasicBlock *BB, BasicBlock *Succ0, BasicBlock *Succ1) {
110+
void addBranch(BasicBlock *BB, BasicBlock *Succ0,
111+
BasicBlock *Succ1 = nullptr) {
108112
assert(BB);
109113
assert(Succ0 || Succ1);
110114
Branches.emplace_back(BB, Succ0, Succ1);

llvm/lib/Transforms/Utils/BasicBlockUtils.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -674,6 +674,79 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT,
674674
return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName);
675675
}
676676

677+
/// Helper function to update the cycle or loop information after inserting a
678+
/// new block between a callbr instruction and one of its target blocks. Adds
679+
/// the new block to the innermost cycle or loop that the callbr instruction and
680+
/// the original target block share.
681+
/// \tparam TI analysis type, either CycleInfo or LoopInfo (enforced by the
/// static_assert below)
/// \tparam T element type stored in \p TI; the caller in this file passes
/// Cycle for CycleInfo and Loop for LoopInfo
/// \param LCI cycle or loop information to update; may be null, in which case
/// nothing is done
682+
/// \param CallBrBlock block containing the callbr instruction
683+
/// \param CallBrTarget new target block of the callbr instruction
684+
/// \param Succ original target block of the callbr instruction
/// \returns true if \p CallBrTarget was added to a cycle/loop; false if
/// \p LCI is null or \p CallBrBlock and \p Succ share no cycle/loop
685+
template <typename TI, typename T>
686+
static bool updateCycleLoopInfo(TI *LCI, BasicBlock *CallBrBlock,
687+
BasicBlock *CallBrTarget, BasicBlock *Succ) {
688+
static_assert(std::is_same_v<TI, CycleInfo> || std::is_same_v<TI, LoopInfo>,
689+
"type must be CycleInfo or LoopInfo");
690+
if (!LCI)
691+
return false;
692+
693+
// Innermost cycle/loop shared by the callbr block and the original successor.
T *LC;
694+
if constexpr (std::is_same_v<TI, CycleInfo>)
695+
LC = LCI->getSmallestCommonCycle(CallBrBlock, Succ);
696+
else
697+
LC = LCI->getSmallestCommonLoop(CallBrBlock, Succ);
698+
if (!LC)
699+
return false;
700+
701+
// The two analyses expose different insertion APIs: CycleInfo adds the block
// through the info object, LoopInfo through the loop itself.
if constexpr (std::is_same_v<TI, CycleInfo>)
702+
LCI->addBlockToCycle(CallBrTarget, LC);
703+
else
704+
LC->addBasicBlockToLoop(CallBrTarget, *LCI);
705+
706+
return true;
707+
}
708+
709+
/// Split the edge from the callbr terminating \p CallBrBlock to its successor
/// \p Succ (at index \p SuccIdx) by inserting a new block that branches
/// unconditionally to \p Succ.
///
/// PHI nodes in \p Succ are rewritten to name the new block as incoming block.
/// If provided, \p LI and \p CI get the new block added to the innermost
/// loop/cycle shared by \p CallBrBlock and \p Succ, and \p DTU is informed of
/// the edge changes. \p UpdatedLI, if non-null, receives whether \p LI was
/// changed.
///
/// \returns the newly created intermediate block.
BasicBlock *llvm::SplitCallBrEdge(BasicBlock *CallBrBlock, BasicBlock *Succ,
710+
unsigned SuccIdx, DomTreeUpdater *DTU,
711+
CycleInfo *CI, LoopInfo *LI,
712+
bool *UpdatedLI) {
713+
// cast<> asserts that the terminator really is a callbr; no dyn_cast + assert.
714+
CallBrInst *CallBr = cast<CallBrInst>(CallBrBlock->getTerminator());
715+
assert(SuccIdx < CallBr->getNumSuccessors() &&
716+
Succ == CallBr->getSuccessor(SuccIdx) && "invalid successor index");
717+
718+
// Create a new block between callbr and the specified successor.
719+
// splitBlockBefore cannot be re-used here since it cannot split if the split
720+
// point is a PHI node (because BasicBlock::splitBasicBlockBefore cannot
721+
// handle that). But we don't need to rewire every part of a potential PHI
722+
// node. We only care about the edge between CallBrBlock and the original
723+
// successor.
724+
BasicBlock *CallBrTarget =
725+
BasicBlock::Create(CallBrBlock->getContext(),
726+
CallBrBlock->getName() + ".target." + Succ->getName(),
727+
CallBrBlock->getParent());
728+
// Make PHI nodes in the original successor name the new target block as
// their incoming block instead of CallBrBlock.
729+
Succ->replacePhiUsesWith(CallBrBlock, CallBrTarget);
730+
// Rewire control flow from callbr to the new target block.
731+
CallBr->setSuccessor(SuccIdx, CallBrTarget);
732+
// Jump from the new target block to the original successor.
733+
BranchInst::Create(Succ, CallBrTarget);
734+
735+
bool Updated =
736+
updateCycleLoopInfo<LoopInfo, Loop>(LI, CallBrBlock, CallBrTarget, Succ);
737+
if (UpdatedLI)
738+
*UpdatedLI = Updated;
739+
updateCycleLoopInfo<CycleInfo, Cycle>(CI, CallBrBlock, CallBrTarget, Succ);
740+
if (DTU) {
741+
DTU->applyUpdates({{DominatorTree::Insert, CallBrBlock, CallBrTarget}});
742+
// NOTE(review): the Succ edge updates are only sent when CallBrBlock
// dominates Succ; presumably the tree is unaffected otherwise. Confirm.
if (DTU->getDomTree().dominates(CallBrBlock, Succ))
743+
DTU->applyUpdates({{DominatorTree::Delete, CallBrBlock, Succ},
744+
{DominatorTree::Insert, CallBrTarget, Succ}});
745+
}
746+
747+
return CallBrTarget;
748+
}
749+
677750
void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
678751
if (auto *II = dyn_cast<InvokeInst>(TI))
679752
II->setUnwindDest(Succ);

llvm/lib/Transforms/Utils/ControlFlowUtils.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "llvm/Transforms/Utils/ControlFlowUtils.h"
1414
#include "llvm/ADT/SetVector.h"
1515
#include "llvm/Analysis/DomTreeUpdater.h"
16+
#include "llvm/Analysis/LoopInfo.h"
1617
#include "llvm/IR/Constants.h"
1718
#include "llvm/IR/Instructions.h"
1819
#include "llvm/IR/ValueHandle.h"
@@ -281,7 +282,9 @@ std::pair<BasicBlock *, bool> ControlFlowHub::finalize(
281282

282283
for (auto [BB, Succ0, Succ1] : Branches) {
283284
#ifndef NDEBUG
284-
assert(Incoming.insert(BB).second && "Duplicate entry for incoming block.");
285+
assert(
286+
(Incoming.insert(BB).second || isa<CallBrInst>(BB->getTerminator())) &&
287+
"Duplicate entry for incoming block.");
285288
#endif
286289
if (Succ0)
287290
Outgoing.insert(Succ0);

0 commit comments

Comments
 (0)