Skip to content

Commit b19ec1e

Browse files
committed
[BPI] Improve unreachable/ColdCall heuristics to handle loops.
Summary: While updatePostDominatedByUnreachable attempts to find basic blocks that are post-dominated by unreachable blocks, it currently cannot handle loops precisely, because it doesn't use the actual post-dominator tree analysis but relies on the heuristic of visiting basic blocks in post-order. More precisely, when an entire loop is post-dominated by an unreachable block, the current algorithm fails to detect the entire loop as post-dominated by the unreachable block: when the algorithm reaches the loop latch, it cannot tell that all of the latch's successors (including the loop header) will "eventually" be post-dominated by the unreachable block, because it hasn't visited the loop header yet. This makes BPI assume, for the loop latch, that loop backedges are taken with 100% probability. Because of this, block frequency info sometimes marks virtually dead loops (which are post-dominated by unreachable blocks) as super hot, because a 100% backedge-taken probability pushes the loop iteration count to the max value. updatePostDominatedByColdCall has the exact same problem as well. To address this problem, this patch computes PostDominatedByUnreachable/PostDominatedByColdCall with the actual post-dominator tree. Reviewers: skatkov, chandlerc, manmanren Reviewed By: skatkov Subscribers: manmanren, vsk, apilipenko, Carrot, qcolombet, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D70104
1 parent b208088 commit b19ec1e

File tree

7 files changed

+205
-143
lines changed

7 files changed

+205
-143
lines changed

llvm/include/llvm/Analysis/BranchProbabilityInfo.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ namespace llvm {
3434
class Function;
3535
class LoopInfo;
3636
class raw_ostream;
37+
class PostDominatorTree;
3738
class TargetLibraryInfo;
3839
class Value;
3940

@@ -187,8 +188,10 @@ class BranchProbabilityInfo {
187188
/// Track the set of blocks that always lead to a cold call.
188189
SmallPtrSet<const BasicBlock *, 16> PostDominatedByColdCall;
189190

190-
void updatePostDominatedByUnreachable(const BasicBlock *BB);
191-
void updatePostDominatedByColdCall(const BasicBlock *BB);
191+
void computePostDominatedByUnreachable(const Function &F,
192+
PostDominatorTree *PDT);
193+
void computePostDominatedByColdCall(const Function &F,
194+
PostDominatorTree *PDT);
192195
bool calcUnreachableHeuristics(const BasicBlock *BB);
193196
bool calcMetadataWeights(const BasicBlock *BB);
194197
bool calcColdCallHeuristics(const BasicBlock *BB);

llvm/lib/Analysis/BranchProbabilityInfo.cpp

Lines changed: 75 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/STLExtras.h"
1717
#include "llvm/ADT/SmallVector.h"
1818
#include "llvm/Analysis/LoopInfo.h"
19+
#include "llvm/Analysis/PostDominators.h"
1920
#include "llvm/Analysis/TargetLibraryInfo.h"
2021
#include "llvm/IR/Attributes.h"
2122
#include "llvm/IR/BasicBlock.h"
@@ -146,69 +147,83 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;
146147
/// instruction. This is essentially never taken.
147148
static const uint32_t IH_NONTAKEN_WEIGHT = 1;
148149

149-
/// Add \p BB to PostDominatedByUnreachable set if applicable.
150-
void
151-
BranchProbabilityInfo::updatePostDominatedByUnreachable(const BasicBlock *BB) {
152-
const Instruction *TI = BB->getTerminator();
153-
if (TI->getNumSuccessors() == 0) {
154-
if (isa<UnreachableInst>(TI) ||
155-
// If this block is terminated by a call to
156-
// @llvm.experimental.deoptimize then treat it like an unreachable since
157-
// the @llvm.experimental.deoptimize call is expected to practically
158-
// never execute.
159-
BB->getTerminatingDeoptimizeCall())
160-
PostDominatedByUnreachable.insert(BB);
161-
return;
162-
}
150+
static void UpdatePDTWorklist(const BasicBlock *BB, PostDominatorTree *PDT,
151+
SmallVectorImpl<const BasicBlock *> &WorkList,
152+
SmallPtrSetImpl<const BasicBlock *> &TargetSet) {
153+
SmallVector<BasicBlock *, 8> Descendants;
154+
SmallPtrSet<const BasicBlock *, 16> NewItems;
155+
156+
PDT->getDescendants(const_cast<BasicBlock *>(BB), Descendants);
157+
for (auto *BB : Descendants)
158+
if (TargetSet.insert(BB).second)
159+
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
160+
if (!TargetSet.count(*PI))
161+
NewItems.insert(*PI);
162+
WorkList.insert(WorkList.end(), NewItems.begin(), NewItems.end());
163+
}
163164

164-
// If the terminator is an InvokeInst, check only the normal destination block
165-
// as the unwind edge of InvokeInst is also very unlikely taken.
166-
if (auto *II = dyn_cast<InvokeInst>(TI)) {
167-
if (PostDominatedByUnreachable.count(II->getNormalDest()))
168-
PostDominatedByUnreachable.insert(BB);
169-
return;
165+
/// Compute a set of basic blocks that are post-dominated by unreachables.
166+
void BranchProbabilityInfo::computePostDominatedByUnreachable(
167+
const Function &F, PostDominatorTree *PDT) {
168+
SmallVector<const BasicBlock *, 8> WorkList;
169+
for (auto &BB : F) {
170+
const Instruction *TI = BB.getTerminator();
171+
if (TI->getNumSuccessors() == 0) {
172+
if (isa<UnreachableInst>(TI) ||
173+
// If this block is terminated by a call to
174+
// @llvm.experimental.deoptimize then treat it like an unreachable
175+
// since the @llvm.experimental.deoptimize call is expected to
176+
// practically never execute.
177+
BB.getTerminatingDeoptimizeCall())
178+
UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByUnreachable);
179+
}
170180
}
171181

172-
for (auto *I : successors(BB))
173-
// If any of successor is not post dominated then BB is also not.
174-
if (!PostDominatedByUnreachable.count(I))
175-
return;
176-
177-
PostDominatedByUnreachable.insert(BB);
182+
while (!WorkList.empty()) {
183+
const BasicBlock *BB = WorkList.pop_back_val();
184+
if (PostDominatedByUnreachable.count(BB))
185+
continue;
186+
// If the terminator is an InvokeInst, check only the normal destination
187+
// block as the unwind edge of InvokeInst is also very unlikely taken.
188+
if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
189+
if (PostDominatedByUnreachable.count(II->getNormalDest()))
190+
UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
191+
}
192+
// If all the successors are unreachable, BB is unreachable as well.
193+
else if (!successors(BB).empty() &&
194+
llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
195+
return PostDominatedByUnreachable.count(Succ);
196+
}))
197+
UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByUnreachable);
198+
}
178199
}
179200

180-
/// Add \p BB to PostDominatedByColdCall set if applicable.
181-
void
182-
BranchProbabilityInfo::updatePostDominatedByColdCall(const BasicBlock *BB) {
183-
assert(!PostDominatedByColdCall.count(BB));
184-
const Instruction *TI = BB->getTerminator();
185-
if (TI->getNumSuccessors() == 0)
186-
return;
201+
/// Compute a set of basic blocks that are post-dominated by ColdCalls.
202+
void BranchProbabilityInfo::computePostDominatedByColdCall(
203+
const Function &F, PostDominatorTree *PDT) {
204+
SmallVector<const BasicBlock *, 8> WorkList;
205+
for (auto &BB : F)
206+
for (auto &I : BB)
207+
if (const CallInst *CI = dyn_cast<CallInst>(&I))
208+
if (CI->hasFnAttr(Attribute::Cold))
209+
UpdatePDTWorklist(&BB, PDT, WorkList, PostDominatedByColdCall);
187210

188-
// If all of successor are post dominated then BB is also done.
189-
if (llvm::all_of(successors(BB), [&](const BasicBlock *SuccBB) {
190-
return PostDominatedByColdCall.count(SuccBB);
191-
})) {
192-
PostDominatedByColdCall.insert(BB);
193-
return;
194-
}
211+
while (!WorkList.empty()) {
212+
const BasicBlock *BB = WorkList.pop_back_val();
195213

196-
// If the terminator is an InvokeInst, check only the normal destination
197-
// block as the unwind edge of InvokeInst is also very unlikely taken.
198-
if (auto *II = dyn_cast<InvokeInst>(TI))
199-
if (PostDominatedByColdCall.count(II->getNormalDest())) {
200-
PostDominatedByColdCall.insert(BB);
201-
return;
214+
// If the terminator is an InvokeInst, check only the normal destination
215+
// block as the unwind edge of InvokeInst is also very unlikely taken.
216+
if (auto *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
217+
if (PostDominatedByColdCall.count(II->getNormalDest()))
218+
UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
202219
}
203-
204-
// Otherwise, if the block itself contains a cold function, add it to the
205-
// set of blocks post-dominated by a cold call.
206-
for (auto &I : *BB)
207-
if (const CallInst *CI = dyn_cast<CallInst>(&I))
208-
if (CI->hasFnAttr(Attribute::Cold)) {
209-
PostDominatedByColdCall.insert(BB);
210-
return;
211-
}
220+
// If all of successor are post dominated then BB is also done.
221+
else if (!successors(BB).empty() &&
222+
llvm::all_of(successors(BB), [this](const BasicBlock *Succ) {
223+
return PostDominatedByColdCall.count(Succ);
224+
}))
225+
UpdatePDTWorklist(BB, PDT, WorkList, PostDominatedByColdCall);
226+
}
212227
}
213228

214229
/// Calculate edge weights for successors lead to unreachable.
@@ -983,13 +998,16 @@ void BranchProbabilityInfo::calculate(const Function &F, const LoopInfo &LI,
983998
LLVM_DEBUG(dbgs() << "\n");
984999
}
9851000

1001+
std::unique_ptr<PostDominatorTree> PDT =
1002+
std::make_unique<PostDominatorTree>(const_cast<Function &>(F));
1003+
computePostDominatedByUnreachable(F, PDT.get());
1004+
computePostDominatedByColdCall(F, PDT.get());
1005+
9861006
// Walk the basic blocks in post-order so that we can build up state about
9871007
// the successors of a block iteratively.
9881008
for (auto BB : post_order(&F.getEntryBlock())) {
9891009
LLVM_DEBUG(dbgs() << "Computing probabilities for " << BB->getName()
9901010
<< "\n");
991-
updatePostDominatedByUnreachable(BB);
992-
updatePostDominatedByColdCall(BB);
9931011
// If there is no at least two successors, no sense to set probability.
9941012
if (BB->getTerminator()->getNumSuccessors() < 2)
9951013
continue;

llvm/test/Analysis/BranchProbabilityInfo/basic.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,24 @@ exit:
141141
ret i32 %result
142142
}
143143

144+
define i32 @test_cold_loop(i32 %a, i32 %b) {
145+
entry:
146+
%cond1 = icmp eq i32 %a, 42
147+
br i1 %cond1, label %header, label %exit
148+
149+
header:
150+
br label %body
151+
152+
body:
153+
%cond2 = icmp eq i32 %b, 42
154+
br i1 %cond2, label %header, label %exit
155+
; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
156+
157+
exit:
158+
call void @coldfunc()
159+
ret i32 %b
160+
}
161+
144162
declare i32 @regular_function(i32 %i)
145163

146164
define i32 @test_cold_call_sites_with_prof(i32 %a, i32 %b, i1 %flag, i1 %flag2) {

llvm/test/Analysis/BranchProbabilityInfo/noreturn.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,32 @@ exit:
7979
ret i32 %b
8080
}
8181

82+
define i32 @test4(i32 %a, i32 %b) {
83+
; CHECK: Printing analysis {{.*}} for function 'test4'
84+
; Make sure we handle loops post-dominated by unreachables.
85+
entry:
86+
%cond1 = icmp eq i32 %a, 42
87+
br i1 %cond1, label %header, label %exit
88+
; CHECK: edge entry -> header probability is 0x00000001 / 0x80000000 = 0.00%
89+
; CHECK: edge entry -> exit probability is 0x7fffffff / 0x80000000 = 100.00% [HOT edge]
90+
91+
header:
92+
br label %body
93+
94+
body:
95+
%cond2 = icmp eq i32 %a, 42
96+
br i1 %cond2, label %header, label %abort
97+
; CHECK: edge body -> header probability is 0x40000000 / 0x80000000 = 50.00%
98+
; CHECK: edge body -> abort probability is 0x40000000 / 0x80000000 = 50.00%
99+
100+
abort:
101+
call void @abort() noreturn
102+
unreachable
103+
104+
exit:
105+
ret i32 %b
106+
}
107+
82108
@_ZTIi = external global i8*
83109

84110
; CHECK-LABEL: throwSmallException

llvm/test/CodeGen/X86/block-placement.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,11 @@ define void @unnatural_cfg2(i32* %p0, i32 %a0) {
358358
; CHECK: %loop.header
359359
; CHECK: %loop.body1
360360
; CHECK: %loop.body2
361+
; CHECK: %loop.body3
362+
; CHECK: %loop.inner1.begin
361363
; CHECK: %loop.body4
362364
; CHECK: %loop.inner2.begin
363365
; CHECK: %loop.inner2.begin
364-
; CHECK: %loop.body3
365-
; CHECK: %loop.inner1.begin
366366
; CHECK: %bail
367367

368368
entry:

llvm/test/CodeGen/X86/pr37916.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
define void @fn1() local_unnamed_addr {
88
; CHECK-LABEL: fn1:
99
; CHECK: # %bb.0: # %entry
10-
; CHECK-NEXT: .p2align 4, 0x90
1110
; CHECK-NEXT: .LBB0_1: # %if.end
1211
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
1312
; CHECK-NEXT: movl a+4, %eax

0 commit comments

Comments
 (0)