Skip to content

Commit 0fbb45e

Browse files
committed
[LV] Return getPredBlockCostDivisor as uint64_t
When the probability of a block is extremely low, HeaderFreq / BBFreq may not fit in 32 bits. Previously the result was truncated to uint32_t, which could leave a divisor of zero and cause division-by-zero exceptions on x86. Widen the return type to uint64_t, which should fit the entire range of BlockFrequency values. It's also worth noting that a frequency can never be zero according to BlockFrequency.h, so we shouldn't need to worry about division by zero in getPredBlockCostDivisor itself.
1 parent 9dc3255 commit 0fbb45e

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,7 +1240,7 @@ class LoopVectorizationCostModel {
12401240
/// Note that if a block wasn't originally predicated but was predicated due
12411241
/// to tail folding, the divisor will still be 1 because it will execute for
12421242
/// every iteration of the loop header.
1243-
inline unsigned
1243+
inline uint64_t
12441244
getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind,
12451245
const BasicBlock *BB);
12461246

@@ -2893,7 +2893,7 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
28932893
}
28942894
}
28952895

2896-
unsigned LoopVectorizationCostModel::getPredBlockCostDivisor(
2896+
uint64_t LoopVectorizationCostModel::getPredBlockCostDivisor(
28972897
TargetTransformInfo::TargetCostKind CostKind, const BasicBlock *BB) {
28982898
if (CostKind == TTI::TCK_CodeSize)
28992899
return 1;

llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,55 @@ exit:
438438
ret void
439439
}
440440

441+
; BFI computes the relative frequency of if.2 to the loop header to be extremely
442+
; low, so the discount in getPredBlockCostDivisor is high enough to not fit in
443+
; uint32_t. Make sure we return uint64_t which fits all possible BlockFrequency
444+
; values.
445+
define void @getPredBlockCostDivisor_truncate(i32 %0) {
446+
; CHECK-LABEL: define void @getPredBlockCostDivisor_truncate(
447+
; CHECK-SAME: i32 [[TMP0:%.*]]) {
448+
; CHECK-NEXT: [[ENTRY:.*]]:
449+
; CHECK-NEXT: br label %[[LOOP:.*]]
450+
; CHECK: [[LOOP]]:
451+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[TMP0]], %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ]
452+
; CHECK-NEXT: [[ISNAN_1:%.*]] = fcmp uno double 0.000000e+00, 0.000000e+00
453+
; CHECK-NEXT: br i1 [[ISNAN_1]], label %[[IF_1:.*]], label %[[LATCH]]
454+
; CHECK: [[IF_1]]:
455+
; CHECK-NEXT: [[ISNAN_2:%.*]] = fcmp uno double 0.000000e+00, 0.000000e+00
456+
; CHECK-NEXT: br i1 [[ISNAN_2]], label %[[IF_2:.*]], label %[[LATCH]]
457+
; CHECK: [[IF_2]]:
458+
; CHECK-NEXT: br label %[[LATCH]]
459+
; CHECK: [[LATCH]]:
460+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
461+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 0
462+
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
463+
; CHECK: [[EXIT]]:
464+
; CHECK-NEXT: ret void
465+
;
466+
entry:
467+
br label %loop
468+
469+
loop:
470+
%iv = phi i32 [ %0, %entry ], [ %iv.next, %latch ]
471+
%isnan.1 = fcmp uno double 0.000000e+00, 0.000000e+00
472+
br i1 %isnan.1, label %if.1, label %latch
473+
474+
if.1:
475+
%isnan.2 = fcmp uno double 0.000000e+00, 0.000000e+00
476+
br i1 %isnan.2, label %if.2, label %latch
477+
478+
if.2:
479+
br label %latch
480+
481+
latch:
482+
%iv.next = add i32 %iv, 1
483+
%ec = icmp eq i32 %iv, 0
484+
br i1 %ec, label %exit, label %loop
485+
486+
exit:
487+
ret void
488+
}
489+
441490
;.
442491
; CHECK: [[META0]] = !{[[META1:![0-9]+]]}
443492
; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}

0 commit comments

Comments
 (0)