Skip to content

Commit eb17a8d

Browse files
authored
[SCEV] Preserve divisor info when adding guard info for ICMP_NE via Sub. (#163250)
Follow-up to #160500 to preserve divisibility info when creating the UMax. PR: #163250
1 parent d0ed8bc commit eb17a8d

File tree

3 files changed

+115
-37
lines changed

3 files changed

+115
-37
lines changed

llvm/lib/Analysis/ScalarEvolution.cpp

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15473,6 +15473,23 @@ void ScalarEvolution::LoopGuards::collectFromPHI(
1547315473
}
1547415474
}
1547515475

15476+
// Return a new SCEV that modifies \p Expr to the closest number divides by
15477+
// \p Divisor and greater or equal than Expr. For now, only handle constant
15478+
// Expr.
15479+
static const SCEV *getNextSCEVDividesByDivisor(const SCEV *Expr,
15480+
const APInt &DivisorVal,
15481+
ScalarEvolution &SE) {
15482+
const APInt *ExprVal;
15483+
if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
15484+
DivisorVal.isNonPositive())
15485+
return Expr;
15486+
APInt Rem = ExprVal->urem(DivisorVal);
15487+
if (Rem.isZero())
15488+
return Expr;
15489+
// return the SCEV: Expr + Divisor - Expr % Divisor
15490+
return SE.getConstant(*ExprVal + DivisorVal - Rem);
15491+
}
15492+
1547615493
void ScalarEvolution::LoopGuards::collectFromBlock(
1547715494
ScalarEvolution &SE, ScalarEvolution::LoopGuards &Guards,
1547815495
const BasicBlock *Block, const BasicBlock *Pred,
@@ -15540,22 +15557,6 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1554015557
match(LHS, m_scev_APInt(C)) && C->isNonNegative();
1554115558
};
1554215559

15543-
// Return a new SCEV that modifies \p Expr to the closest number divides by
15544-
// \p Divisor and greater or equal than Expr. For now, only handle constant
15545-
// Expr.
15546-
auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
15547-
const APInt &DivisorVal) {
15548-
const APInt *ExprVal;
15549-
if (!match(Expr, m_scev_APInt(ExprVal)) || ExprVal->isNegative() ||
15550-
DivisorVal.isNonPositive())
15551-
return Expr;
15552-
APInt Rem = ExprVal->urem(DivisorVal);
15553-
if (Rem.isZero())
15554-
return Expr;
15555-
// return the SCEV: Expr + Divisor - Expr % Divisor
15556-
return SE.getConstant(*ExprVal + DivisorVal - Rem);
15557-
};
15558-
1555915560
// Return a new SCEV that modifies \p Expr to the closest number divides by
1556015561
// \p Divisor and less or equal than Expr. For now, only handle constant
1556115562
// Expr.
@@ -15592,7 +15593,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1559215593
"Expected non-negative operand!");
1559315594
auto *DivisibleExpr =
1559415595
IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, DivisorVal)
15595-
: GetNextSCEVDividesByDivisor(MinMaxLHS, DivisorVal);
15596+
: getNextSCEVDividesByDivisor(MinMaxLHS, DivisorVal, SE);
1559615597
SmallVector<const SCEV *> Ops = {
1559715598
ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
1559815599
return SE.getMinMaxExpr(SCTy, Ops);
@@ -15675,15 +15676,15 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1567515676
case CmpInst::ICMP_UGT:
1567615677
case CmpInst::ICMP_SGT:
1567715678
RHS = SE.getAddExpr(RHS, One);
15678-
RHS = GetNextSCEVDividesByDivisor(RHS, DividesBy);
15679+
RHS = getNextSCEVDividesByDivisor(RHS, DividesBy, SE);
1567915680
break;
1568015681
case CmpInst::ICMP_ULE:
1568115682
case CmpInst::ICMP_SLE:
1568215683
RHS = GetPreviousSCEVDividesByDivisor(RHS, DividesBy);
1568315684
break;
1568415685
case CmpInst::ICMP_UGE:
1568515686
case CmpInst::ICMP_SGE:
15686-
RHS = GetNextSCEVDividesByDivisor(RHS, DividesBy);
15687+
RHS = getNextSCEVDividesByDivisor(RHS, DividesBy, SE);
1568715688
break;
1568815689
default:
1568915690
break;
@@ -15737,7 +15738,7 @@ void ScalarEvolution::LoopGuards::collectFromBlock(
1573715738
case CmpInst::ICMP_NE:
1573815739
if (match(RHS, m_scev_Zero())) {
1573915740
const SCEV *OneAlignedUp =
15740-
GetNextSCEVDividesByDivisor(One, DividesBy);
15741+
getNextSCEVDividesByDivisor(One, DividesBy, SE);
1574115742
To = SE.getUMaxExpr(FromRewritten, OneAlignedUp);
1574215743
} else {
1574315744
// LHS != RHS can be rewritten as (LHS - RHS) = UMax(1, LHS - RHS),
@@ -15963,8 +15964,11 @@ const SCEV *ScalarEvolution::LoopGuards::rewrite(const SCEV *Expr) const {
1596315964
if (MatchBinarySub(S, LHS, RHS)) {
1596415965
if (LHS > RHS)
1596515966
std::swap(LHS, RHS);
15966-
if (NotEqual.contains({LHS, RHS}))
15967-
return SE.getUMaxExpr(S, SE.getOne(S->getType()));
15967+
if (NotEqual.contains({LHS, RHS})) {
15968+
const SCEV *OneAlignedUp = getNextSCEVDividesByDivisor(
15969+
SE.getOne(S->getType()), SE.getConstantMultiple(S), SE);
15970+
return SE.getUMaxExpr(OneAlignedUp, S);
15971+
}
1596815972
}
1596915973
return nullptr;
1597015974
};

llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,7 +1431,7 @@ define void @ptr_induction_early_exit_eq_1_with_align_on_load(ptr %a, ptr %b, pt
14311431
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
14321432
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
14331433
; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
1434-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951
1434+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950
14351435
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
14361436
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
14371437
; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
@@ -1470,7 +1470,7 @@ define void @ptr_induction_early_exit_eq_1_with_align_on_arguments(ptr align 8 %
14701470
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
14711471
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
14721472
; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8)
1473-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951
1473+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950
14741474
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8)
14751475
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
14761476
; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a to i64)) + (ptrtoint ptr %b to i64)) /u 8)
@@ -1511,7 +1511,7 @@ define void @ptr_induction_early_exit_eq_1_align_assumption_1(ptr %a, ptr %b, pt
15111511
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
15121512
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
15131513
; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
1514-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951
1514+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950
15151515
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
15161516
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
15171517
; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
@@ -1556,7 +1556,7 @@ define void @ptr_induction_early_exit_eq_1_align_assumption_2(ptr %a, ptr %b, pt
15561556
; CHECK-NEXT: Loop %loop: <multiple exits> Unpredictable backedge-taken count.
15571557
; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE***
15581558
; CHECK-NEXT: exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
1559-
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693951
1559+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 2305843009213693950
15601560
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)
15611561
; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE***
15621562
; CHECK-NEXT: symbolic max exit count for loop.inc: ((-8 + (-1 * (ptrtoint ptr %a_ to i64)) + (ptrtoint ptr %b_ to i64)) /u 8)

llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ exit:
387387
ret i64 %res
388388
}
389389

390-
; TODO: The existing assumptions should be strong enough to vectorize this.
390+
; The existing assumptions are strong enough to vectorize this.
391391
define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %first, ptr align 2 %last) nofree nosync {
392392
; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_attribute_argument(
393393
; CHECK-SAME: ptr align 2 [[FIRST:%.*]], ptr align 2 [[LAST:%.*]]) #[[ATTR0]] {
@@ -401,18 +401,55 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi
401401
; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
402402
; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
403403
; CHECK: [[LOOP_HEADER_PREHEADER]]:
404+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2
405+
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]]
406+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1
407+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
408+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
409+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
410+
; CHECK: [[VECTOR_PH]]:
411+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
412+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
413+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2
414+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]]
415+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
416+
; CHECK: [[VECTOR_BODY]]:
417+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
418+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
419+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
420+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
421+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1)
422+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
423+
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
424+
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
425+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
426+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
427+
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
428+
; CHECK: [[MIDDLE_SPLIT]]:
429+
; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
430+
; CHECK: [[MIDDLE_BLOCK]]:
431+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
432+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
433+
; CHECK: [[VECTOR_EARLY_EXIT]]:
434+
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
435+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
436+
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
437+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
438+
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]]
439+
; CHECK: [[SCALAR_PH]]:
440+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ]
404441
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
405442
; CHECK: [[LOOP_HEADER]]:
406-
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ]
443+
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
407444
; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2
408445
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1
409-
; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH]]
446+
; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]]
410447
; CHECK: [[LOOP_LATCH]]:
411448
; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
412449
; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]]
413-
; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]]
450+
; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]
414451
; CHECK: [[EXIT_LOOPEXIT]]:
415-
; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ]
452+
; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ]
416453
; CHECK-NEXT: br label %[[EXIT]]
417454
; CHECK: [[EXIT]]:
418455
; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], %[[EXIT_LOOPEXIT]] ]
@@ -444,7 +481,7 @@ exit:
444481
ret ptr %first.addr.0.lcssa.i
445482
}
446483

447-
; TODO: The existing assumptions should be strong enough to vectorize this.
484+
; The existing assumptions are strong enough to vectorize this.
448485
define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last) nofree nosync {
449486
; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_assumption(
450487
; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]]) #[[ATTR0]] {
@@ -458,18 +495,55 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last)
458495
; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]]
459496
; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]]
460497
; CHECK: [[LOOP_HEADER_PREHEADER]]:
498+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2
499+
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]]
500+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1
501+
; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1
502+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
503+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
504+
; CHECK: [[VECTOR_PH]]:
505+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
506+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
507+
; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2
508+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]]
509+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
510+
; CHECK: [[VECTOR_BODY]]:
511+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
512+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
513+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]]
514+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
515+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1)
516+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
517+
; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]]
518+
; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]])
519+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
520+
; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
521+
; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
522+
; CHECK: [[MIDDLE_SPLIT]]:
523+
; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
524+
; CHECK: [[MIDDLE_BLOCK]]:
525+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
526+
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
527+
; CHECK: [[VECTOR_EARLY_EXIT]]:
528+
; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
529+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
530+
; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2
531+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]]
532+
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]]
533+
; CHECK: [[SCALAR_PH]]:
534+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ]
461535
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
462536
; CHECK: [[LOOP_HEADER]]:
463-
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ]
537+
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
464538
; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2
465539
; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1
466-
; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP_LATCH]]
540+
; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]]
467541
; CHECK: [[LOOP_LATCH]]:
468542
; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2
469543
; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]]
470-
; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]]
544+
; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP10:![0-9]+]]
471545
; CHECK: [[EXIT_LOOPEXIT]]:
472-
; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ]
546+
; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ]
473547
; CHECK-NEXT: br label %[[EXIT]]
474548
; CHECK: [[EXIT]]:
475549
; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], %[[EXIT_LOOPEXIT]] ]
@@ -522,7 +596,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
522596
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
523597
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024
524598
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
525-
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
599+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
526600
; CHECK: [[MIDDLE_SPLIT]]:
527601
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
528602
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)