@@ -387,7 +387,7 @@ exit:
387387 ret  i64  %res 
388388}
389389
390- ; TODO:  The existing assumptions should be  strong enough to vectorize this. 
390+ ; The existing assumptions are strong enough to vectorize this. 
391391define  ptr  @find_deref_pointer_distance_align_attribute_argument (ptr  align  2  %first , ptr  align  2  %last ) nofree nosync {
392392; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_attribute_argument( 
393393; CHECK-SAME: ptr align 2 [[FIRST:%.*]], ptr align 2 [[LAST:%.*]]) #[[ATTR0]] { 
@@ -401,18 +401,55 @@ define ptr @find_deref_pointer_distance_align_attribute_argument(ptr align 2 %fi
401401; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] 
402402; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] 
403403; CHECK: [[LOOP_HEADER_PREHEADER]]: 
404+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 
405+ ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]] 
406+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 
407+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 
408+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 
409+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 
410+ ; CHECK: [[VECTOR_PH]]: 
411+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 
412+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] 
413+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2 
414+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]] 
415+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 
416+ ; CHECK: [[VECTOR_BODY]]: 
417+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 
418+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 
419+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] 
420+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2 
421+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1) 
422+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
423+ ; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]] 
424+ ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) 
425+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 
426+ ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 
427+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 
428+ ; CHECK: [[MIDDLE_SPLIT]]: 
429+ ; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] 
430+ ; CHECK: [[MIDDLE_BLOCK]]: 
431+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 
432+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] 
433+ ; CHECK: [[VECTOR_EARLY_EXIT]]: 
434+ ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) 
435+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] 
436+ ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 
437+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] 
438+ ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]] 
439+ ; CHECK: [[SCALAR_PH]]: 
440+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ] 
404441; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] 
405442; CHECK: [[LOOP_HEADER]]: 
406- ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST ]], %[[LOOP_HEADER_PREHEADER ]] ] 
443+ ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ] 
407444; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2 
408445; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1 
409- ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT:.* ]], label %[[LOOP_LATCH]] 
446+ ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]] 
410447; CHECK: [[LOOP_LATCH]]: 
411448; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2 
412449; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]] 
413- ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]] 
450+ ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]  
414451; CHECK: [[EXIT_LOOPEXIT]]: 
415- ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] 
452+ ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ]  
416453; CHECK-NEXT: br label %[[EXIT]] 
417454; CHECK: [[EXIT]]: 
418455; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], %[[EXIT_LOOPEXIT]] ] 
@@ -444,7 +481,7 @@ exit:
444481 ret  ptr  %first.addr.0.lcssa.i 
445482}
446483
447- ; TODO:  The existing assumptions should be  strong enough to vectorize this. 
484+ ; The existing assumptions are strong enough to vectorize this. 
448485define  ptr  @find_deref_pointer_distance_align_assumption (ptr  %first , ptr  %last ) nofree nosync {
449486; CHECK-LABEL: define ptr @find_deref_pointer_distance_align_assumption( 
450487; CHECK-SAME: ptr [[FIRST:%.*]], ptr [[LAST:%.*]]) #[[ATTR0]] { 
@@ -458,18 +495,55 @@ define ptr @find_deref_pointer_distance_align_assumption(ptr %first, ptr %last)
458495; CHECK-NEXT: [[C_0:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] 
459496; CHECK-NEXT: br i1 [[C_0]], label %[[EXIT:.*]], label %[[LOOP_HEADER_PREHEADER:.*]] 
460497; CHECK: [[LOOP_HEADER_PREHEADER]]: 
498+ ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[LAST_I64]], -2 
499+ ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[FIRST_I64]] 
500+ ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 1 
501+ ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[TMP2]], 1 
502+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 
503+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 
504+ ; CHECK: [[VECTOR_PH]]: 
505+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 
506+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] 
507+ ; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 2 
508+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP4]] 
509+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] 
510+ ; CHECK: [[VECTOR_BODY]]: 
511+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 
512+ ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 
513+ ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[OFFSET_IDX]] 
514+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2 
515+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 1) 
516+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 
517+ ; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP6]] 
518+ ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP7]]) 
519+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 
520+ ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]] 
521+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 
522+ ; CHECK: [[MIDDLE_SPLIT]]: 
523+ ; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] 
524+ ; CHECK: [[MIDDLE_BLOCK]]: 
525+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] 
526+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] 
527+ ; CHECK: [[VECTOR_EARLY_EXIT]]: 
528+ ; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true) 
529+ ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]] 
530+ ; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 2 
531+ ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[FIRST]], i64 [[TMP13]] 
532+ ; CHECK-NEXT: br label %[[EXIT_LOOPEXIT]] 
533+ ; CHECK: [[SCALAR_PH]]: 
534+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[FIRST]], %[[LOOP_HEADER_PREHEADER]] ] 
461535; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] 
462536; CHECK: [[LOOP_HEADER]]: 
463- ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[FIRST ]], %[[LOOP_HEADER_PREHEADER ]] ] 
537+ ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ] 
464538; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[IV]], align 2 
465539; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[L]], 1 
466- ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT:.* ]], label %[[LOOP_LATCH]] 
540+ ; CHECK-NEXT: br i1 [[C_1]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_LATCH]] 
467541; CHECK: [[LOOP_LATCH]]: 
468542; CHECK-NEXT: [[IV_NEXT]] = getelementptr inbounds nuw i8, ptr [[IV]], i64 2 
469543; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[IV_NEXT]], [[LAST]] 
470- ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]] 
544+ ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP10:![0-9]+]]  
471545; CHECK: [[EXIT_LOOPEXIT]]: 
472- ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] 
546+ ; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I_PH:%.*]] = phi ptr [ [[IV_NEXT]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ]  
473547; CHECK-NEXT: br label %[[EXIT]] 
474548; CHECK: [[EXIT]]: 
475549; CHECK-NEXT: [[FIRST_ADDR_0_LCSSA_I:%.*]] = phi ptr [ [[FIRST]], %[[ENTRY]] ], [ [[FIRST_ADDR_0_LCSSA_I_PH]], %[[EXIT_LOOPEXIT]] ] 
@@ -522,7 +596,7 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si
522596; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) 
523597; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 
524598; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]] 
525- ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]] 
599+ ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]] 
526600; CHECK: [[MIDDLE_SPLIT]]: 
527601; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]] 
528602; CHECK: [[MIDDLE_BLOCK]]: 
0 commit comments