@@ -693,14 +693,89 @@ for.end:
693693 ret float %add6
694694}
695695
696- !0 = distinct !{!0 , !4 , !7 , !9 }
697- !1 = distinct !{!1 , !4 , !8 , !9 }
698- !2 = distinct !{!2 , !5 , !7 , !9 }
699- !3 = distinct !{!3 , !6 , !7 , !9 , !10 }
700- !4 = !{!"llvm.loop.vectorize.width" , i32 8 }
701- !5 = !{!"llvm.loop.vectorize.width" , i32 4 }
702- !6 = !{!"llvm.loop.vectorize.width" , i32 2 }
703- !7 = !{!"llvm.loop.interleave.count" , i32 1 }
704- !8 = !{!"llvm.loop.interleave.count" , i32 4 }
705- !9 = !{!"llvm.loop.vectorize.enable" , i1 true }
706- !10 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true }
696+ ; Test reductions for a VF of 1 and a UF > 1: the loop is forced to scalar VF with
696+ ; interleave count 4 (!4 below), so the ordered variant must chain the four scalar
696+ ; fadds serially, while the unordered variant keeps four partial sums and combines
696+ ; them in middle.block without any llvm.vector.reduce.fadd call.
697+ define float @fadd_scalar_vf (float * noalias nocapture readonly %a , i64 %n ) {
698+ ; CHECK-ORDERED-LABEL: @fadd_scalar_vf
699+ ; CHECK-ORDERED: vector.body
700+ ; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, {{.*}} ], [ %[[FADD4:.*]], %vector.body ]
701+ ; CHECK-ORDERED: %[[LOAD1:.*]] = load float, float*
702+ ; CHECK-ORDERED: %[[LOAD2:.*]] = load float, float*
703+ ; CHECK-ORDERED: %[[LOAD3:.*]] = load float, float*
704+ ; CHECK-ORDERED: %[[LOAD4:.*]] = load float, float*
705+ ; CHECK-ORDERED: %[[FADD1:.*]] = fadd float %[[VEC_PHI]], %[[LOAD1]]
706+ ; CHECK-ORDERED: %[[FADD2:.*]] = fadd float %[[FADD1]], %[[LOAD2]]
707+ ; CHECK-ORDERED: %[[FADD3:.*]] = fadd float %[[FADD2]], %[[LOAD3]]
708+ ; CHECK-ORDERED: %[[FADD4]] = fadd float %[[FADD3]], %[[LOAD4]]
709+ ; CHECK-ORDERED-NOT: call float @llvm.vector.reduce.fadd
710+ ; CHECK-ORDERED: scalar.ph
711+ ; CHECK-ORDERED: %[[MERGE_RDX:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[FADD4]], %middle.block ]
712+ ; CHECK-ORDERED: for.body
713+ ; CHECK-ORDERED: %[[SUM_PHI:.*]] = phi float [ %[[MERGE_RDX]], %scalar.ph ], [ %[[FADD5:.*]], %for.body ]
714+ ; CHECK-ORDERED: %[[LOAD5:.*]] = load float, float*
715+ ; CHECK-ORDERED: %[[FADD5]] = fadd float %[[LOAD5]], %[[SUM_PHI]]
716+ ; CHECK-ORDERED: for.end
717+ ; CHECK-ORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD5]], %for.body ], [ %[[FADD4]], %middle.block ]
718+ ; CHECK-ORDERED: ret float %[[RES_PHI]]
719+
720+ ; CHECK-UNORDERED-LABEL: @fadd_scalar_vf
721+ ; CHECK-UNORDERED: vector.body
722+ ; CHECK-UNORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[FADD1:.*]], %vector.body ]
723+ ; CHECK-UNORDERED: %[[VEC_PHI2:.*]] = phi float [ -0.000000e+00, %vector.ph ], [ %[[FADD2:.*]], %vector.body ]
724+ ; CHECK-UNORDERED: %[[VEC_PHI3:.*]] = phi float [ -0.000000e+00, %vector.ph ], [ %[[FADD3:.*]], %vector.body ]
725+ ; CHECK-UNORDERED: %[[VEC_PHI4:.*]] = phi float [ -0.000000e+00, %vector.ph ], [ %[[FADD4:.*]], %vector.body ]
726+ ; CHECK-UNORDERED: %[[LOAD1:.*]] = load float, float*
727+ ; CHECK-UNORDERED: %[[LOAD2:.*]] = load float, float*
728+ ; CHECK-UNORDERED: %[[LOAD3:.*]] = load float, float*
729+ ; CHECK-UNORDERED: %[[LOAD4:.*]] = load float, float*
730+ ; CHECK-UNORDERED: %[[FADD1]] = fadd float %[[LOAD1]], %[[VEC_PHI1]]
731+ ; CHECK-UNORDERED: %[[FADD2]] = fadd float %[[LOAD2]], %[[VEC_PHI2]]
732+ ; CHECK-UNORDERED: %[[FADD3]] = fadd float %[[LOAD3]], %[[VEC_PHI3]]
733+ ; CHECK-UNORDERED: %[[FADD4]] = fadd float %[[LOAD4]], %[[VEC_PHI4]]
734+ ; CHECK-UNORDERED-NOT: call float @llvm.vector.reduce.fadd
735+ ; CHECK-UNORDERED: middle.block
736+ ; CHECK-UNORDERED: %[[BIN_RDX1:.*]] = fadd float %[[FADD2]], %[[FADD1]]
737+ ; CHECK-UNORDERED: %[[BIN_RDX2:.*]] = fadd float %[[FADD3]], %[[BIN_RDX1]]
738+ ; CHECK-UNORDERED: %[[BIN_RDX3:.*]] = fadd float %[[FADD4]], %[[BIN_RDX2]]
739+ ; CHECK-UNORDERED: scalar.ph
740+ ; CHECK-UNORDERED: %[[MERGE_RDX:.*]] = phi float [ 0.000000e+00, %entry ], [ %[[BIN_RDX3]], %middle.block ]
741+ ; CHECK-UNORDERED: for.body
742+ ; CHECK-UNORDERED: %[[SUM_PHI:.*]] = phi float [ %[[MERGE_RDX]], %scalar.ph ], [ %[[FADD5:.*]], %for.body ]
743+ ; CHECK-UNORDERED: %[[LOAD5:.*]] = load float, float*
744+ ; CHECK-UNORDERED: %[[FADD5]] = fadd float %[[LOAD5]], %[[SUM_PHI]]
745+ ; CHECK-UNORDERED: for.end
746+ ; CHECK-UNORDERED: %[[RES_PHI:.*]] = phi float [ %[[FADD5]], %for.body ], [ %[[BIN_RDX3]], %middle.block ]
747+ ; CHECK-UNORDERED: ret float %[[RES_PHI]]
748+
749+ ; CHECK-NOT-VECTORIZED-LABEL: @fadd_scalar_vf
750+ ; CHECK-NOT-VECTORIZED-NOT: @vector.body
751+
752+ entry:
753+ br label %for.body
754+
755+ for.body:
756+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ]
757+ %sum.07 = phi float [ 0.000000e+00 , %entry ], [ %add , %for.body ]
758+ %arrayidx = getelementptr inbounds float , float * %a , i64 %iv
759+ %0 = load float , float * %arrayidx , align 4
760+ %add = fadd float %0 , %sum.07
761+ %iv.next = add nuw nsw i64 %iv , 1
762+ %exitcond.not = icmp eq i64 %iv.next , %n
763+ br i1 %exitcond.not , label %for.end , label %for.body , !llvm.loop !4
764+
765+ for.end:
766+ ret float %add
767+ }
768+
769+ !0 = distinct !{!0 , !5 , !9 , !11 } ; VF=8, IC=1, vectorization forced on
770+ !1 = distinct !{!1 , !5 , !10 , !11 } ; VF=8, IC=4, vectorization forced on
771+ !2 = distinct !{!2 , !6 , !9 , !11 } ; VF=4, IC=1, vectorization forced on
772+ !3 = distinct !{!3 , !7 , !9 , !11 , !12 } ; VF=2, IC=1, predicated (tail-folded) vectorization
773+ !4 = distinct !{!4 , !8 , !10 , !11 } ; VF=1, IC=4: scalar VF with 4x interleaving (used by @fadd_scalar_vf)
774+ !5 = !{!"llvm.loop.vectorize.width" , i32 8 } ; vectorization factor 8
775+ !6 = !{!"llvm.loop.vectorize.width" , i32 4 } ; vectorization factor 4
776+ !7 = !{!"llvm.loop.vectorize.width" , i32 2 } ; vectorization factor 2
777+ !8 = !{!"llvm.loop.vectorize.width" , i32 1 } ; vectorization factor 1 (scalar; interleaving only)
778+ !9 = !{!"llvm.loop.interleave.count" , i32 1 } ; no interleaving
779+ !10 = !{!"llvm.loop.interleave.count" , i32 4 } ; interleave (unroll) factor 4
780+ !11 = !{!"llvm.loop.vectorize.enable" , i1 true } ; force the vectorizer to run
781+ !12 = !{!"llvm.loop.vectorize.predicate.enable" , i1 true } ; enable predicated (masked) vectorization
0 commit comments