@@ -5,32 +5,30 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
55; CHECK-LABEL: lower_trunc_16xi8: 
66; CHECK: // %bb.0: 
77; CHECK-NEXT: fmov s0, w0 
8- ; CHECK-NEXT: ldr h1, [sp] 
8+ ; CHECK-NEXT: mov x8, sp 
9+ ; CHECK-NEXT: mov v0.b[1], w1 
10+ ; CHECK-NEXT: mov v0.b[2], w2 
11+ ; CHECK-NEXT: mov v0.b[3], w3 
12+ ; CHECK-NEXT: mov v0.b[4], w4 
13+ ; CHECK-NEXT: mov v0.b[5], w5 
14+ ; CHECK-NEXT: mov v0.b[6], w6 
15+ ; CHECK-NEXT: mov v0.b[7], w7 
16+ ; CHECK-NEXT: ld1 { v0.b }[8], [x8] 
917; CHECK-NEXT: add x8, sp, #8 
10- ; CHECK-NEXT: ld1 { v1.h  }[1 ], [x8] 
18+ ; CHECK-NEXT: ld1 { v0.b  }[9 ], [x8] 
1119; CHECK-NEXT: add x8, sp, #16 
12- ; CHECK-NEXT: mov v0.h[1], w1 
13- ; CHECK-NEXT: ld1 { v1.h }[2], [x8] 
20+ ; CHECK-NEXT: ld1 { v0.b }[10], [x8] 
1421; CHECK-NEXT: add x8, sp, #24 
15- ; CHECK-NEXT: mov v0.h[2], w2 
16- ; CHECK-NEXT: ld1 { v1.h }[3], [x8] 
22+ ; CHECK-NEXT: ld1 { v0.b }[11], [x8] 
1723; CHECK-NEXT: add x8, sp, #32 
18- ; CHECK-NEXT: mov v0.h[3], w3 
19- ; CHECK-NEXT: ld1 { v1.h }[4], [x8] 
24+ ; CHECK-NEXT: ld1 { v0.b }[12], [x8] 
2025; CHECK-NEXT: add x8, sp, #40 
21- ; CHECK-NEXT: ld1 { v1.h  }[5 ], [x8] 
26+ ; CHECK-NEXT: ld1 { v0.b  }[13 ], [x8] 
2227; CHECK-NEXT: add x8, sp, #48 
23- ; CHECK-NEXT: mov v0.h[4], w4 
24- ; CHECK-NEXT: ld1 { v1.h }[6], [x8] 
28+ ; CHECK-NEXT: ld1 { v0.b }[14], [x8] 
2529; CHECK-NEXT: add x8, sp, #56 
26- ; CHECK-NEXT: mov v0.h[5], w5 
27- ; CHECK-NEXT: ld1 { v1.h }[7], [x8] 
28- ; CHECK-NEXT: mov v0.h[6], w6 
29- ; CHECK-NEXT: add v2.8h, v1.8h, v1.8h 
30- ; CHECK-NEXT: mov v0.h[7], w7 
31- ; CHECK-NEXT: add v3.8h, v0.8h, v0.8h 
32- ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b 
33- ; CHECK-NEXT: uzp1 v1.16b, v3.16b, v2.16b 
30+ ; CHECK-NEXT: ld1 { v0.b }[15], [x8] 
31+ ; CHECK-NEXT: add v1.16b, v0.16b, v0.16b 
3432; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b 
3533; CHECK-NEXT: ret 
3634 %a1  = insertelement  <16  x i16 > poison, i16  %a , i16  0 
@@ -59,18 +57,15 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
5957define  <8  x i16 > @lower_trunc_8xi16 (i32  %a , i32  %b , i32  %c , i32  %d , i32  %e , i32  %f , i32  %g , i32  %h ) {
6058; CHECK-LABEL: lower_trunc_8xi16: 
6159; CHECK: // %bb.0: 
62- ; CHECK-NEXT: fmov s0, w4 
63- ; CHECK-NEXT: fmov s1, w0 
64- ; CHECK-NEXT: mov v0.s[1], w5 
65- ; CHECK-NEXT: mov v1.s[1], w1 
66- ; CHECK-NEXT: mov v0.s[2], w6 
67- ; CHECK-NEXT: mov v1.s[2], w2 
68- ; CHECK-NEXT: mov v0.s[3], w7 
69- ; CHECK-NEXT: mov v1.s[3], w3 
70- ; CHECK-NEXT: add v2.4s, v0.4s, v0.4s 
71- ; CHECK-NEXT: add v3.4s, v1.4s, v1.4s 
72- ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h 
73- ; CHECK-NEXT: uzp1 v1.8h, v3.8h, v2.8h 
60+ ; CHECK-NEXT: fmov s0, w0 
61+ ; CHECK-NEXT: mov v0.h[1], w1 
62+ ; CHECK-NEXT: mov v0.h[2], w2 
63+ ; CHECK-NEXT: mov v0.h[3], w3 
64+ ; CHECK-NEXT: mov v0.h[4], w4 
65+ ; CHECK-NEXT: mov v0.h[5], w5 
66+ ; CHECK-NEXT: mov v0.h[6], w6 
67+ ; CHECK-NEXT: mov v0.h[7], w7 
68+ ; CHECK-NEXT: add v1.8h, v0.8h, v0.8h 
7469; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b 
7570; CHECK-NEXT: ret 
7671 %a1  = insertelement  <8  x i32 > poison, i32  %a , i32  0 
@@ -91,14 +86,11 @@ define <8 x i16> @lower_trunc_8xi16(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32
9186define  <4  x i32 > @lower_trunc_4xi32 (i64  %a , i64  %b , i64  %c , i64  %d ) {
9287; CHECK-LABEL: lower_trunc_4xi32: 
9388; CHECK: // %bb.0: 
94- ; CHECK-NEXT: fmov d0, x2 
95- ; CHECK-NEXT: fmov d1, x0 
96- ; CHECK-NEXT: mov v0.d[1], x3 
97- ; CHECK-NEXT: mov v1.d[1], x1 
98- ; CHECK-NEXT: add v2.2d, v0.2d, v0.2d 
99- ; CHECK-NEXT: add v3.2d, v1.2d, v1.2d 
100- ; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s 
101- ; CHECK-NEXT: uzp1 v1.4s, v3.4s, v2.4s 
89+ ; CHECK-NEXT: fmov s0, w0 
90+ ; CHECK-NEXT: mov v0.s[1], w1 
91+ ; CHECK-NEXT: mov v0.s[2], w2 
92+ ; CHECK-NEXT: mov v0.s[3], w3 
93+ ; CHECK-NEXT: add v1.4s, v0.4s, v0.4s 
10294; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b 
10395; CHECK-NEXT: ret 
10496 %a1  = insertelement  <4  x i64 > poison, i64  %a , i64  0 
@@ -115,24 +107,20 @@ define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) {
115107define  <8  x i32 > @lower_trunc_8xi32 (i64  %a , i64  %b , i64  %c , i64  %d , i64  %e , i64  %f , i64  %g , i64  %h ) {
116108; CHECK-LABEL: lower_trunc_8xi32: 
117109; CHECK: // %bb.0: 
118- ; CHECK-NEXT: fmov d0, x2 
119- ; CHECK-NEXT: fmov d1, x0 
120- ; CHECK-NEXT: fmov d2, x6 
121- ; CHECK-NEXT: fmov d3, x4 
122- ; CHECK-NEXT: mov v0.d[1], x3 
123- ; CHECK-NEXT: mov v1.d[1], x1 
124- ; CHECK-NEXT: mov v2.d[1], x7 
125- ; CHECK-NEXT: mov v3.d[1], x5 
126- ; CHECK-NEXT: add v4.2d, v0.2d, v0.2d 
127- ; CHECK-NEXT: add v5.2d, v1.2d, v1.2d 
128- ; CHECK-NEXT: add v6.2d, v2.2d, v2.2d 
129- ; CHECK-NEXT: add v7.2d, v3.2d, v3.2d 
110+ ; CHECK-NEXT: fmov d0, x6 
111+ ; CHECK-NEXT: fmov d1, x4 
112+ ; CHECK-NEXT: fmov d2, x2 
113+ ; CHECK-NEXT: fmov d3, x0 
114+ ; CHECK-NEXT: mov v0.d[1], x7 
115+ ; CHECK-NEXT: mov v1.d[1], x5 
116+ ; CHECK-NEXT: mov v2.d[1], x3 
117+ ; CHECK-NEXT: mov v3.d[1], x1 
118+ ; CHECK-NEXT: uzp1 v1.4s, v1.4s, v0.4s 
130119; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s 
131- ; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s 
132- ; CHECK-NEXT: uzp1 v3.4s, v5.4s, v4.4s 
133- ; CHECK-NEXT: uzp1 v1.4s, v7.4s, v6.4s 
134- ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b 
135- ; CHECK-NEXT: eor v1.16b, v2.16b, v1.16b 
120+ ; CHECK-NEXT: add v3.4s, v1.4s, v1.4s 
121+ ; CHECK-NEXT: add v0.4s, v2.4s, v2.4s 
122+ ; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b 
123+ ; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b 
136124; CHECK-NEXT: ret 
137125 %a1  = insertelement  <8  x i64 > poison, i64  %a , i64  0 
138126 %b1  = insertelement  <8  x i64 > %a1 , i64  %b , i64  1 
0 commit comments