1313; CHECK-LABEL: 'test_vXf64'
1414define void @test_vXf64 (<2 x double > %src128 , <4 x double > %src256 , <8 x double > %src512 , <16 x double > %src1024 ) {
1515
16- ; SSE2: cost of 2 {{.*}} %V128 = shufflevector
17- ; SSSE3: cost of 2 {{.*}} %V128 = shufflevector
18- ; SSE42: cost of 2 {{.*}} %V128 = shufflevector
19- ; AVX1: cost of 2 {{.*}} %V128 = shufflevector
20- ; AVX2: cost of 2 {{.*}} %V128 = shufflevector
16+ ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
17+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
18+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
19+ ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
20+ ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
2121 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
2222 %V128 = shufflevector <2 x double > %src128 , <2 x double > undef , <2 x i32 > <i32 1 , i32 1 >
2323
24- ; SSE2: cost of 4 {{.*}} %V256 = shufflevector
25- ; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
26- ; SSE42: cost of 4 {{.*}} %V256 = shufflevector
27- ; AVX1: cost of 6 {{.*}} %V256 = shufflevector
28- ; AVX2: cost of 6 {{.*}} %V256 = shufflevector
24+ ; SSE2: cost of 2 {{.*}} %V256 = shufflevector
25+ ; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
26+ ; SSE42: cost of 2 {{.*}} %V256 = shufflevector
27+ ; AVX1: cost of 3 {{.*}} %V256 = shufflevector
28+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
2929 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
3030 %V256 = shufflevector <4 x double > %src256 , <4 x double > undef , <4 x i32 > <i32 3 , i32 3 , i32 1 , i32 0 >
3131
32- ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
33- ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
34- ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
32+ ; SSE2: cost of 12 {{.*}} %V512 = shufflevector
33+ ; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
34+ ; SSE42: cost of 12 {{.*}} %V512 = shufflevector
3535 ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
3636 ; AVX2: cost of 12 {{.*}} %V512 = shufflevector
3737 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
3838 %V512 = shufflevector <8 x double > %src512 , <8 x double > undef , <8 x i32 > <i32 7 , i32 6 , i32 6 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
3939
40- ; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
41- ; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
42- ; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
40+ ; SSE2: cost of 56 {{.*}} %V1024 = shufflevector
41+ ; SSSE3: cost of 56 {{.*}} %V1024 = shufflevector
42+ ; SSE42: cost of 56 {{.*}} %V1024 = shufflevector
4343 ; AVX1: cost of 72 {{.*}} %V1024 = shufflevector
4444 ; AVX2: cost of 72 {{.*}} %V1024 = shufflevector
4545 ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
@@ -59,17 +59,17 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
5959 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
6060 %V128 = shufflevector <2 x i64 > %src128 , <2 x i64 > undef , <2 x i32 > <i32 1 , i32 1 >
6161
62- ; SSE2: cost of 8 {{.*}} %V256 = shufflevector
63- ; SSSE3: cost of 8 {{.*}} %V256 = shufflevector
64- ; SSE42: cost of 8 {{.*}} %V256 = shufflevector
65- ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
62+ ; SSE2: cost of 2 {{.*}} %V256 = shufflevector
63+ ; SSSE3: cost of 2 {{.*}} %V256 = shufflevector
64+ ; SSE42: cost of 2 {{.*}} %V256 = shufflevector
65+ ; AVX1: cost of 3 {{.*}} %V256 = shufflevector
6666 ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
6767 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
6868 %V256 = shufflevector <4 x i64 > %src256 , <4 x i64 > undef , <4 x i32 > <i32 3 , i32 3 , i32 1 , i32 0 >
6969
70- ; SSE2: cost of 48 {{.*}} %V512 = shufflevector
71- ; SSSE3: cost of 48 {{.*}} %V512 = shufflevector
72- ; SSE42: cost of 48 {{.*}} %V512 = shufflevector
70+ ; SSE2: cost of 12 {{.*}} %V512 = shufflevector
71+ ; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
72+ ; SSE42: cost of 12 {{.*}} %V512 = shufflevector
7373 ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
7474 ; AVX2: cost of 16 {{.*}} %V512 = shufflevector
7575 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
@@ -81,25 +81,25 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
8181; CHECK-LABEL: 'test_vXf32'
8282define void @test_vXf32 (<4 x float > %src128 , <8 x float > %src256 , <16 x float > %src512 ) {
8383
84- ; SSE2: cost of 6 {{.*}} %V128 = shufflevector
85- ; SSSE3: cost of 6 {{.*}} %V128 = shufflevector
86- ; SSE42: cost of 6 {{.*}} %V128 = shufflevector
87- ; AVX1: cost of 6 {{.*}} %V128 = shufflevector
88- ; AVX2: cost of 6 {{.*}} %V128 = shufflevector
84+ ; SSE2: cost of 1 {{.*}} %V128 = shufflevector
85+ ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
86+ ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
87+ ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
88+ ; AVX2: cost of 1 {{.*}} %V128 = shufflevector
8989 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
9090 %V128 = shufflevector <4 x float > %src128 , <4 x float > undef , <4 x i32 > <i32 3 , i32 3 , i32 1 , i32 0 >
9191
92- ; SSE2: cost of 12 {{.*}} %V256 = shufflevector
93- ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
94- ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
95- ; AVX1: cost of 14 {{.*}} %V256 = shufflevector
96- ; AVX2: cost of 14 {{.*}} %V256 = shufflevector
92+ ; SSE2: cost of 4 {{.*}} %V256 = shufflevector
93+ ; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
94+ ; SSE42: cost of 4 {{.*}} %V256 = shufflevector
95+ ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
96+ ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
9797 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
9898 %V256 = shufflevector <8 x float > %src256 , <8 x float > undef , <8 x i32 > <i32 7 , i32 6 , i32 6 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
9999
100- ; SSE2: cost of 72 {{.*}} %V512 = shufflevector
101- ; SSSE3: cost of 72 {{.*}} %V512 = shufflevector
102- ; SSE42: cost of 72 {{.*}} %V512 = shufflevector
100+ ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
101+ ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
102+ ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
103103 ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
104104 ; AVX2: cost of 28 {{.*}} %V512 = shufflevector
105105 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
@@ -119,25 +119,25 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512
119119 ; AVX512: cost of 1 {{.*}} %V128 = shufflevector
120120 %V128 = shufflevector <4 x i32 > %src128 , <4 x i32 > undef , <4 x i32 > <i32 3 , i32 3 , i32 1 , i32 0 >
121121
122- ; SSE2: cost of 16 {{.*}} %V256 = shufflevector
123- ; SSSE3: cost of 16 {{.*}} %V256 = shufflevector
124- ; SSE42: cost of 16 {{.*}} %V256 = shufflevector
125- ; AVX1: cost of 16 {{.*}} %V256 = shufflevector
122+ ; SSE2: cost of 4 {{.*}} %V256 = shufflevector
123+ ; SSSE3: cost of 4 {{.*}} %V256 = shufflevector
124+ ; SSE42: cost of 4 {{.*}} %V256 = shufflevector
125+ ; AVX1: cost of 4 {{.*}} %V256 = shufflevector
126126 ; AVX2: cost of 1 {{.*}} %V256 = shufflevector
127127 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
128128 %V256 = shufflevector <8 x i32 > %src256 , <8 x i32 > undef , <8 x i32 > <i32 7 , i32 6 , i32 5 , i32 5 , i32 3 , i32 2 , i32 1 , i32 0 >
129129
130- ; SSE2: cost of 96 {{.*}} %V512 = shufflevector
131- ; SSSE3: cost of 96 {{.*}} %V512 = shufflevector
132- ; SSE42: cost of 96 {{.*}} %V512 = shufflevector
130+ ; SSE2: cost of 24 {{.*}} %V512 = shufflevector
131+ ; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
132+ ; SSE42: cost of 24 {{.*}} %V512 = shufflevector
133133 ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
134134 ; AVX2: cost of 32 {{.*}} %V512 = shufflevector
135135 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
136136 %V512 = shufflevector <16 x i32 > %src512 , <16 x i32 > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 13 , i32 10 , i32 9 , i32 8 , i32 8 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
137137
138- ; SSE2: cost of 448 {{.*}} %V1024 = shufflevector
139- ; SSSE3: cost of 448 {{.*}} %V1024 = shufflevector
140- ; SSE42: cost of 448 {{.*}} %V1024 = shufflevector
138+ ; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
139+ ; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
140+ ; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
141141 ; AVX1: cost of 192 {{.*}} %V1024 = shufflevector
142142 ; AVX2: cost of 192 {{.*}} %V1024 = shufflevector
143143 ; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
@@ -148,7 +148,7 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512
148148; CHECK-LABEL: 'test_vXi16'
149149define void @test_vXi16 (<8 x i16 > %src128 , <16 x i16 > %src256 , <32 x i16 > %src512 , <64 x i16 > %src1024 ) {
150150
151- ; SSE2: cost of 16 {{.*}} %V128 = shufflevector
151+ ; SSE2: cost of 5 {{.*}} %V128 = shufflevector
152152 ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
153153 ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
154154 ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
@@ -158,26 +158,26 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
158158 %V128 = shufflevector <8 x i16 > %src128 , <8 x i16 > undef , <8 x i32 > <i32 7 , i32 6 , i32 6 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
159159
160160 ; SSE2: cost of 32 {{.*}} %V256 = shufflevector
161- ; SSSE3: cost of 32 {{.*}} %V256 = shufflevector
162- ; SSE42: cost of 32 {{.*}} %V256 = shufflevector
163- ; AVX1: cost of 32 {{.*}} %V256 = shufflevector
161+ ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
162+ ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
163+ ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
164164 ; AVX2: cost of 4 {{.*}} %V256 = shufflevector
165165 ; AVX512F: cost of 4 {{.*}} %V256 = shufflevector
166166 ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
167167 %V256 = shufflevector <16 x i16 > %src256 , <16 x i16 > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 13 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
168168
169169 ; SSE2: cost of 192 {{.*}} %V512 = shufflevector
170- ; SSSE3: cost of 192 {{.*}} %V512 = shufflevector
171- ; SSE42: cost of 192 {{.*}} %V512 = shufflevector
170+ ; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
171+ ; SSE42: cost of 36 {{.*}} %V512 = shufflevector
172172 ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
173173 ; AVX2: cost of 64 {{.*}} %V512 = shufflevector
174174 ; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
175175 ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
176176 %V512 = shufflevector <32 x i16 > %src512 , <32 x i16 > undef , <32 x i32 > <i32 31 , i32 30 , i32 20 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 11 , i32 9 , i32 8 , i32 7 , i32 11 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
177177
178178 ; SSE2: cost of 896 {{.*}} %V1024 = shufflevector
179- ; SSSE3: cost of 896 {{.*}} %V1024 = shufflevector
180- ; SSE42: cost of 896 {{.*}} %V1024 = shufflevector
179+ ; SSSE3: cost of 168 {{.*}} %V1024 = shufflevector
180+ ; SSE42: cost of 168 {{.*}} %V1024 = shufflevector
181181 ; AVX1: cost of 384 {{.*}} %V1024 = shufflevector
182182 ; AVX2: cost of 384 {{.*}} %V1024 = shufflevector
183183 ; AVX512F: cost of 384 {{.*}} %V1024 = shufflevector
@@ -188,7 +188,7 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
188188
189189; CHECK-LABEL: 'test_vXi8'
190190define void @test_vXi8 (<16 x i8 > %src128 , <32 x i8 > %src256 , <64 x i8 > %src512 ) {
191- ; SSE2: cost of 32 {{.*}} %V128 = shufflevector
191+ ; SSE2: cost of 10 {{.*}} %V128 = shufflevector
192192 ; SSSE3: cost of 1 {{.*}} %V128 = shufflevector
193193 ; SSE42: cost of 1 {{.*}} %V128 = shufflevector
194194 ; AVX1: cost of 1 {{.*}} %V128 = shufflevector
@@ -197,17 +197,17 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512)
197197 %V128 = shufflevector <16 x i8 > %src128 , <16 x i8 > undef , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 11 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
198198
199199 ; SSE2: cost of 64 {{.*}} %V256 = shufflevector
200- ; SSSE3: cost of 64 {{.*}} %V256 = shufflevector
201- ; SSE42: cost of 64 {{.*}} %V256 = shufflevector
202- ; AVX1: cost of 64 {{.*}} %V256 = shufflevector
200+ ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
201+ ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
202+ ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
203203 ; AVX2: cost of 4 {{.*}} %V256 = shufflevector
204204 ; AVX512F: cost of 4 {{.*}} %V256 = shufflevector
205205 ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
206206 %V256 = shufflevector <32 x i8 > %src256 , <32 x i8 > undef , <32 x i32 > <i32 31 , i32 30 , i32 29 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 8 , i32 8 , i32 7 , i32 6 , i32 8 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
207207
208208 ; SSE2: cost of 384 {{.*}} %V512 = shufflevector
209- ; SSSE3: cost of 384 {{.*}} %V512 = shufflevector
210- ; SSE42: cost of 384 {{.*}} %V512 = shufflevector
209+ ; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
210+ ; SSE42: cost of 36 {{.*}} %V512 = shufflevector
211211 ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
212212 ; AVX2: cost of 128 {{.*}} %V512 = shufflevector
213213 ; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
0 commit comments