@@ -26,23 +26,23 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
2626 ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
2727 ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
2828 ; AVX1: cost of 6 {{.*}} %V256 = shufflevector
29- ; AVX2: cost of 6 {{.*}} %V256 = shufflevector
29+ ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
3030 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
3131 %V256 = shufflevector <4 x double > %src256 , <4 x double > %src256_1 , <4 x i32 > <i32 3 , i32 3 , i32 7 , i32 6 >
3232
3333 ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
3434 ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
3535 ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
3636 ; AVX1: cost of 12 {{.*}} %V512 = shufflevector
37- ; AVX2: cost of 12 {{.*}} %V512 = shufflevector
37+ ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
3838 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
3939 %V512 = shufflevector <8 x double > %src512 , <8 x double > %src512_1 , <8 x i32 > <i32 7 , i32 6 , i32 12 , i32 4 , i32 3 , i32 2 , i32 1 , i32 15 >
4040
4141 ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
4242 ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
4343 ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
4444 ; AVX1: cost of 24 {{.*}} %V1024 = shufflevector
45- ; AVX2: cost of 24 {{.*}} %V1024 = shufflevector
45+ ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
4646 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
4747 %V1024 = shufflevector <16 x double > %src1024 , <16 x double > %src1024_1 , <16 x i32 > <i32 30 , i32 14 , i32 13 , i32 12 , i32 13 , i32 10 , i32 18 , i32 8 , i32 8 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
4848
@@ -64,23 +64,23 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512,
6464 ; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
6565 ; SSE42: cost of 6 {{.*}} %V256 = shufflevector
6666 ; AVX1: cost of 8 {{.*}} %V256 = shufflevector
67- ; AVX2: cost of 8 {{.*}} %V256 = shufflevector
67+ ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
6868 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
6969 %V256 = shufflevector <4 x i64 > %src256 , <4 x i64 > %src256_1 , <4 x i32 > <i32 3 , i32 3 , i32 7 , i32 6 >
7070
7171 ; SSE2: cost of 28 {{.*}} %V512 = shufflevector
7272 ; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
7373 ; SSE42: cost of 28 {{.*}} %V512 = shufflevector
7474 ; AVX1: cost of 16 {{.*}} %V512 = shufflevector
75- ; AVX2: cost of 16 {{.*}} %V512 = shufflevector
75+ ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
7676 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
7777 %V512 = shufflevector <8 x i64 > %src512 , <8 x i64 > %src512_1 , <8 x i32 > <i32 7 , i32 6 , i32 12 , i32 4 , i32 3 , i32 2 , i32 1 , i32 15 >
7878
7979 ; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
8080 ; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
8181 ; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
8282 ; AVX1: cost of 32 {{.*}} %V1024 = shufflevector
83- ; AVX2: cost of 32 {{.*}} %V1024 = shufflevector
83+ ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
8484 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
8585 %V1024 = shufflevector <16 x i64 > %src1024 , <16 x i64 > %src1024_1 , <16 x i32 > <i32 30 , i32 14 , i32 13 , i32 12 , i32 13 , i32 10 , i32 18 , i32 8 , i32 8 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
8686
@@ -102,23 +102,23 @@ define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %
102102 ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
103103 ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
104104 ; AVX1: cost of 14 {{.*}} %V256 = shufflevector
105- ; AVX2: cost of 14 {{.*}} %V256 = shufflevector
105+ ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
106106 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
107107 %V256 = shufflevector <8 x float > %src256 , <8 x float > %src256_1 , <8 x i32 > <i32 7 , i32 6 , i32 8 , i32 4 , i32 3 , i32 2 , i32 12 , i32 0 >
108108
109109 ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
110110 ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
111111 ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
112112 ; AVX1: cost of 28 {{.*}} %V512 = shufflevector
113- ; AVX2: cost of 28 {{.*}} %V512 = shufflevector
113+ ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
114114 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
115115 %V512 = shufflevector <16 x float > %src512 , <16 x float > %src512_1 , <16 x i32 > <i32 15 , i32 17 , i32 13 , i32 20 , i32 11 , i32 10 , i32 8 , i32 8 , i32 7 , i32 22 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
116116
117117 ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
118118 ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
119119 ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
120120 ; AVX1: cost of 56 {{.*}} %V1024 = shufflevector
121- ; AVX2: cost of 56 {{.*}} %V1024 = shufflevector
121+ ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
122122 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
123123 %V1024 = shufflevector <32 x float > %src1024 , <32 x float > %src1024_1 , <32 x i32 > <i32 31 , i32 33 , i32 20 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 48 , i32 13 , i32 12 , i32 11 , i32 11 , i32 9 , i32 45 , i32 7 , i32 11 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
124124
@@ -140,23 +140,23 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512
140140 ; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
141141 ; SSE42: cost of 12 {{.*}} %V256 = shufflevector
142142 ; AVX1: cost of 16 {{.*}} %V256 = shufflevector
143- ; AVX2: cost of 16 {{.*}} %V256 = shufflevector
143+ ; AVX2: cost of 3 {{.*}} %V256 = shufflevector
144144 ; AVX512: cost of 1 {{.*}} %V256 = shufflevector
145145 %V256 = shufflevector <8 x i32 > %src256 , <8 x i32 > %src256_1 , <8 x i32 > <i32 7 , i32 6 , i32 8 , i32 4 , i32 3 , i32 2 , i32 12 , i32 0 >
146146
147147 ; SSE2: cost of 56 {{.*}} %V512 = shufflevector
148148 ; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
149149 ; SSE42: cost of 56 {{.*}} %V512 = shufflevector
150150 ; AVX1: cost of 32 {{.*}} %V512 = shufflevector
151- ; AVX2: cost of 32 {{.*}} %V512 = shufflevector
151+ ; AVX2: cost of 18 {{.*}} %V512 = shufflevector
152152 ; AVX512: cost of 1 {{.*}} %V512 = shufflevector
153153 %V512 = shufflevector <16 x i32 > %src512 , <16 x i32 > %src512_1 , <16 x i32 > <i32 15 , i32 17 , i32 13 , i32 20 , i32 11 , i32 10 , i32 8 , i32 8 , i32 7 , i32 22 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
154154
155155 ; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
156156 ; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
157157 ; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
158158 ; AVX1: cost of 64 {{.*}} %V1024 = shufflevector
159- ; AVX2: cost of 64 {{.*}} %V1024 = shufflevector
159+ ; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
160160 ; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
161161 %V1024 = shufflevector <32 x i32 > %src1024 , <32 x i32 > %src1024_1 , <32 x i32 > <i32 31 , i32 33 , i32 20 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 48 , i32 13 , i32 12 , i32 11 , i32 11 , i32 9 , i32 45 , i32 7 , i32 11 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
162162
@@ -180,8 +180,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
180180 ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
181181 ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
182182 ; AVX1: cost of 32 {{.*}} %V256 = shufflevector
183- ; AVX2: cost of 32 {{.*}} %V256 = shufflevector
184- ; AVX512F: cost of 32 {{.*}} %V256 = shufflevector
183+ ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
184+ ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
185185 ; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
186186 ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
187187 %V256 = shufflevector <16 x i16 > %src256 , <16 x i16 > %src256_1 , <16 x i32 > <i32 15 , i32 14 , i32 13 , i32 20 , i32 21 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
@@ -190,8 +190,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
190190 ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
191191 ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
192192 ; AVX1: cost of 64 {{.*}} %V512 = shufflevector
193- ; AVX2: cost of 64 {{.*}} %V512 = shufflevector
194- ; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
193+ ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
194+ ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
195195 ; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
196196 ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
197197 %V512 = shufflevector <32 x i16 > %src512 , <32 x i16 > %src512_1 , <32 x i32 > <i32 31 , i32 30 , i32 45 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 38 , i32 11 , i32 11 , i32 9 , i32 8 , i32 7 , i32 11 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
@@ -200,8 +200,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
200200 ; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
201201 ; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
202202 ; AVX1: cost of 128 {{.*}} %V1024 = shufflevector
203- ; AVX2: cost of 128 {{.*}} %V1024 = shufflevector
204- ; AVX512F: cost of 128 {{.*}} %V1024 = shufflevector
203+ ; AVX2: cost of 196 {{.*}} %V1024 = shufflevector
204+ ; AVX512F: cost of 196 {{.*}} %V1024 = shufflevector
205205 ; AVX512BW: cost of 6 {{.*}} %V1024 = shufflevector
206206 ; AVX512VBMI: cost of 6 {{.*}} %V1024 = shufflevector
207207 %V1024 = shufflevector <64 x i16 > %src1024 , <64 x i16 > %src1024_1 , <64 x i32 > <i32 63 , i32 62 , i32 71 , i32 60 , i32 59 , i32 58 , i32 57 , i32 56 , i32 55 , i32 54 , i32 53 , i32 52 , i32 51 , i32 50 , i32 49 , i32 48 , i32 47 , i32 46 , i32 45 , i32 44 , i32 43 , i32 42 , i32 41 , i32 40 , i32 39 , i32 38 , i32 37 , i32 36 , i32 35 , i32 34 , i32 33 , i32 32 , i32 31 , i32 30 , i32 29 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 20 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 66 , i32 2 , i32 1 , i32 0 >
@@ -226,8 +226,8 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512,
226226 ; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
227227 ; SSE42: cost of 18 {{.*}} %V256 = shufflevector
228228 ; AVX1: cost of 64 {{.*}} %V256 = shufflevector
229- ; AVX2: cost of 64 {{.*}} %V256 = shufflevector
230- ; AVX512F: cost of 64 {{.*}} %V256 = shufflevector
229+ ; AVX2: cost of 7 {{.*}} %V256 = shufflevector
230+ ; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
231231 ; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
232232 ; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
233233 %V256 = shufflevector <32 x i8 > %src256 , <32 x i8 > %src256_1 , <32 x i32 > <i32 31 , i32 30 , i32 45 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 22 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 8 , i32 8 , i32 7 , i32 6 , i32 8 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
@@ -236,8 +236,8 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512,
236236 ; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
237237 ; SSE42: cost of 84 {{.*}} %V512 = shufflevector
238238 ; AVX1: cost of 128 {{.*}} %V512 = shufflevector
239- ; AVX2: cost of 128 {{.*}} %V512 = shufflevector
240- ; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
239+ ; AVX2: cost of 42 {{.*}} %V512 = shufflevector
240+ ; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
241241 ; AVX512BW: cost of 19 {{.*}} %V512 = shufflevector
242242 ; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
243243 %V512 = shufflevector <64 x i8 > %src512 , <64 x i8 > %src512_1 , <64 x i32 > <i32 63 , i32 100 , i32 61 , i32 96 , i32 59 , i32 58 , i32 57 , i32 56 , i32 55 , i32 54 , i32 53 , i32 52 , i32 51 , i32 50 , i32 49 , i32 48 , i32 47 , i32 46 , i32 45 , i32 44 , i32 43 , i32 42 , i32 41 , i32 40 , i32 39 , i32 38 , i32 37 , i32 36 , i32 35 , i32 34 , i32 33 , i32 32 , i32 31 , i32 30 , i32 29 , i32 28 , i32 27 , i32 26 , i32 25 , i32 24 , i32 23 , i32 20 , i32 21 , i32 20 , i32 19 , i32 18 , i32 17 , i32 16 , i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >
0 commit comments