Skip to content

Commit 5728970

Browse files
committed
[CostModel][X86] Add avx2 two-src shuffle costs
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310645 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 9f5f47d commit 5728970

File tree

3 files changed

+45
-36
lines changed

3 files changed

+45
-36
lines changed

lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -842,9 +842,18 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
842842
{ TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
843843
{ TTI::SK_PermuteSingleSrc, MVT::v4i64, 1 }, // vpermq
844844
{ TTI::SK_PermuteSingleSrc, MVT::v8i32, 1 }, // vpermd
845-
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2 * vpshufb
845+
{ TTI::SK_PermuteSingleSrc, MVT::v16i16, 4 }, // vperm2i128 + 2*vpshufb
846846
// + vpblendvb
847-
{ TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 } // vperm2i128 + 2 * vpshufb
847+
{ TTI::SK_PermuteSingleSrc, MVT::v32i8, 4 }, // vperm2i128 + 2*vpshufb
848+
// + vpblendvb
849+
850+
{ TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vpermpd + vblendpd
851+
{ TTI::SK_PermuteTwoSrc, MVT::v8f32, 3 }, // 2*vpermps + vblendps
852+
{ TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vpermq + vpblendd
853+
{ TTI::SK_PermuteTwoSrc, MVT::v8i32, 3 }, // 2*vpermd + vpblendd
854+
{ TTI::SK_PermuteTwoSrc, MVT::v16i16, 7 }, // 2*vperm2i128 + 4*vpshufb
855+
// + vpblendvb
856+
{ TTI::SK_PermuteTwoSrc, MVT::v32i8, 7 }, // 2*vperm2i128 + 4*vpshufb
848857
// + vpblendvb
849858
};
850859

test/Analysis/CostModel/X86/shuffle-single-src.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -34,15 +34,15 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
3434
; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
3535
; SSE42: cost of 12 {{.*}} %V512 = shufflevector
3636
; AVX1: cost of 12 {{.*}} %V512 = shufflevector
37-
; AVX2: cost of 12 {{.*}} %V512 = shufflevector
37+
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
3838
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
3939
%V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
4040

4141
; SSE2: cost of 56 {{.*}} %V1024 = shufflevector
4242
; SSSE3: cost of 56 {{.*}} %V1024 = shufflevector
4343
; SSE42: cost of 56 {{.*}} %V1024 = shufflevector
4444
; AVX1: cost of 72 {{.*}} %V1024 = shufflevector
45-
; AVX2: cost of 72 {{.*}} %V1024 = shufflevector
45+
; AVX2: cost of 36 {{.*}} %V1024 = shufflevector
4646
; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
4747
%V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
4848

@@ -72,7 +72,7 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512)
7272
; SSSE3: cost of 12 {{.*}} %V512 = shufflevector
7373
; SSE42: cost of 12 {{.*}} %V512 = shufflevector
7474
; AVX1: cost of 16 {{.*}} %V512 = shufflevector
75-
; AVX2: cost of 16 {{.*}} %V512 = shufflevector
75+
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
7676
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
7777
%V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 6, i32 4, i32 3, i32 2, i32 1, i32 0>
7878

@@ -102,7 +102,7 @@ define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %
102102
; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
103103
; SSE42: cost of 24 {{.*}} %V512 = shufflevector
104104
; AVX1: cost of 28 {{.*}} %V512 = shufflevector
105-
; AVX2: cost of 28 {{.*}} %V512 = shufflevector
105+
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
106106
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
107107
%V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 13, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
108108

@@ -132,15 +132,15 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512
132132
; SSSE3: cost of 24 {{.*}} %V512 = shufflevector
133133
; SSE42: cost of 24 {{.*}} %V512 = shufflevector
134134
; AVX1: cost of 32 {{.*}} %V512 = shufflevector
135-
; AVX2: cost of 32 {{.*}} %V512 = shufflevector
135+
; AVX2: cost of 6 {{.*}} %V512 = shufflevector
136136
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
137137
%V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 13, i32 10, i32 9, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
138138

139139
; SSE2: cost of 112 {{.*}} %V1024 = shufflevector
140140
; SSSE3: cost of 112 {{.*}} %V1024 = shufflevector
141141
; SSE42: cost of 112 {{.*}} %V1024 = shufflevector
142142
; AVX1: cost of 192 {{.*}} %V1024 = shufflevector
143-
; AVX2: cost of 192 {{.*}} %V1024 = shufflevector
143+
; AVX2: cost of 36 {{.*}} %V1024 = shufflevector
144144
; AVX512: cost of 2 {{.*}} %V1024 = shufflevector
145145
%V1024 = shufflevector <32 x i32> %src1024, <32 x i32> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
146146
ret void
@@ -173,8 +173,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
173173
; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
174174
; SSE42: cost of 36 {{.*}} %V512 = shufflevector
175175
; AVX1: cost of 64 {{.*}} %V512 = shufflevector
176-
; AVX2: cost of 64 {{.*}} %V512 = shufflevector
177-
; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
176+
; AVX2: cost of 14 {{.*}} %V512 = shufflevector
177+
; AVX512F: cost of 14 {{.*}} %V512 = shufflevector
178178
; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
179179
; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
180180
%V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -183,8 +183,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
183183
; SSSE3: cost of 168 {{.*}} %V1024 = shufflevector
184184
; SSE42: cost of 168 {{.*}} %V1024 = shufflevector
185185
; AVX1: cost of 384 {{.*}} %V1024 = shufflevector
186-
; AVX2: cost of 384 {{.*}} %V1024 = shufflevector
187-
; AVX512F: cost of 384 {{.*}} %V1024 = shufflevector
186+
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
187+
; AVX512F: cost of 84 {{.*}} %V1024 = shufflevector
188188
; AVX512BW: cost of 2 {{.*}} %V1024 = shufflevector
189189
; AVX512VBMI: cost of 2 {{.*}} %V1024 = shufflevector
190190
%V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -215,8 +215,8 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512)
215215
; SSSE3: cost of 36 {{.*}} %V512 = shufflevector
216216
; SSE42: cost of 36 {{.*}} %V512 = shufflevector
217217
; AVX1: cost of 128 {{.*}} %V512 = shufflevector
218-
; AVX2: cost of 128 {{.*}} %V512 = shufflevector
219-
; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
218+
; AVX2: cost of 14 {{.*}} %V512 = shufflevector
219+
; AVX512F: cost of 14 {{.*}} %V512 = shufflevector
220220
; AVX512BW: cost of 8 {{.*}} %V512 = shufflevector
221221
; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
222222
%V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

test/Analysis/CostModel/X86/shuffle-two-src.ll

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -26,23 +26,23 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double>
2626
; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
2727
; SSE42: cost of 6 {{.*}} %V256 = shufflevector
2828
; AVX1: cost of 6 {{.*}} %V256 = shufflevector
29-
; AVX2: cost of 6 {{.*}} %V256 = shufflevector
29+
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
3030
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
3131
%V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
3232

3333
; SSE2: cost of 28 {{.*}} %V512 = shufflevector
3434
; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
3535
; SSE42: cost of 28 {{.*}} %V512 = shufflevector
3636
; AVX1: cost of 12 {{.*}} %V512 = shufflevector
37-
; AVX2: cost of 12 {{.*}} %V512 = shufflevector
37+
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
3838
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
3939
%V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
4040

4141
; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
4242
; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
4343
; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
4444
; AVX1: cost of 24 {{.*}} %V1024 = shufflevector
45-
; AVX2: cost of 24 {{.*}} %V1024 = shufflevector
45+
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
4646
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
4747
%V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
4848

@@ -64,23 +64,23 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512,
6464
; SSSE3: cost of 6 {{.*}} %V256 = shufflevector
6565
; SSE42: cost of 6 {{.*}} %V256 = shufflevector
6666
; AVX1: cost of 8 {{.*}} %V256 = shufflevector
67-
; AVX2: cost of 8 {{.*}} %V256 = shufflevector
67+
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
6868
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
6969
%V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> <i32 3, i32 3, i32 7, i32 6>
7070

7171
; SSE2: cost of 28 {{.*}} %V512 = shufflevector
7272
; SSSE3: cost of 28 {{.*}} %V512 = shufflevector
7373
; SSE42: cost of 28 {{.*}} %V512 = shufflevector
7474
; AVX1: cost of 16 {{.*}} %V512 = shufflevector
75-
; AVX2: cost of 16 {{.*}} %V512 = shufflevector
75+
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
7676
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
7777
%V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> <i32 7, i32 6, i32 12, i32 4, i32 3, i32 2, i32 1, i32 15>
7878

7979
; SSE2: cost of 120 {{.*}} %V1024 = shufflevector
8080
; SSSE3: cost of 120 {{.*}} %V1024 = shufflevector
8181
; SSE42: cost of 120 {{.*}} %V1024 = shufflevector
8282
; AVX1: cost of 32 {{.*}} %V1024 = shufflevector
83-
; AVX2: cost of 32 {{.*}} %V1024 = shufflevector
83+
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
8484
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
8585
%V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> <i32 30, i32 14, i32 13, i32 12, i32 13, i32 10, i32 18, i32 8, i32 8, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
8686

@@ -102,23 +102,23 @@ define void @test_vXf32(<4 x float> %src128, <8 x float> %src256, <16 x float> %
102102
; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
103103
; SSE42: cost of 12 {{.*}} %V256 = shufflevector
104104
; AVX1: cost of 14 {{.*}} %V256 = shufflevector
105-
; AVX2: cost of 14 {{.*}} %V256 = shufflevector
105+
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
106106
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
107107
%V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
108108

109109
; SSE2: cost of 56 {{.*}} %V512 = shufflevector
110110
; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
111111
; SSE42: cost of 56 {{.*}} %V512 = shufflevector
112112
; AVX1: cost of 28 {{.*}} %V512 = shufflevector
113-
; AVX2: cost of 28 {{.*}} %V512 = shufflevector
113+
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
114114
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
115115
%V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
116116

117117
; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
118118
; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
119119
; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
120120
; AVX1: cost of 56 {{.*}} %V1024 = shufflevector
121-
; AVX2: cost of 56 {{.*}} %V1024 = shufflevector
121+
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
122122
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
123123
%V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
124124

@@ -140,23 +140,23 @@ define void @test_vXi32(<4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512
140140
; SSSE3: cost of 12 {{.*}} %V256 = shufflevector
141141
; SSE42: cost of 12 {{.*}} %V256 = shufflevector
142142
; AVX1: cost of 16 {{.*}} %V256 = shufflevector
143-
; AVX2: cost of 16 {{.*}} %V256 = shufflevector
143+
; AVX2: cost of 3 {{.*}} %V256 = shufflevector
144144
; AVX512: cost of 1 {{.*}} %V256 = shufflevector
145145
%V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> <i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 12, i32 0>
146146

147147
; SSE2: cost of 56 {{.*}} %V512 = shufflevector
148148
; SSSE3: cost of 56 {{.*}} %V512 = shufflevector
149149
; SSE42: cost of 56 {{.*}} %V512 = shufflevector
150150
; AVX1: cost of 32 {{.*}} %V512 = shufflevector
151-
; AVX2: cost of 32 {{.*}} %V512 = shufflevector
151+
; AVX2: cost of 18 {{.*}} %V512 = shufflevector
152152
; AVX512: cost of 1 {{.*}} %V512 = shufflevector
153153
%V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> <i32 15, i32 17, i32 13, i32 20, i32 11, i32 10, i32 8, i32 8, i32 7, i32 22, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
154154

155155
; SSE2: cost of 240 {{.*}} %V1024 = shufflevector
156156
; SSSE3: cost of 240 {{.*}} %V1024 = shufflevector
157157
; SSE42: cost of 240 {{.*}} %V1024 = shufflevector
158158
; AVX1: cost of 64 {{.*}} %V1024 = shufflevector
159-
; AVX2: cost of 64 {{.*}} %V1024 = shufflevector
159+
; AVX2: cost of 84 {{.*}} %V1024 = shufflevector
160160
; AVX512: cost of 6 {{.*}} %V1024 = shufflevector
161161
%V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> <i32 31, i32 33, i32 20, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 48, i32 13, i32 12, i32 11, i32 11, i32 9, i32 45, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
162162

@@ -180,8 +180,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
180180
; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
181181
; SSE42: cost of 18 {{.*}} %V256 = shufflevector
182182
; AVX1: cost of 32 {{.*}} %V256 = shufflevector
183-
; AVX2: cost of 32 {{.*}} %V256 = shufflevector
184-
; AVX512F: cost of 32 {{.*}} %V256 = shufflevector
183+
; AVX2: cost of 7 {{.*}} %V256 = shufflevector
184+
; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
185185
; AVX512BW: cost of 1 {{.*}} %V256 = shufflevector
186186
; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
187187
%V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> <i32 15, i32 14, i32 13, i32 20, i32 21, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -190,8 +190,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
190190
; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
191191
; SSE42: cost of 84 {{.*}} %V512 = shufflevector
192192
; AVX1: cost of 64 {{.*}} %V512 = shufflevector
193-
; AVX2: cost of 64 {{.*}} %V512 = shufflevector
194-
; AVX512F: cost of 64 {{.*}} %V512 = shufflevector
193+
; AVX2: cost of 42 {{.*}} %V512 = shufflevector
194+
; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
195195
; AVX512BW: cost of 1 {{.*}} %V512 = shufflevector
196196
; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
197197
%V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 38, i32 11, i32 11, i32 9, i32 8, i32 7, i32 11, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -200,8 +200,8 @@ define void @test_vXi16(<8 x i16> %src128, <16 x i16> %src256, <32 x i16> %src51
200200
; SSSE3: cost of 360 {{.*}} %V1024 = shufflevector
201201
; SSE42: cost of 360 {{.*}} %V1024 = shufflevector
202202
; AVX1: cost of 128 {{.*}} %V1024 = shufflevector
203-
; AVX2: cost of 128 {{.*}} %V1024 = shufflevector
204-
; AVX512F: cost of 128 {{.*}} %V1024 = shufflevector
203+
; AVX2: cost of 196 {{.*}} %V1024 = shufflevector
204+
; AVX512F: cost of 196 {{.*}} %V1024 = shufflevector
205205
; AVX512BW: cost of 6 {{.*}} %V1024 = shufflevector
206206
; AVX512VBMI: cost of 6 {{.*}} %V1024 = shufflevector
207207
%V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> <i32 63, i32 62, i32 71, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 66, i32 2, i32 1, i32 0>
@@ -226,8 +226,8 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512,
226226
; SSSE3: cost of 18 {{.*}} %V256 = shufflevector
227227
; SSE42: cost of 18 {{.*}} %V256 = shufflevector
228228
; AVX1: cost of 64 {{.*}} %V256 = shufflevector
229-
; AVX2: cost of 64 {{.*}} %V256 = shufflevector
230-
; AVX512F: cost of 64 {{.*}} %V256 = shufflevector
229+
; AVX2: cost of 7 {{.*}} %V256 = shufflevector
230+
; AVX512F: cost of 7 {{.*}} %V256 = shufflevector
231231
; AVX512BW: cost of 3 {{.*}} %V256 = shufflevector
232232
; AVX512VBMI: cost of 1 {{.*}} %V256 = shufflevector
233233
%V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> <i32 31, i32 30, i32 45, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 8, i32 8, i32 7, i32 6, i32 8, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -236,8 +236,8 @@ define void @test_vXi8(<16 x i8> %src128, <32 x i8> %src256, <64 x i8> %src512,
236236
; SSSE3: cost of 84 {{.*}} %V512 = shufflevector
237237
; SSE42: cost of 84 {{.*}} %V512 = shufflevector
238238
; AVX1: cost of 128 {{.*}} %V512 = shufflevector
239-
; AVX2: cost of 128 {{.*}} %V512 = shufflevector
240-
; AVX512F: cost of 128 {{.*}} %V512 = shufflevector
239+
; AVX2: cost of 42 {{.*}} %V512 = shufflevector
240+
; AVX512F: cost of 42 {{.*}} %V512 = shufflevector
241241
; AVX512BW: cost of 19 {{.*}} %V512 = shufflevector
242242
; AVX512VBMI: cost of 1 {{.*}} %V512 = shufflevector
243243
%V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> <i32 63, i32 100, i32 61, i32 96, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 20, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

0 commit comments

Comments (0)