Skip to content

Commit eab1287

Browse files
committed
[x86] Enable some support for lowerVectorShuffleWithUndefHalf with AVX-512
Summary: This teaches 512-bit shuffles to detect unused halves in order to reduce shuffle size. We may need to refine the 512-bit exit point. I couldn't remember if we had good cross-lane shuffles for 8/16-bit with AVX-512 or not. I believe this is a step towards being able to handle D36454 without a special case. From here we need to improve our ability to combine extract_subvector with insert_subvector and other extract_subvectors. And we need to support narrowing binary operations where we don't demand all elements. This may be improvements to DAGCombiner::narrowExtractedVectorBinOp (by recognizing an insert_subvector in addition to concat) or we may need a target-specific combiner. Reviewers: RKSimon, zvi, delena, jbhateja Reviewed By: RKSimon, jbhateja Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D36601 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@310724 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 1f9d19b commit eab1287

File tree

7 files changed

+52
-66
lines changed

7 files changed

+52
-66
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12256,15 +12256,16 @@ static SDValue lowerVectorShuffleByMerging128BitLanes(
1225612256
return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask);
1225712257
}
1225812258

12259-
/// Lower shuffles where an entire half of a 256-bit vector is UNDEF.
12259+
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
1226012260
/// This allows for fast cases such as subvector extraction/insertion
1226112261
/// or shuffling smaller vector types which can lower more efficiently.
1226212262
static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
1226312263
SDValue V1, SDValue V2,
1226412264
ArrayRef<int> Mask,
1226512265
const X86Subtarget &Subtarget,
1226612266
SelectionDAG &DAG) {
12267-
assert(VT.is256BitVector() && "Expected 256-bit vector");
12267+
assert((VT.is256BitVector() || VT.is512BitVector()) &&
12268+
"Expected 256-bit or 512-bit vector");
1226812269

1226912270
unsigned NumElts = VT.getVectorNumElements();
1227012271
unsigned HalfNumElts = NumElts / 2;
@@ -12360,6 +12361,10 @@ static SDValue lowerVectorShuffleWithUndefHalf(const SDLoc &DL, MVT VT,
1236012361
}
1236112362
}
1236212363

12364+
// AVX512 - XXXXuuuu - always extract lowers.
12365+
if (VT.is512BitVector() && !(UndefUpper && NumUpperHalves == 0))
12366+
return SDValue();
12367+
1236312368
auto GetHalfVector = [&](int HalfIdx) {
1236412369
if (HalfIdx < 0)
1236512370
return DAG.getUNDEF(HalfVT);
@@ -13703,6 +13708,11 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
1370313708
DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
1370413709
return Insertion;
1370513710

13711+
// Handle special cases where the lower or upper half is UNDEF.
13712+
if (SDValue V =
13713+
lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
13714+
return V;
13715+
1370613716
// Check for being able to broadcast a single element.
1370713717
if (SDValue Broadcast =
1370813718
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))

test/CodeGen/X86/madd.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -323,13 +323,13 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
323323
; AVX512-NEXT: cmpq %rcx, %rax
324324
; AVX512-NEXT: jne .LBB2_1
325325
; AVX512-NEXT: # BB#2: # %middle.block
326-
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
326+
; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
327327
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
328-
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
328+
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
329329
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
330-
; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
330+
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
331331
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
332-
; AVX512-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
332+
; AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
333333
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
334334
; AVX512-NEXT: vmovd %xmm0, %eax
335335
; AVX512-NEXT: vzeroupper

test/CodeGen/X86/sad.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,13 @@ define i32 @sad_16i8() nounwind {
7272
; AVX512F-NEXT: addq $4, %rax
7373
; AVX512F-NEXT: jne .LBB0_1
7474
; AVX512F-NEXT: # BB#2: # %middle.block
75-
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
75+
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
7676
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
77-
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
77+
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
7878
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
79-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
79+
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
8080
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
81-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
81+
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
8282
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
8383
; AVX512F-NEXT: vmovd %xmm0, %eax
8484
; AVX512F-NEXT: vzeroupper
@@ -98,13 +98,13 @@ define i32 @sad_16i8() nounwind {
9898
; AVX512BW-NEXT: addq $4, %rax
9999
; AVX512BW-NEXT: jne .LBB0_1
100100
; AVX512BW-NEXT: # BB#2: # %middle.block
101-
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
101+
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
102102
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
103-
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
103+
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
104104
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
105-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
105+
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
106106
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
107-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
107+
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
108108
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
109109
; AVX512BW-NEXT: vmovd %xmm0, %eax
110110
; AVX512BW-NEXT: vzeroupper
@@ -321,13 +321,13 @@ define i32 @sad_32i8() nounwind {
321321
; AVX512F-NEXT: jne .LBB1_1
322322
; AVX512F-NEXT: # BB#2: # %middle.block
323323
; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
324-
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
324+
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
325325
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
326-
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
326+
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
327327
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
328-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
328+
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
329329
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
330-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
330+
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
331331
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
332332
; AVX512F-NEXT: vmovd %xmm0, %eax
333333
; AVX512F-NEXT: vzeroupper
@@ -349,13 +349,13 @@ define i32 @sad_32i8() nounwind {
349349
; AVX512BW-NEXT: jne .LBB1_1
350350
; AVX512BW-NEXT: # BB#2: # %middle.block
351351
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
352-
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
352+
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
353353
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
354-
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
354+
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
355355
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
356-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
356+
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
357357
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
358-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
358+
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
359359
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
360360
; AVX512BW-NEXT: vmovd %xmm0, %eax
361361
; AVX512BW-NEXT: vzeroupper
@@ -794,13 +794,13 @@ define i32 @sad_avx64i8() nounwind {
794794
; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
795795
; AVX512F-NEXT: vpaddd %zmm3, %zmm1, %zmm1
796796
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
797-
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
797+
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
798798
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
799-
; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
799+
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
800800
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
801-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
801+
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
802802
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
803-
; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
803+
; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
804804
; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
805805
; AVX512F-NEXT: vmovd %xmm0, %eax
806806
; AVX512F-NEXT: vzeroupper
@@ -823,13 +823,13 @@ define i32 @sad_avx64i8() nounwind {
823823
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm1
824824
; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0
825825
; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
826-
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
826+
; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
827827
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
828-
; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
828+
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
829829
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
830-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
830+
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
831831
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
832-
; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
832+
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
833833
; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
834834
; AVX512BW-NEXT: vmovd %xmm0, %eax
835835
; AVX512BW-NEXT: vzeroupper

test/CodeGen/X86/vector-shuffle-512-v16.ll

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -262,19 +262,10 @@ define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19
262262
}
263263

264264
define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a, <16 x i32> %b) {
265-
; AVX512F-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
266-
; AVX512F: # BB#0:
267-
; AVX512F-NEXT: movw $8, %ax
268-
; AVX512F-NEXT: kmovw %eax, %k1
269-
; AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
270-
; AVX512F-NEXT: retq
271-
;
272-
; AVX512BW-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
273-
; AVX512BW: # BB#0:
274-
; AVX512BW-NEXT: movw $8, %ax
275-
; AVX512BW-NEXT: kmovd %eax, %k1
276-
; AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1}
277-
; AVX512BW-NEXT: retq
265+
; ALL-LABEL: shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u:
266+
; ALL: # BB#0:
267+
; ALL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
268+
; ALL-NEXT: retq
278269
%c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
279270
ret <16 x i32> %c
280271
}

test/CodeGen/X86/vector-shuffle-512-v32.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ define <32 x i16> @shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u(<32 x i1
101101
;
102102
; SKX-LABEL: shuffle_v16i32_0_32_1_33_2_34_3_35_8_40_9_41_u_u_u_u:
103103
; SKX: ## BB#0:
104-
; SKX-NEXT: vpunpcklwd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
104+
; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
105105
; SKX-NEXT: retq
106106
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 8, i32 40, i32 9, i32 41, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
107107
ret <32 x i16> %c
@@ -115,7 +115,7 @@ define <32 x i16> @shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u(<32 x
115115
;
116116
; SKX-LABEL: shuffle_v16i32_4_36_5_37_6_38_7_39_12_44_13_45_u_u_u_u:
117117
; SKX: ## BB#0:
118-
; SKX-NEXT: vpunpckhwd {{.*#+}} zmm0 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
118+
; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
119119
; SKX-NEXT: retq
120120
%c = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32> <i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 12, i32 44, i32 13, i32 45, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
121121
ret <32 x i16> %c

test/CodeGen/X86/vector-shuffle-512-v64.ll

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,10 @@
55
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VBMI
66

77
define <64 x i8> @shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u(<64 x i8> %a) {
8-
; AVX512F-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
9-
; AVX512F: # BB#0:
10-
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm0
11-
; AVX512F-NEXT: retq
12-
;
13-
; AVX512BW-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
14-
; AVX512BW: # BB#0:
15-
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm0
16-
; AVX512BW-NEXT: retq
17-
;
18-
; AVX512DQ-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
19-
; AVX512DQ: # BB#0:
20-
; AVX512DQ-NEXT: vpsrld $16, %xmm0, %xmm0
21-
; AVX512DQ-NEXT: retq
22-
;
23-
; AVX512VBMI-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
24-
; AVX512VBMI: # BB#0:
25-
; AVX512VBMI-NEXT: vpsrld $16, %zmm0, %zmm0
26-
; AVX512VBMI-NEXT: retq
8+
; ALL-LABEL: shuffle_v64i8_02_03_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u_u:
9+
; ALL: # BB#0:
10+
; ALL-NEXT: vpsrld $16, %xmm0, %xmm0
11+
; ALL-NEXT: retq
2712
%b = shufflevector <64 x i8> %a, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2813
ret <64 x i8> %b
2914
}

test/CodeGen/X86/vector-shuffle-512-v8.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2241,12 +2241,12 @@ define <8 x double> @shuffle_v8f64_2301uu67(<8 x double> %a0, <8 x double> %a1)
22412241
define <8 x double> @shuffle_v8f64_2301uuuu(<8 x double> %a0, <8 x double> %a1) {
22422242
; AVX512F-LABEL: shuffle_v8f64_2301uuuu:
22432243
; AVX512F: # BB#0:
2244-
; AVX512F-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
2244+
; AVX512F-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
22452245
; AVX512F-NEXT: retq
22462246
;
22472247
; AVX512F-32-LABEL: shuffle_v8f64_2301uuuu:
22482248
; AVX512F-32: # BB#0:
2249-
; AVX512F-32-NEXT: vpermpd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5]
2249+
; AVX512F-32-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
22502250
; AVX512F-32-NEXT: retl
22512251
%1 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
22522252
ret <8 x double> %1

0 commit comments

Comments
 (0)