@@ -72,13 +72,13 @@ define i32 @sad_16i8() nounwind {
7272; AVX512F-NEXT: addq $4, %rax
7373; AVX512F-NEXT: jne .LBB0_1
7474; AVX512F-NEXT: # BB#2: # %middle.block
75- ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
75+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
7676; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
77- ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
77+ ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
7878; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
79- ; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
79+ ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
8080; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
81- ; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
81+ ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
8282; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
8383; AVX512F-NEXT: vmovd %xmm0, %eax
8484; AVX512F-NEXT: vzeroupper
@@ -98,13 +98,13 @@ define i32 @sad_16i8() nounwind {
9898; AVX512BW-NEXT: addq $4, %rax
9999; AVX512BW-NEXT: jne .LBB0_1
100100; AVX512BW-NEXT: # BB#2: # %middle.block
101- ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
101+ ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
102102; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
103- ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
103+ ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
104104; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
105- ; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
105+ ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
106106; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
107- ; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
107+ ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
108108; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
109109; AVX512BW-NEXT: vmovd %xmm0, %eax
110110; AVX512BW-NEXT: vzeroupper
@@ -321,13 +321,13 @@ define i32 @sad_32i8() nounwind {
321321; AVX512F-NEXT: jne .LBB1_1
322322; AVX512F-NEXT: # BB#2: # %middle.block
323323; AVX512F-NEXT: vpaddd %zmm0, %zmm1, %zmm0
324- ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
324+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
325325; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
326- ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
326+ ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
327327; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
328- ; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
328+ ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
329329; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
330- ; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
330+ ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
331331; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
332332; AVX512F-NEXT: vmovd %xmm0, %eax
333333; AVX512F-NEXT: vzeroupper
@@ -349,13 +349,13 @@ define i32 @sad_32i8() nounwind {
349349; AVX512BW-NEXT: jne .LBB1_1
350350; AVX512BW-NEXT: # BB#2: # %middle.block
351351; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
352- ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
352+ ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
353353; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
354- ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
354+ ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
355355; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
356- ; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
356+ ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
357357; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
358- ; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
358+ ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
359359; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
360360; AVX512BW-NEXT: vmovd %xmm0, %eax
361361; AVX512BW-NEXT: vzeroupper
@@ -794,13 +794,13 @@ define i32 @sad_avx64i8() nounwind {
794794; AVX512F-NEXT: vpaddd %zmm2, %zmm0, %zmm0
795795; AVX512F-NEXT: vpaddd %zmm3, %zmm1, %zmm1
796796; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
797- ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
797+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
798798; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
799- ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
799+ ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
800800; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
801- ; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
801+ ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
802802; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
803- ; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
803+ ; AVX512F-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
804804; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0
805805; AVX512F-NEXT: vmovd %xmm0, %eax
806806; AVX512F-NEXT: vzeroupper
@@ -823,13 +823,13 @@ define i32 @sad_avx64i8() nounwind {
823823; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm1
824824; AVX512BW-NEXT: vpaddd %zmm0, %zmm0, %zmm0
825825; AVX512BW-NEXT: vpaddd %zmm0, %zmm1, %zmm0
826- ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[4,5,6,7,0,1,0,1]
826+ ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
827827; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
828- ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1]
828+ ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
829829; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
830- ; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15]
830+ ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
831831; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
832- ; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15]
832+ ; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
833833; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0
834834; AVX512BW-NEXT: vmovd %xmm0, %eax
835835; AVX512BW-NEXT: vzeroupper
0 commit comments