Commit a80a6b3

[X86] avg.ll - fix repeated operand typo in v64i8 avg pattern test (#163194)
1 parent 7429a08 commit a80a6b3
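
For context, the rounding-average idiom this test exercises is avg(a, b) = (zext(a) + zext(b) + 1) >> 1, which X86 codegen matches to pavgb. The typo repeated the second operand, so the IR computed (2 * %4 + 1) >> 1; since 2 * %4 + 1 is odd and cannot wrap after the zext, that folds straight back to %4, which is why the old CHECK lines below contain nothing but loads from %b and plain stores, with no pavgb at all. Here is a minimal sketch of the intended pattern, reduced to <16 x i8> for readability (the function name and width are illustrative, not taken from the commit):

; Illustrative reduction of the avg pattern; avg.ll tests this same shape
; at <16 x i8> and wider.
define <16 x i8> @avg_sketch(<16 x i8> %a, <16 x i8> %b) {
  ; widen first so the sum and the +1 cannot overflow
  %ea = zext <16 x i8> %a to <16 x i32>
  %eb = zext <16 x i8> %b to <16 x i32>
  ; uses both operands (the typo repeated the second one here)
  %sum = add nuw nsw <16 x i32> %ea, %eb
  %inc = add nuw nsw <16 x i32> %sum, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %shr = lshr <16 x i32> %inc, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %avg = trunc <16 x i32> %shr to <16 x i8>
  ret <16 x i8> %avg
}

With the operands fixed, the regenerated checks also split the old AVX512 prefix into AVX512F and AVX512BW: a 512-bit vpavgb requires AVX512BW, so plain AVX512F legalizes the <64 x i8> average as two 256-bit vpavgb ops.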

llvm/test/CodeGen/X86/avg.ll

Lines changed: 49 additions & 25 deletions
@@ -728,53 +728,77 @@ define void @avg_v32i8_2(ptr %a, ptr %b) nounwind {
 define void @avg_v64i8_2(ptr %a, ptr %b) nounwind {
 ; SSE2-LABEL: avg_v64i8_2:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movaps (%rsi), %xmm0
-; SSE2-NEXT: movaps 16(%rsi), %xmm1
-; SSE2-NEXT: movaps 32(%rsi), %xmm2
-; SSE2-NEXT: movaps 48(%rsi), %xmm3
-; SSE2-NEXT: movups %xmm3, (%rax)
-; SSE2-NEXT: movups %xmm2, (%rax)
-; SSE2-NEXT: movups %xmm1, (%rax)
-; SSE2-NEXT: movups %xmm0, (%rax)
+; SSE2-NEXT: movdqa (%rdi), %xmm0
+; SSE2-NEXT: movdqa 16(%rdi), %xmm1
+; SSE2-NEXT: movdqa 32(%rdi), %xmm2
+; SSE2-NEXT: movdqa 48(%rdi), %xmm3
+; SSE2-NEXT: pavgb (%rsi), %xmm0
+; SSE2-NEXT: pavgb 16(%rsi), %xmm1
+; SSE2-NEXT: pavgb 32(%rsi), %xmm2
+; SSE2-NEXT: pavgb 48(%rsi), %xmm3
+; SSE2-NEXT: movdqu %xmm3, (%rax)
+; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: movdqu %xmm1, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
 ; SSE2-NEXT: retq
 ;
 ; AVX1-LABEL: avg_v64i8_2:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vmovaps (%rsi), %ymm0
-; AVX1-NEXT: vmovaps 32(%rsi), %ymm1
-; AVX1-NEXT: vmovups %ymm1, (%rax)
-; AVX1-NEXT: vmovups %ymm0, (%rax)
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vmovdqa (%rdi), %xmm0
+; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2
+; AVX1-NEXT: vmovdqa 48(%rdi), %xmm3
+; AVX1-NEXT: vpavgb (%rsi), %xmm0, %xmm0
+; AVX1-NEXT: vpavgb 16(%rsi), %xmm1, %xmm1
+; AVX1-NEXT: vpavgb 32(%rsi), %xmm2, %xmm2
+; AVX1-NEXT: vpavgb 48(%rsi), %xmm3, %xmm3
+; AVX1-NEXT: vmovdqu %xmm3, (%rax)
+; AVX1-NEXT: vmovdqu %xmm2, (%rax)
+; AVX1-NEXT: vmovdqu %xmm1, (%rax)
+; AVX1-NEXT: vmovdqu %xmm0, (%rax)
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: avg_v64i8_2:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: vmovaps (%rsi), %ymm0
-; AVX2-NEXT: vmovaps 32(%rsi), %ymm1
-; AVX2-NEXT: vmovups %ymm1, (%rax)
-; AVX2-NEXT: vmovups %ymm0, (%rax)
+; AVX2-NEXT: vmovdqa (%rdi), %ymm0
+; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
+; AVX2-NEXT: vpavgb (%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vmovdqu %ymm1, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512-LABEL: avg_v64i8_2:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vmovaps (%rsi), %zmm0
-; AVX512-NEXT: vmovups %zmm0, (%rax)
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: retq
+; AVX512F-LABEL: avg_v64i8_2:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
+; AVX512F-NEXT: vpavgb (%rsi), %ymm0, %ymm0
+; AVX512F-NEXT: vpavgb 32(%rsi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
+; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: avg_v64i8_2:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
+; AVX512BW-NEXT: vpavgb (%rsi), %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rax)
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
 %1 = load <64 x i8>, ptr %a
 %2 = load <64 x i8>, ptr %b
 %3 = zext <64 x i8> %1 to <64 x i32>
 %4 = zext <64 x i8> %2 to <64 x i32>
-%5 = add nuw nsw <64 x i32> %4, %4
+%5 = add nuw nsw <64 x i32> %3, %4
 %6 = add nuw nsw <64 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 %7 = lshr <64 x i32> %6, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 %8 = trunc <64 x i32> %7 to <64 x i8>
 store <64 x i8> %8, ptr undef, align 4
 ret void
 }
 
-
 define void @avg_v4i16_2(ptr %a, ptr %b) nounwind {
 ; SSE2-LABEL: avg_v4i16_2:
 ; SSE2: # %bb.0:
