Skip to content

Commit e3f9b4c

Browse files
authored
[X86] Relax vector element width constraint on SSE pmul/madd asm comments (#163590)
As noticed on #163567: if the constant pool data did not have the element size expected by the instruction, we were not adding the asm comment at all.
1 parent 4773751 commit e3f9b4c

File tree

8 files changed

+35
-39
lines changed

8 files changed

+35
-39
lines changed

llvm/lib/Target/X86/X86MCInstLower.cpp

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,16 +1946,14 @@ static void addConstantComments(const MachineInstr *MI,
19461946
CASE_ARITH_RM(PMADDUBSW) {
19471947
unsigned SrcIdx = getSrcIdx(MI, 1);
19481948
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1949-
if (C->getType()->getScalarSizeInBits() == 8) {
1950-
std::string Comment;
1951-
raw_string_ostream CS(Comment);
1952-
unsigned VectorWidth =
1953-
X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1954-
CS << "[";
1955-
printConstant(C, VectorWidth, CS);
1956-
CS << "]";
1957-
OutStreamer.AddComment(CS.str());
1958-
}
1949+
std::string Comment;
1950+
raw_string_ostream CS(Comment);
1951+
unsigned VectorWidth =
1952+
X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1953+
CS << "[";
1954+
printConstant(C, VectorWidth, CS);
1955+
CS << "]";
1956+
OutStreamer.AddComment(CS.str());
19591957
}
19601958
break;
19611959
}
@@ -1967,16 +1965,14 @@ static void addConstantComments(const MachineInstr *MI,
19671965
CASE_ARITH_RM(PMULHRSW) {
19681966
unsigned SrcIdx = getSrcIdx(MI, 1);
19691967
if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) {
1970-
if (C->getType()->getScalarSizeInBits() == 16) {
1971-
std::string Comment;
1972-
raw_string_ostream CS(Comment);
1973-
unsigned VectorWidth =
1974-
X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1975-
CS << "[";
1976-
printConstant(C, VectorWidth, CS);
1977-
CS << "]";
1978-
OutStreamer.AddComment(CS.str());
1979-
}
1968+
std::string Comment;
1969+
raw_string_ostream CS(Comment);
1970+
unsigned VectorWidth =
1971+
X86::getVectorRegisterWidth(MI->getDesc().operands()[0]);
1972+
CS << "[";
1973+
printConstant(C, VectorWidth, CS);
1974+
CS << "]";
1975+
OutStreamer.AddComment(CS.str());
19801976
}
19811977
break;
19821978
}

llvm/test/CodeGen/X86/combine-udiv.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -502,11 +502,11 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
502502
; SSE2-NEXT: por %xmm2, %xmm1
503503
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
504504
; SSE2-NEXT: psubw %xmm1, %xmm0
505-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
505+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,0,0]
506506
; SSE2-NEXT: paddw %xmm1, %xmm0
507507
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,0,65535,65535,0]
508508
; SSE2-NEXT: pandn %xmm0, %xmm1
509-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
509+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,16,0,8,8,0,0,0,0,0,2,0,2,0,0,0]
510510
; SSE2-NEXT: por %xmm1, %xmm0
511511
; SSE2-NEXT: retq
512512
;
@@ -517,7 +517,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
517517
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
518518
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
519519
; SSE41-NEXT: psubw %xmm1, %xmm0
520-
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
520+
; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,0,0]
521521
; SSE41-NEXT: paddw %xmm1, %xmm0
522522
; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4096,2048,8,u,u,2,2,u]
523523
; SSE41-NEXT: pmulhuw %xmm0, %xmm1
@@ -530,7 +530,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
530530
; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
531531
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
532532
; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
533-
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
533+
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,0,0]
534534
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
535535
; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4096,2048,8,u,u,2,2,u]
536536
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6],xmm0[7]
@@ -541,7 +541,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) {
541541
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
542542
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2]
543543
; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0
544-
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
544+
; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,0,0]
545545
; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0
546546
; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
547547
; XOP-NEXT: retq
@@ -630,7 +630,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
630630
; SSE2-NEXT: pand %xmm1, %xmm2
631631
; SSE2-NEXT: pxor %xmm3, %xmm3
632632
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
633-
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
633+
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [171,0,0,0]
634634
; SSE2-NEXT: psrlw $15, %xmm0
635635
; SSE2-NEXT: pandn %xmm0, %xmm1
636636
; SSE2-NEXT: por %xmm2, %xmm1
@@ -641,7 +641,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
641641
; SSE41: # %bb.0:
642642
; SSE41-NEXT: movdqa %xmm0, %xmm1
643643
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
644-
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
644+
; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [171,0,0,0]
645645
; SSE41-NEXT: psrlw $8, %xmm2
646646
; SSE41-NEXT: packuswb %xmm2, %xmm2
647647
; SSE41-NEXT: psrlw $7, %xmm2
@@ -654,7 +654,7 @@ define <16 x i8> @combine_vec_udiv_nonuniform4(<16 x i8> %x) {
654654
; AVX-LABEL: combine_vec_udiv_nonuniform4:
655655
; AVX: # %bb.0:
656656
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
657-
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
657+
; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [171,0,0,0]
658658
; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1
659659
; AVX-NEXT: vpackuswb %xmm1, %xmm1, %xmm1
660660
; AVX-NEXT: vpsrlw $7, %xmm1, %xmm1
@@ -691,7 +691,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) {
691691
; SSE2-NEXT: psubw %xmm3, %xmm0
692692
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [u,32768,0,0,0,0,0,32768]
693693
; SSE2-NEXT: paddw %xmm3, %xmm0
694-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
694+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [u,u,0,4,0,4,16,0,4,0,0,4,0,0,0,16]
695695
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
696696
; SSE2-NEXT: por %xmm3, %xmm0
697697
; SSE2-NEXT: pand %xmm1, %xmm0

llvm/test/CodeGen/X86/madd.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2071,7 +2071,7 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) {
20712071
; AVX1: # %bb.0:
20722072
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
20732073
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
2074-
; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
2074+
; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,7,42,32]
20752075
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
20762076
; AVX1-NEXT: vphaddd %xmm0, %xmm1, %xmm0
20772077
; AVX1-NEXT: retq

llvm/test/CodeGen/X86/vector-fshr-128.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,7 +1872,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
18721872
; SSE2: # %bb.0:
18731873
; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535]
18741874
; SSE2-NEXT: pandn %xmm1, %xmm2
1875-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
1875+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2]
18761876
; SSE2-NEXT: por %xmm1, %xmm2
18771877
; SSE2-NEXT: paddw %xmm0, %xmm0
18781878
; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,16384,8192,4096,2048,1024,512,256]
@@ -1964,7 +1964,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
19641964
; X86-SSE2: # %bb.0:
19651965
; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [0,65535,65535,65535,65535,65535,65535,65535]
19661966
; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1967-
; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
1967+
; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2]
19681968
; X86-SSE2-NEXT: por %xmm1, %xmm2
19691969
; X86-SSE2-NEXT: paddw %xmm0, %xmm0
19701970
; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256]

llvm/test/CodeGen/X86/vector-reduce-add-mask.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,7 @@ define i16 @test_v4i16_v4i8(<4 x i16> %a0) {
851851
; SSE2: # %bb.0:
852852
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
853853
; SSE2-NEXT: pandn %xmm0, %xmm1
854-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
854+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u]
855855
; SSE2-NEXT: por %xmm1, %xmm0
856856
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
857857
; SSE2-NEXT: paddw %xmm0, %xmm1

llvm/test/CodeGen/X86/vector-shift-ashr-128.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1673,7 +1673,7 @@ define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind {
16731673
; SSE2: # %bb.0:
16741674
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,0,65535,65535]
16751675
; SSE2-NEXT: pandn %xmm0, %xmm1
1676-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1676+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,128,0,16,0,2,0,32,0,64,0,0,0,8,0,4]
16771677
; SSE2-NEXT: por %xmm1, %xmm0
16781678
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
16791679
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [64,64,8,8,1,1,16,16,32,32,128,128,4,4,2,2]
@@ -1750,7 +1750,7 @@ define <16 x i8> @constant_shift_v16i8_pairs(<16 x i8> %a) nounwind {
17501750
; X86-SSE: # %bb.0:
17511751
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,0,65535,65535]
17521752
; X86-SSE-NEXT: pandn %xmm0, %xmm1
1753-
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1753+
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,128,0,16,0,2,0,32,0,64,0,0,0,8,0,4]
17541754
; X86-SSE-NEXT: por %xmm1, %xmm0
17551755
; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
17561756
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [64,64,8,8,1,1,16,16,32,32,128,128,4,4,2,2]

llvm/test/CodeGen/X86/vector-shift-lshr-128.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,7 +1223,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
12231223
; SSE2: # %bb.0:
12241224
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
12251225
; SSE2-NEXT: pandn %xmm0, %xmm1
1226-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1226+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2]
12271227
; SSE2-NEXT: por %xmm1, %xmm0
12281228
; SSE2-NEXT: retq
12291229
;
@@ -1275,7 +1275,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind {
12751275
; X86-SSE: # %bb.0:
12761276
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
12771277
; X86-SSE-NEXT: pandn %xmm0, %xmm1
1278-
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1278+
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,128,0,64,0,32,0,16,0,8,0,4,0,2]
12791279
; X86-SSE-NEXT: por %xmm1, %xmm0
12801280
; X86-SSE-NEXT: retl
12811281
%shift = lshr <8 x i16> %a, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>

llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,7 +1480,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
14801480
; SSE2: # %bb.0:
14811481
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
14821482
; SSE2-NEXT: pandn %xmm0, %xmm1
1483-
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1483+
; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u]
14841484
; SSE2-NEXT: por %xmm1, %xmm0
14851485
; SSE2-NEXT: retq
14861486
;
@@ -1532,7 +1532,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind {
15321532
; X86-SSE: # %bb.0:
15331533
; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,65535]
15341534
; X86-SSE-NEXT: pandn %xmm0, %xmm1
1535-
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1535+
; X86-SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,0,128,0,64,0,32,u,u,u,u,u,u,u,u]
15361536
; X86-SSE-NEXT: por %xmm1, %xmm0
15371537
; X86-SSE-NEXT: retl
15381538
%shift = lshr <4 x i16> %a, <i16 0, i16 1, i16 2, i16 3>

0 commit comments

Comments
 (0)