- Notifications
You must be signed in to change notification settings - Fork 14.9k
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow SSE41 phminposuw intrinsic to be used in constexp #163041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Shawn K (kimsh02) ChangesFix #161336 Full diff: https://github.com/llvm/llvm-project/pull/163041.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 217589d7add1d..1cd72707bf93f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -327,8 +327,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; - def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; - def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; + def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, " + "_Vector<16, char>, _Constant char)">; } let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -341,6 +341,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; + def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">; def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 922d67940e22f..1b3eb4da69ec7 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2860,6 +2860,46 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + const Pointer &Source = S.Stk.pop<Pointer>(); + const Pointer &Dest = S.Stk.peek<Pointer>(); + + unsigned SourceLen = Source.getNumElems(); + QualType ElemQT = getElemType(Source); + OptPrimType ElemPT = S.getContext().classify(ElemQT); + unsigned LaneBitWidth = S.getASTContext().getTypeSize(ElemQT); + + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { + APInt MinIndex(LaneBitWidth, 0); + APInt MinVal = Source.elem<T>(0).toAPSInt(); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Val = Source.elem<T>(I).toAPSInt(); + if (MinVal.ugt(Val)) { + MinVal = Val; + MinIndex = I; + } + } + + bool DestUnsigned = Call->getCallReturnType(S.getASTContext()) + ->castAs<VectorType>() + ->getElementType() + ->isUnsignedIntegerOrEnumerationType(); + + Dest.elem<T>(0) = static_cast<T>(APSInt(MinVal, DestUnsigned)); + Dest.elem<T>(1) = static_cast<T>(APSInt(MinIndex, DestUnsigned)); + for (unsigned I = 2; I != SourceLen; ++I) { + Dest.elem<T>(I) = + static_cast<T>(APSInt(APInt(LaneBitWidth, 0), DestUnsigned)); + } + }); + Dest.initializeAllElements(); + return true; +} + static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC, const CallExpr *Call, bool MaskZ) { assert(Call->getNumArgs() == 5); @@ -3809,6 +3849,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_phminposuw128: { + return interp__builtin_ia32_phminposuw(S, OpPC, Call); + } + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 35a866ea5010f..e686d17522260 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12216,6 +12216,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_phminposuw128: { + APValue Source; + if (!Evaluate(Source, Info, E->getArg(0))) + return false; + unsigned SourceLen = Source.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>(); + QualType ElemQT = VT->getElementType(); + unsigned LaneBitWidth = Info.Ctx.getTypeSize(ElemQT); + + APInt MinIndex(LaneBitWidth, 0); + APInt MinVal = Source.getVectorElt(0).getInt(); + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Val = Source.getVectorElt(I).getInt(); + if (MinVal.ugt(Val)) { + MinVal = Val; + MinIndex = I; + } + } + + bool ResultUnsigned = E->getCallReturnType(Info.Ctx) + ->castAs<VectorType>() + ->getElementType() + ->isUnsignedIntegerOrEnumerationType(); + + SmallVector<APValue, 8> Result; + Result.reserve(SourceLen); + Result.emplace_back(APSInt(MinVal, ResultUnsigned)); + Result.emplace_back(APSInt(MinIndex, ResultUnsigned)); + for (unsigned I = 0; I != SourceLen - 2; ++I) { + Result.emplace_back(APSInt(APInt(LaneBitWidth, 0), ResultUnsigned)); + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 5e63a1ae321bc..1cc2c7b9371a2 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1524,7 +1524,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) { /// \returns A 128-bit value where bits [15:0] contain the minimum value found /// in parameter \a __V, bits [18:16] contain the index of the minimum value /// and the remaining bits are set to 0. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_minpos_epu16(__m128i __V) { return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index eee479a755ab4..28b9b83237c88 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -376,6 +376,16 @@ __m128i test_mm_minpos_epu16(__m128i x) { // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}}) return _mm_minpos_epu16(x); } +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1,0,0,0, 0,0,0,0}), 0,1,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){65535,65535,65535,65535,65535,65535,65535,65535}), 65535,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){9,8,7,6,5,4,3,2}), 2,7,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,5,5,5,5,5,5,5}), 5,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,7,9,4,10,4,11,12}), 4,3,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){6,0,0,0,0,0,0,0}), 0,1,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1000,2000,3000,4000,5000,6000,7000,1}), 1,7,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1234,5678,42,9999,65535,0,4242,42}), 0,5,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){400,500,12,600,12,700,800,900}), 12,2,0,0, 0,0,0,0)); __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mpsadbw_epu8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a couple of minors
7427732
to d5d98b0
Compare 0b5295c
to 92e4007
Compare @RKSimon This PR can take a look |
->isUnsignedIntegerOrEnumerationType(); | ||
| ||
INT_TYPE_SWITCH_NO_BOOL(*ElemT, { | ||
APSInt MinIndex(ElemBitWidth, DestUnsigned); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Its probably better to just use:
APSInt MinIndex(ElemBitWidth, DestUnsigned); | |
unsigned MinIndex = 0; |
We never make use of APSInt/APInt features for the index.
Fix #161336