- Notifications
You must be signed in to change notification settings - Fork 15.6k
[InstCombine] Fold trunc(umin/umax(zext(x), y & mask)) to narrower umin/umax #173221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-llvm-transforms Author: XinlongWu (Xinlong-Wu) Changes: Extend `canEvaluateTruncated` to handle `umin`/`umax` intrinsics; also improve `and` instruction handling. This PR fixes issue #167096. Full diff: https://github.com/llvm/llvm-project/pull/173221.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 0cd2c09726a2d..733d4e44db7a2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -21,9 +21,11 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/KnownBits.h" #include "llvm/Transforms/InstCombine/InstCombiner.h" #include <iterator> @@ -53,10 +55,29 @@ static Value *EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, Instruction *Res = nullptr; unsigned Opc = I->getOpcode(); switch (Opc) { + case Instruction::And: { + APInt LowBitMask = APInt::getLowBitsSet(I->getType()->getScalarSizeInBits(), + Ty->getScalarSizeInBits()); + Value *MaskedValue; + const APInt *AndMask; + if (match(I, m_And(m_Value(MaskedValue), m_APInt(AndMask)))) { + Res = CastInst::CreateIntegerCast(MaskedValue, Ty, isSigned); + // if the and operation do a standard narrowing, we can just cast the + // masked value otherwise, we also need to and the casted value with the + // low bits mask + if (LowBitMask != *AndMask) { + Value *CastedValue = IC.InsertNewInstWith(Res, I->getIterator()); + Res = BinaryOperator::CreateAnd( + CastedValue, + ConstantInt::get(Ty, AndMask->trunc(Ty->getScalarSizeInBits()))); + } + break; + } + [[fallthrough]]; + } case Instruction::Add: case Instruction::Sub: case Instruction::Mul: - case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::AShr: @@ -122,6 +143,17 @@ static Value *EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, Res = CallInst::Create(Fn->getFunctionType(), Fn); break; } + case Intrinsic::umin: + case Intrinsic::umax: { + Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), 
Ty, isSigned, + IC, Processed); + Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, + IC, Processed); + Function *Fn = Intrinsic::getOrInsertDeclaration( + I->getModule(), II->getIntrinsicID(), {Ty}); + Res = CallInst::Create(Fn->getFunctionType(), Fn, {LHS, RHS}); + break; + } } } break; @@ -489,10 +521,21 @@ bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty, auto *I = cast<Instruction>(V); Type *OrigTy = V->getType(); switch (I->getOpcode()) { + case Instruction::And: { + // And can be truncated if all the truncated bits are zero. + uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!"); + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + if (IC.MaskedValueIsZero(I->getOperand(0), Mask, CxtI) || + IC.MaskedValueIsZero(I->getOperand(1), Mask, CxtI)) { + return true; + } + [[fallthrough]]; + } case Instruction::Add: case Instruction::Sub: case Instruction::Mul: - case Instruction::And: case Instruction::Or: case Instruction::Xor: // These operators can all arbitrarily be extended or truncated. @@ -607,6 +650,21 @@ bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty, return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) && canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI); + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::umin: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + return canEvaluateTruncatedImpl(Op0, Ty, IC, CxtI) && + canEvaluateTruncatedImpl(Op1, Ty, IC, CxtI); + } + default: + break; + } + } + break; + default: // TODO: Can handle more cases here. 
break; diff --git a/llvm/test/Transforms/InstCombine/cast-minmax-call.ll b/llvm/test/Transforms/InstCombine/cast-minmax-call.ll new file mode 100644 index 0000000000000..9d4fb229f3483 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/cast-minmax-call.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i16 @src(i32 %arg1, i32 %arg2, ptr %arg0) { +; CHECK-LABEL: define i16 @src( +; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], ptr [[ARG0:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = udiv i32 [[ARG2]], [[ARG1]] +; CHECK-NEXT: [[V1:%.*]] = load i16, ptr [[ARG0]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[V0]] to i16 +; CHECK-NEXT: [[V2:%.*]] = and i16 [[TMP1]], -6 +; CHECK-NEXT: [[V4:%.*]] = call i16 @llvm.umin.i16(i16 [[V2]], i16 [[V1]]) +; CHECK-NEXT: ret i16 [[V4]] +; + %v0 = udiv i32 %arg2, %arg1 + %v1 = load i16, ptr %arg0, align 2 + %v2 = and i32 %v0, 65530 + %v3 = zext i16 %v1 to i32 + %v4 = call i32 @llvm.umin.i32(i32 %v2, i32 %v3) + %v5 = trunc nuw i32 %v4 to i16 + ret i16 %v5 +} + +define i16 @src1(i32 %arg1, i32 %arg2, ptr %arg0) { +; CHECK-LABEL: define i16 @src1( +; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], ptr [[ARG0:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = udiv i32 [[ARG2]], [[ARG1]] +; CHECK-NEXT: [[V1:%.*]] = load i16, ptr [[ARG0]], align 2 +; CHECK-NEXT: [[V2:%.*]] = trunc i32 [[V0]] to i16 +; CHECK-NEXT: [[V4:%.*]] = call i16 @llvm.umin.i16(i16 [[V2]], i16 [[V1]]) +; CHECK-NEXT: ret i16 [[V4]] +; + %v0 = udiv i32 %arg2, %arg1 + %v1 = load i16, ptr %arg0, align 2 + %v2 = and i32 %v0, 65535 + %v3 = zext i16 %v1 to i32 + %v4 = call i32 @llvm.umin.i32(i32 %v2, i32 %v3) + %v5 = trunc nuw i32 %v4 to i16 + ret i16 %v5 +} |
🪟 Windows x64 Test Results
Failed Tests(click on a test name to see its output) ClangClang.CodeGen/bitfield-2.cLLVMLLVM.Transforms/IndVarSimplify/X86/2009-04-15-shorten-iv-vars-2.llLLVM.Transforms/InstCombine/cast-mul-select.llLLVM.Transforms/InstCombine/cast.llLLVM.Transforms/InstCombine/funnel.llLLVM.Transforms/InstCombine/icmp-mul-zext.llLLVM.Transforms/InstCombine/instcombine-verify-known-bits.llLLVM.Transforms/InstCombine/rotate.llLLVM.Transforms/InstCombine/sadd_sat.llLLVM.Transforms/InstCombine/sext-of-trunc-nsw.llLLVM.Transforms/InstCombine/shift.llLLVM.Transforms/InstCombine/trunc-binop-ext.llLLVM.Transforms/InstCombine/trunc-inseltpoison.llLLVM.Transforms/InstCombine/trunc.llLLVM.Transforms/LoopVectorize/reduction-inloop-pred.llLLVM.Transforms/PhaseOrdering/X86/pr52289.llLLVM.Transforms/PhaseOrdering/bitfield-bittests.llLLVM.Transforms/PhaseOrdering/cmp-logic.llLLVM.Transforms/PhaseOrdering/reassociate-after-unroll.llIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
| switch (I->getOpcode()) { | ||
| case Instruction::And: { | ||
| // And can be truncated if all the truncated bits are zero. | ||
| uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use unsigned to match the declared type of getScalarSizeInBits
| break; | ||
| } | ||
| case Intrinsic::umin: | ||
| case Intrinsic::umax: { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are no tests for umax
nikic left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm confused by your `and` changes. `and` can be unconditionally truncated, why does it need special handling?
Conversely, umin/umax can not be unconditionally truncated, we need to be checking that the high bits are zero for those, which seems to be missing? Please provide alive2 proofs.
Extend `canEvaluateTruncated` to handle `umin`/`umax` intrinsics, allowing them to be narrowed when the result is truncated and the operands can be safely evaluated in a narrower type.
Also improve `and` instruction handling: if the high bits above the truncation width are known zero in either operand, the `and` can be safely narrowed (truncate first, then apply the truncated mask if needed).
This PR fixes issue #167096.