-   Notifications  You must be signed in to change notification settings 
- Fork 15k
[GlobalOpt] Add range metadata to loads from constant global variables #127695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-llvm-transforms Author: None (Ralender) ChangesThis Change fixes #125003 I put the process of extracting range metadata from global in GlobalOpt because it is thematically linked and GlobalOpt is only 2 times in the standard O3 pipeline. Full diff: https://github.com/llvm/llvm-project/pull/127695.diff 2 Files Affected: 
 diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 9586fc97a39f7..7744dde2965e6 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -2498,6 +2499,102 @@ OptimizeGlobalAliases(Module &M, return Changed; } +static bool AddRangeMetadata(Module &M) { + const DataLayout &DL = M.getDataLayout(); + bool Changed = false; + + for (GlobalValue &Global : M.global_values()) { + + auto *GV = dyn_cast<GlobalVariable>(&Global); + if (!GV || !GV->hasDefinitiveInitializer()) + continue; + + // To be able to go to the next GlobalVariable with a return + [&] { + uint64_t GlobalByteSize = DL.getTypeAllocSize(GV->getValueType()); + unsigned BW = DL.getIndexTypeSizeInBits(GV->getType()); + + SmallVector<LoadInst *> ArrayLikeLoads; + Type *ElemTy = nullptr; + + for (User *U : GV->users()) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { + Type *GEPElemTy = GEP->getResultElementType(); + if (!GEP->isInBounds() || !GEPElemTy->isIntegerTy()) + continue; + + // This restriction that all accesses use the same type could be + // lifted + if (!ElemTy) + ElemTy = GEPElemTy; + else if (ElemTy != GEPElemTy) + return; + + SmallMapVector<Value *, APInt, 4> Index; + APInt CstOffset(BW, 0); + GEP->collectOffset(DL, BW, Index, CstOffset); + + // This check is needed for correctness of the code below. 
+ // Be we could only traverse the range starting at the constant offset + if (!CstOffset.isAligned(DL.getPrefTypeAlign(GEPElemTy))) + return; + + // The restriction that this is a 1D array could be lifted + if (Index.size() != 1 || + Index.front().second != DL.getTypeAllocSize(GEPElemTy)) + return; + + for (User *U : GEP->users()) { + if (auto *LI = dyn_cast<LoadInst>(U)) { + // This restriction that all accesses use the same type could be + // lifted + if (LI->getType() == GEPElemTy) + ArrayLikeLoads.push_back(LI); + else + return; + } + } + } + } + + if (ArrayLikeLoads.empty()) + return; + + APInt Idx = APInt::getZero(64); + APInt Min = APInt::getSignedMaxValue( + ArrayLikeLoads[0]->getType()->getIntegerBitWidth()); + APInt Max = APInt::getSignedMinValue( + ArrayLikeLoads[0]->getType()->getIntegerBitWidth()); + + uint64_t ElemSize = DL.getTypeStoreSize(ArrayLikeLoads[0]->getType()); + uint64_t NumElem = + GlobalByteSize / DL.getTypeStoreSize(ArrayLikeLoads[0]->getType()); + for (uint64_t i = 0; i < NumElem; i++) { + Constant *Cst = ConstantFoldLoadFromConstPtr( + GV, ArrayLikeLoads[0]->getType(), Idx, DL); + + if (!Cst) + return; + + Idx += ElemSize; + + // MD_range data is expected in signed order, so we use smin and smax + // here + Min = APIntOps::smin(Min, Cst->getUniqueInteger()); + Max = APIntOps::smax(Max, Cst->getUniqueInteger()); + } + + llvm::MDBuilder MDHelper(M.getContext()); + // The Range is allowed to wrap + llvm::MDNode *RNode = MDHelper.createRange(Min, Max + 1); + for (LoadInst *LI : ArrayLikeLoads) + LI->setMetadata(LLVMContext::MD_range, RNode); + Changed = true; + }(); + } + return Changed; +} + static Function * FindAtExitLibFunc(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI, @@ -2887,6 +2984,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL, Changed |= LocalChange; } + // Add range metadata to loads from constant global variables based on the + // values that could be loaded from the variable + Changed |= 
AddRangeMetadata(M); + // TODO: Move all global ctors functions to the end of the module for code // layout. diff --git a/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll new file mode 100644 index 0000000000000..230e9f12726be --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p globalopt -S %s | FileCheck %s + +@gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8 +@gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8 +@gvar2 = global { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 2, [253 x i64] zeroinitializer }> }, align 8 +@gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16 +@gvar4 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16 +@gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1 + +define i64 @test_basic0(i64 %3) { +; CHECK-LABEL: define i64 @test_basic0( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3 + %5 = load i64, ptr %ptr, align 8 + ret i64 %5 +} + +define i64 @test_basic1(i64 %3) { +; CHECK-LABEL: define i64 @test_basic1( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 
[[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3 + %5 = load i64, ptr %ptr, align 8 + ret i64 %5 +} + +define i32 @test_different_type(i64 %3) { +; CHECK-LABEL: define i32 @test_different_type( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP2]] +; + %ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3 + %5 = load i32, ptr %ptr, align 8 + ret i32 %5 +} + +define i32 @test_non_constant(i64 %3) { +; CHECK-LABEL: define i32 @test_non_constant( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %ptr = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 %3 + %5 = load i32, ptr %ptr, align 8 + ret i32 %5 +} + +define i64 @test_other(i8 %first_idx) { +; CHECK-LABEL: define i64 @test_other( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: ret i64 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +; This could be supported but is rare and more complex for for now we dont process it. 
+define i64 @test_multiple_types0(i8 %first_idx) { +; CHECK-LABEL: define i64 @test_multiple_types0( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar4, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: ret i64 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i64, ptr @gvar4, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +define i32 @test_multiple_types1(i8 %first_idx) { +; CHECK-LABEL: define i32 @test_multiple_types1( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar4, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i32, ptr @gvar4, i64 %idxprom + %0 = load i32, ptr %arrayidx, align 8 + ret i32 %0 +} + +; This could be supported also be supported, but for now it not. 
+define dso_local noundef signext i8 @multi_dimentional(i8 noundef zeroext %0, i8 noundef zeroext %1) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef signext i8 @multi_dimentional( +; CHECK-SAME: i8 noundef zeroext [[TMP0:%.*]], i8 noundef zeroext [[TMP1:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: ret i8 [[TMP6]] +; + %3 = zext i8 %0 to i64 + %4 = zext i8 %1 to i64 + %5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4 + %6 = load i8, ptr %5, align 1 + ret i8 %6 +} + +;. +; CHECK: [[RNG0]] = !{i64 -5, i64 11} +; CHECK: [[RNG1]] = !{i32 0, i32 6} +; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476} +;.  | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like a reasonable idea.
| See also the fold at llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp Lines 829 to 916 in db5bc8e 
 | 
| 
 The only thing I dont understand Why it does llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp Lines 844 to 846 in db5bc8e 
 | 
c2123a2 to ad4e92e   Compare   | ✅ With the latest revision this PR passed the C/C++ code formatter. | 
ad4e92e to 6ffd9ae   Compare   | I added the logic similar to the  after having written the code; maybe the process to find closed-forms of the Stride and Offset with alignment is too complicated and error prone, but this should be able to handle a wide variety of GEPs. | 
| ping | 
| @nikic ping | 
6ffd9ae to eccfa9f   Compare   | rebased | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you rebase the branch again? Then I can run some external tests for this patch.
| if (!isa_and_nonnull<ConstantInt>(Cst)) | ||
| // Lambda captures of a struct binding is only available starting | ||
| // in C++20, so we skip to the next element with goto | ||
| goto NextGroup; | ||
|  | ||
| // MD_range is order agnostic | ||
| SMin = APIntOps::smin(SMin, Cst->getUniqueInteger()); | ||
| SMax = APIntOps::smax(SMax, Cst->getUniqueInteger()); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (!isa_and_nonnull<ConstantInt>(Cst)) | |
| // Lambda captures of a struct binding is only available starting | |
| // in C++20, so we skip to the next element with goto | |
| goto NextGroup; | |
| // MD_range is order agnostics | |
| SMin = APIntOps::smin(SMin, Cst->getUniqueInteger()); | |
| SMax = APIntOps::smax(SMax, Cst->getUniqueInteger()); | |
| if (auto *CI = dyn_cast<ConstantInt>(Cst)) { | |
| // MD_range is order agnostics | |
| SMin = APIntOps::smin(SMin, CI->getValue()); | |
| SMax = APIntOps::smax(SMax, CI->getValue()); | |
| } | |
| else { | |
| // Lambda captures of a struct binding is only available starting | |
| // in C++20, so we skip to the next element with goto | |
| goto NextGroup; | |
| } | 
| ret i32 %0 | ||
| } | ||
|  | ||
| ; This could be supported also be supported, but for now it not. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ; This could be supported also be supported, but for now it not. | |
| ; This could also be supported, but for now it is not. | 
| } | ||
|  | ||
| ; This could be supported also be supported, but for now it not. | ||
| define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr #0 { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr #0 { | |
| define i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) { | 
| Changed = true; | ||
| if (SMin == SMax) { | ||
| for (LoadInst *LI : Loads) | ||
| LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin)); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing tests for this case.
| LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin)); | ||
| } else { | ||
| // The Range is allowed to wrap | ||
| MDNode *RNode = MDHelper.createRange(SMin, SMax + 1); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should not add the range metadata for [INT_MIN, INT_MAX]. It will assert since we don't know if it represents a full range or an empty range.
 
llvm-project/llvm/lib/IR/ConstantRange.cpp
Lines 2268 to 2288 in 2ba5e0a
| ConstantRange llvm::getConstantRangeFromMetadata(const MDNode &Ranges) { | |
| const unsigned NumRanges = Ranges.getNumOperands() / 2; | |
| assert(NumRanges >= 1 && "Must have at least one range!"); | |
| assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs"); | |
| auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0)); | |
| auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1)); | |
| ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue()); | |
| for (unsigned i = 1; i < NumRanges; ++i) { | |
| auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); | |
| auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); | |
| // Note: unionWith will potentially create a range that contains values not | |
| // contained in any of the original N ranges. | |
| CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue())); | |
| } | |
| return CR; | |
| } | 
llvm-project/llvm/lib/IR/ConstantRange.cpp
Lines 56 to 57 in 2ba5e0a
| assert((Lower != Upper || (Lower.isMaxValue() || Lower.isMinValue())) && | |
| "Lower == Upper, but they aren't min or max value!"); | 
|  | ||
| // Commented out because I don't understand why we would need this | ||
| // But it was part of getStrideAndModOffsetOfGEP | ||
| // // Only keep a power of two factor for non-inbounds | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Related patch: https://reviews.llvm.org/D146622
 cc @khei4 @nikic
| } | ||
| } | ||
|  | ||
| for (auto [AP, Loads] : LoadsByAccess) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (auto [AP, Loads] : LoadsByAccess) { | |
| for (auto &[AP, Loads] : LoadsByAccess) { | 
| if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset)) | ||
| continue; | ||
|  | ||
| for (auto [V, Scale] : VarOffsets) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (auto [V, Scale] : VarOffsets) { | |
| for (auto &[V, Scale] : VarOffsets) { | 
This Change fixes #125003
I put the process of extracting range metadata from globals in GlobalOpt because it is thematically linked, and GlobalOpt is only run 2 times in the standard O3 pipeline.
Also, the logic only acts on linear 1D arrays when all accesses to one global use the same type, but these restrictions could be lifted if there is interest.