-   Notifications  You must be signed in to change notification settings 
- Fork 15k
[GlobalOpt] Add range metadata to loads from constant global variables #127695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
| @llvm/pr-subscribers-llvm-transforms Author: None (Ralender) ChangesThis Change fixes #125003 I put the process of extracting range metadata from global in GlobalOpt because it is thematically linked and GlobalOpt is only 2 times in the standard O3 pipeline. Full diff: https://github.com/llvm/llvm-project/pull/127695.diff 2 Files Affected: 
 diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp index 9586fc97a39f7..7744dde2965e6 100644 --- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -2498,6 +2499,102 @@ OptimizeGlobalAliases(Module &M, return Changed; } +static bool AddRangeMetadata(Module &M) { + const DataLayout &DL = M.getDataLayout(); + bool Changed = false; + + for (GlobalValue &Global : M.global_values()) { + + auto *GV = dyn_cast<GlobalVariable>(&Global); + if (!GV || !GV->hasDefinitiveInitializer()) + continue; + + // To be able to go to the next GlobalVariable with a return + [&] { + uint64_t GlobalByteSize = DL.getTypeAllocSize(GV->getValueType()); + unsigned BW = DL.getIndexTypeSizeInBits(GV->getType()); + + SmallVector<LoadInst *> ArrayLikeLoads; + Type *ElemTy = nullptr; + + for (User *U : GV->users()) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) { + Type *GEPElemTy = GEP->getResultElementType(); + if (!GEP->isInBounds() || !GEPElemTy->isIntegerTy()) + continue; + + // This restriction that all accesses use the same type could be + // lifted + if (!ElemTy) + ElemTy = GEPElemTy; + else if (ElemTy != GEPElemTy) + return; + + SmallMapVector<Value *, APInt, 4> Index; + APInt CstOffset(BW, 0); + GEP->collectOffset(DL, BW, Index, CstOffset); + + // This check is needed for correctness of the code below. 
+ // Be we could only traverse the range starting at the constant offset + if (!CstOffset.isAligned(DL.getPrefTypeAlign(GEPElemTy))) + return; + + // The restriction that this is a 1D array could be lifted + if (Index.size() != 1 || + Index.front().second != DL.getTypeAllocSize(GEPElemTy)) + return; + + for (User *U : GEP->users()) { + if (auto *LI = dyn_cast<LoadInst>(U)) { + // This restriction that all accesses use the same type could be + // lifted + if (LI->getType() == GEPElemTy) + ArrayLikeLoads.push_back(LI); + else + return; + } + } + } + } + + if (ArrayLikeLoads.empty()) + return; + + APInt Idx = APInt::getZero(64); + APInt Min = APInt::getSignedMaxValue( + ArrayLikeLoads[0]->getType()->getIntegerBitWidth()); + APInt Max = APInt::getSignedMinValue( + ArrayLikeLoads[0]->getType()->getIntegerBitWidth()); + + uint64_t ElemSize = DL.getTypeStoreSize(ArrayLikeLoads[0]->getType()); + uint64_t NumElem = + GlobalByteSize / DL.getTypeStoreSize(ArrayLikeLoads[0]->getType()); + for (uint64_t i = 0; i < NumElem; i++) { + Constant *Cst = ConstantFoldLoadFromConstPtr( + GV, ArrayLikeLoads[0]->getType(), Idx, DL); + + if (!Cst) + return; + + Idx += ElemSize; + + // MD_range data is expected in signed order, so we use smin and smax + // here + Min = APIntOps::smin(Min, Cst->getUniqueInteger()); + Max = APIntOps::smax(Max, Cst->getUniqueInteger()); + } + + llvm::MDBuilder MDHelper(M.getContext()); + // The Range is allowed to wrap + llvm::MDNode *RNode = MDHelper.createRange(Min, Max + 1); + for (LoadInst *LI : ArrayLikeLoads) + LI->setMetadata(LLVMContext::MD_range, RNode); + Changed = true; + }(); + } + return Changed; +} + static Function * FindAtExitLibFunc(Module &M, function_ref<TargetLibraryInfo &(Function &)> GetTLI, @@ -2887,6 +2984,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL, Changed |= LocalChange; } + // Add range metadata to loads from constant global variables based on the + // values that could be loaded from the variable + Changed |= 
AddRangeMetadata(M); + // TODO: Move all global ctors functions to the end of the module for code // layout. diff --git a/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll new file mode 100644 index 0000000000000..230e9f12726be --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/add_range_metadata.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -p globalopt -S %s | FileCheck %s + +@gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8 +@gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8 +@gvar2 = global { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 2, [253 x i64] zeroinitializer }> }, align 8 +@gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16 +@gvar4 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16 +@gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1 + +define i64 @test_basic0(i64 %3) { +; CHECK-LABEL: define i64 @test_basic0( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3 + %5 = load i64, ptr %ptr, align 8 + ret i64 %5 +} + +define i64 @test_basic1(i64 %3) { +; CHECK-LABEL: define i64 @test_basic1( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 
[[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]] +; CHECK-NEXT: ret i64 [[TMP2]] +; + %ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3 + %5 = load i64, ptr %ptr, align 8 + ret i64 %5 +} + +define i32 @test_different_type(i64 %3) { +; CHECK-LABEL: define i32 @test_different_type( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: ret i32 [[TMP2]] +; + %ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3 + %5 = load i32, ptr %ptr, align 8 + ret i32 %5 +} + +define i32 @test_non_constant(i64 %3) { +; CHECK-LABEL: define i32 @test_non_constant( +; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %ptr = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 %3 + %5 = load i32, ptr %ptr, align 8 + ret i32 %5 +} + +define i64 @test_other(i8 %first_idx) { +; CHECK-LABEL: define i64 @test_other( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: ret i64 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +; This could be supported but is rare and more complex for for now we dont process it. 
+define i64 @test_multiple_types0(i8 %first_idx) { +; CHECK-LABEL: define i64 @test_multiple_types0( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar4, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: ret i64 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i64, ptr @gvar4, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + ret i64 %0 +} + +define i32 @test_multiple_types1(i8 %first_idx) { +; CHECK-LABEL: define i32 @test_multiple_types1( +; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar4, i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: ret i32 [[TMP0]] +; +entry: + %idxprom = zext i8 %first_idx to i64 + %arrayidx = getelementptr inbounds i32, ptr @gvar4, i64 %idxprom + %0 = load i32, ptr %arrayidx, align 8 + ret i32 %0 +} + +; This could be supported also be supported, but for now it not. 
+define dso_local noundef signext i8 @multi_dimentional(i8 noundef zeroext %0, i8 noundef zeroext %1) local_unnamed_addr #0 { +; CHECK-LABEL: define dso_local noundef signext i8 @multi_dimentional( +; CHECK-SAME: i8 noundef zeroext [[TMP0:%.*]], i8 noundef zeroext [[TMP1:%.*]]) local_unnamed_addr { +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1 +; CHECK-NEXT: ret i8 [[TMP6]] +; + %3 = zext i8 %0 to i64 + %4 = zext i8 %1 to i64 + %5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4 + %6 = load i8, ptr %5, align 1 + ret i8 %6 +} + +;. +; CHECK: [[RNG0]] = !{i64 -5, i64 11} +; CHECK: [[RNG1]] = !{i32 0, i32 6} +; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476} +;.  | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like a reasonable idea.
| See also the fold at llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp Lines 829 to 916 in db5bc8e 
 | 
| 
 The only thing I dont understand Why it does llvm-project/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp Lines 844 to 846 in db5bc8e 
 | 
c2123a2 to ad4e92e   Compare   | ✅ With the latest revision this PR passed the C/C++ code formatter. | 
ad4e92e to 6ffd9ae   Compare   | I added the logic similar to the  after having written the code; maybe the process to find closed-forms of the Stride and Offset with alignment is too complicated and error prone, but this should be able to handle a wide variety of GEPs. | 
| ping | 
| @nikic ping | 
6ffd9ae to eccfa9f   Compare   | rebased | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you rebase the branch again? Then I can run some external tests for this patch.
| if (!isa_and_nonnull<ConstantInt>(Cst)) | ||
| // Lambda captures of a struct binding is only available starting | ||
| // in C++20, so we skip to the next element with goto | ||
| goto NextGroup; | ||
|  | ||
| // MD_range is order agnostic | ||
| SMin = APIntOps::smin(SMin, Cst->getUniqueInteger()); | ||
| SMax = APIntOps::smax(SMax, Cst->getUniqueInteger()); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (!isa_and_nonnull<ConstantInt>(Cst)) | |
| // Lambda captures of a struct binding is only available starting | |
| // in C++20, so we skip to the next element with goto | |
| goto NextGroup; | |
| // MD_range is order agnostics | |
| SMin = APIntOps::smin(SMin, Cst->getUniqueInteger()); | |
| SMax = APIntOps::smax(SMax, Cst->getUniqueInteger()); | |
| if (auto *CI = dyn_cast<ConstantInt>(Cst)) { | |
| // MD_range is order agnostics | |
| SMin = APIntOps::smin(SMin, CI->getValue()); | |
| SMax = APIntOps::smax(SMax, CI->getValue()); | |
| } | |
| else { | |
| // Lambda captures of a struct binding is only available starting | |
| // in C++20, so we skip to the next element with goto | |
| goto NextGroup; | |
| } | 
| ret i32 %0 | ||
| } | ||
|  | ||
| ; This could be supported also be supported, but for now it not. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ; This could be supported also be supported, but for now it not. | |
| ; This could also be supported, but for now it is not. | 
| } | ||
|  | ||
| ; This could be supported also be supported, but for now it not. | ||
| define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr #0 { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| define dso_local signext i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) local_unnamed_addr #0 { | |
| define i8 @multi_dimentional0(i8 zeroext %0, i8 zeroext %1) { | 
| Changed = true; | ||
| if (SMin == SMax) { | ||
| for (LoadInst *LI : Loads) | ||
| LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin)); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing tests for this case.
| LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin)); | ||
| } else { | ||
| // The Range is allowed to wrap | ||
| MDNode *RNode = MDHelper.createRange(SMin, SMax + 1); | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should not add the range metadata for [INT_MIN, INT_MAX]. It will assert since we don't know if it represents a full range or an empty range.
 
llvm-project/llvm/lib/IR/ConstantRange.cpp
Lines 2268 to 2288 in 2ba5e0a
| ConstantRange llvm::getConstantRangeFromMetadata(const MDNode &Ranges) { | |
| const unsigned NumRanges = Ranges.getNumOperands() / 2; | |
| assert(NumRanges >= 1 && "Must have at least one range!"); | |
| assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs"); | |
| auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0)); | |
| auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1)); | |
| ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue()); | |
| for (unsigned i = 1; i < NumRanges; ++i) { | |
| auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); | |
| auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); | |
| // Note: unionWith will potentially create a range that contains values not | |
| // contained in any of the original N ranges. | |
| CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue())); | |
| } | |
| return CR; | |
| } | 
llvm-project/llvm/lib/IR/ConstantRange.cpp
Lines 56 to 57 in 2ba5e0a
| assert((Lower != Upper || (Lower.isMaxValue() || Lower.isMinValue())) && | |
| "Lower == Upper, but they aren't min or max value!"); | 
|  | ||
| // Commented out because I don't understand why we would need this | ||
| // But it was part of getStrideAndModOffsetOfGEP | ||
| // // Only keep a power of two factor for non-inbounds | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Related patch: https://reviews.llvm.org/D146622
 cc @khei4 @nikic
| } | ||
| } | ||
|  | ||
| for (auto [AP, Loads] : LoadsByAccess) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (auto [AP, Loads] : LoadsByAccess) { | |
| for (auto &[AP, Loads] : LoadsByAccess) { | 
| if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset)) | ||
| continue; | ||
|  | ||
| for (auto [V, Scale] : VarOffsets) { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (auto [V, Scale] : VarOffsets) { | |
| for (auto &[V, Scale] : VarOffsets) { | 
This Change fixes #125003
I put the process of extracting range metadata from globals in GlobalOpt because it is thematically linked, and GlobalOpt is only run 2 times in the standard O3 pipeline.
Also, the logic only acts on linear 1D arrays when all accesses to one global use the same type, but these restrictions could be lifted if there is interest.