Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
235 changes: 235 additions & 0 deletions llvm/lib/Transforms/IPO/GlobalOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
Expand Down Expand Up @@ -2498,6 +2499,236 @@ OptimizeGlobalAliases(Module &M,
return Changed;
}

struct AccessPattern {
Type *Ty;

APInt Stride;
APInt Offset;
};

template <> struct DenseMapInfo<AccessPattern> {
static inline AccessPattern getEmptyKey() {
return {(Type *)1, APInt(), APInt()};
}
static inline AccessPattern getTombstoneKey() {
return {(Type *)2, APInt(), APInt()};
}
static unsigned getHashValue(const AccessPattern &AP) {
return hash_combine(AP.Ty, AP.Stride, AP.Offset);
}
static bool isEqual(const AccessPattern &LHS, const AccessPattern &RHS) {
return LHS.Ty == RHS.Ty && LHS.Stride == RHS.Stride &&
LHS.Offset == RHS.Offset;
}
};

// return (gcd, x, y) such that a*x + b*y = gcd
std::tuple<APInt, APInt, APInt> ExtendedSignedGCD(APInt a, APInt b) {
unsigned BW = a.getBitWidth();
APInt x = APInt(BW, 1);
APInt y = APInt(BW, 0);
APInt x1 = APInt(BW, 0);
APInt y1 = APInt(BW, 1);

while (b != 0) {
APInt q = APInt(BW, 0);
APInt r = APInt(BW, 0);
APInt::sdivrem(a, b, q, r);
a = std::move(b);
b = std::move(r);

std::swap(x, x1);
std::swap(y, y1);
x1 -= q * x;
y1 -= q * y;
}
return {a, x, y};
}

// Build if possible a new pair of Stride and Offset that are part of the
// original but are also aligned.
std::optional<std::pair<APInt, APInt>>
AlignStrideAndOffset(const APInt &Stride, const APInt &Offset,
const APInt &Align) {
// Here Offset * Align is added only to make sure Missing is positive or zero
APInt Missing = ((Offset * Align) - Offset).urem(Align);

// fast path for common case,
if (Missing == 0)
return {
{(Stride * Align).udiv(APIntOps::GreatestCommonDivisor(Stride, Align)),
Offset}};

auto [GCD, X, Y] = ExtendedSignedGCD(Stride, Align);
assert(APIntOps::GreatestCommonDivisor(Stride, Align) == GCD);
assert((X * Stride + Y * Align) == GCD);

if (Missing.urem(GCD) != 0) {
// The new Stride + Offset cannot be created because there is no elements in
// the original that would be properly aligned
return std::nullopt;
}

APInt StrideAlign = Stride * Align;
// X could be negative, so we need to use sdiv
// Here + Offset * Align is added only to make sure Missing is positive
APInt NumStride =
(((Missing * X).sdiv(GCD)) + (StrideAlign * Align)).urem(Align);

APInt NewStride = StrideAlign.udiv(GCD);
APInt NewOffset = (Offset + (NumStride * Stride)).urem(NewStride);
return {{std::move(NewStride), std::move(NewOffset)}};
}

static bool addRangeMetadata(Module &M) {
const DataLayout &DL = M.getDataLayout();
bool Changed = false;

for (GlobalValue &Global : M.global_values()) {

auto *GV = dyn_cast<GlobalVariable>(&Global);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
continue;

// To be able to go to the next GlobalVariable with a return
[&] {
unsigned IndexBW = DL.getIndexTypeSizeInBits(GV->getType());

struct PointerInfo {
Value *Ptr;

// Zero denotes not set
APInt Stride;
APInt Offset;
};

// GEPs only take one pointer operand, the one we will come from, so we
// dont need to do uniqueing during the DFS
SmallVector<PointerInfo> Stack;

// All loads of the global that this code can analyze grouped by access
// pattern. Loads with the same access pattern can access the same offsets
// in the global, so they can be treated the same.
SmallDenseMap<AccessPattern, SmallVector<LoadInst *>> LoadsByAccess;

Stack.push_back({GV, APInt(IndexBW, 0), APInt(IndexBW, 0)});

while (!Stack.empty()) {
PointerInfo Curr = Stack.pop_back_val();

if (!isa<GlobalVariable>(Curr.Ptr)) {
if (auto *LI = dyn_cast<LoadInst>(Curr.Ptr)) {

if (!LI->getType()->isIntegerTy())
continue;

if (LI->hasMetadata(LLVMContext::MD_range))
continue;

// This is an access at a fixed offset, I expect this is handled
// elsewhere so we skip it.
if (Curr.Stride == 0)
continue;

// This case is very rare, but what it means is that we
// dont know at compile-time what offsets into the Global arrays are
// safe to access with this load. So we give-up.
if (LI->getAlign() > GV->getAlign().valueOrOne())
continue;

auto NewStrideAndOffset =
AlignStrideAndOffset(Curr.Stride, Curr.Offset,
APInt(IndexBW, LI->getAlign().value()));

if (!NewStrideAndOffset) {
// This load cannot access an offset with the correct alignment
LI->replaceAllUsesWith(PoisonValue::get(LI->getType()));
continue;
}

AccessPattern AP{LI->getType(), NewStrideAndOffset->first,
NewStrideAndOffset->second};
assert(AP.Stride != 0);
LoadsByAccess[AP].push_back(LI);
continue;
}
auto *GEP = dyn_cast<GetElementPtrInst>(Curr.Ptr);
if (!GEP)
continue;

SmallMapVector<Value *, APInt, 4> VarOffsets;
if (!GEP->collectOffset(DL, IndexBW, VarOffsets, Curr.Offset))
continue;

for (auto [V, Scale] : VarOffsets) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (auto [V, Scale] : VarOffsets) {
for (auto &[V, Scale] : VarOffsets) {

// Commented out because I dont understand why we would need this
// But it was part of getStrideAndModOffsetOfGEP
// // Only keep a power of two factor for non-inbounds
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// if (!GEP->isInBounds())
// Scale =
// APInt::getOneBitSet(Scale.getBitWidth(),
// Scale.countr_zero());

if (Curr.Stride == 0)
Curr.Stride = Scale;
else
Curr.Stride = APIntOps::GreatestCommonDivisor(Curr.Stride, Scale);
}
}

for (User *U : Curr.Ptr->users()) {
if (isa<LoadInst, GetElementPtrInst>(U)) {
Curr.Ptr = U;
Stack.push_back(Curr);
}
}
}

for (auto [AP, Loads] : LoadsByAccess) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (auto [AP, Loads] : LoadsByAccess) {
for (auto &[AP, Loads] : LoadsByAccess) {
{
APInt SMin = APInt::getSignedMaxValue(AP.Ty->getIntegerBitWidth());
APInt SMax = APInt::getSignedMinValue(AP.Ty->getIntegerBitWidth());

APInt LastValidOffset =
APInt(IndexBW, DL.getTypeAllocSize(GV->getValueType()) -
DL.getTypeStoreSize(AP.Ty));
for (APInt Offset = AP.Offset; Offset.ule(LastValidOffset);
Offset += AP.Stride) {
assert(Offset.isAligned(Loads[0]->getAlign()));
Constant *Cst = ConstantFoldLoadFromConstPtr(GV, AP.Ty, Offset, DL);

if (!isa_and_nonnull<ConstantInt>(Cst))
// Lambda captures of a struct binding is only available starting
// in C++20, so we skip to the next element with goto
goto NextGroup;

// MD_range is order agnostics
SMin = APIntOps::smin(SMin, Cst->getUniqueInteger());
SMax = APIntOps::smax(SMax, Cst->getUniqueInteger());
Comment on lines +2701 to +2708
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (!isa_and_nonnull<ConstantInt>(Cst))
// Lambda captures of a struct binding is only available starting
// in C++20, so we skip to the next element with goto
goto NextGroup;
// MD_range is order agnostics
SMin = APIntOps::smin(SMin, Cst->getUniqueInteger());
SMax = APIntOps::smax(SMax, Cst->getUniqueInteger());
if (auto *CI = dyn_cast<ConstantInt>(Cst)) {
// MD_range is order agnostics
SMin = APIntOps::smin(SMin, CI->getValue());
SMax = APIntOps::smax(SMax, CI->getValue());
}
else {
// Lambda captures of a struct binding is only available starting
// in C++20, so we skip to the next element with goto
goto NextGroup;
}
}

MDBuilder MDHelper(M.getContext());

Changed = true;
if (SMin == SMax) {
for (LoadInst *LI : Loads)
LI->replaceAllUsesWith(ConstantInt::get(AP.Ty, SMin));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing tests for this case.

} else {
// The Range is allowed to wrap
MDNode *RNode = MDHelper.createRange(SMin, SMax + 1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should not add the range metadata for [INT_MIN, INT_MAX]. It will assert since we don't know if it represents a full range or an empty range.

ConstantRange llvm::getConstantRangeFromMetadata(const MDNode &Ranges) {
const unsigned NumRanges = Ranges.getNumOperands() / 2;
assert(NumRanges >= 1 && "Must have at least one range!");
assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
for (unsigned i = 1; i < NumRanges; ++i) {
auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
// Note: unionWith will potentially create a range that contains values not
// contained in any of the original N ranges.
CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
}
return CR;
}

assert((Lower != Upper || (Lower.isMaxValue() || Lower.isMinValue())) &&
"Lower == Upper, but they aren't min or max value!");

for (LoadInst *LI : Loads)
LI->setMetadata(LLVMContext::MD_range, RNode);
}
}
NextGroup:
(void)0; // Label expect statements
}
}();
}
return Changed;
}

static Function *
FindAtExitLibFunc(Module &M,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
Expand Down Expand Up @@ -2887,6 +3118,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
Changed |= LocalChange;
}

// Add range metadata to loads from constant global variables based on the
// values that could be loaded from the variable
Changed |= addRangeMetadata(M);

// TODO: Move all global ctors functions to the end of the module for code
// layout.

Expand Down
Loading
Loading