Skip to content

Commit c2123a2

Browse files
committed
[GlobalOpt] Add range metadata to loads from constant global variables
1 parent bedd392 commit c2123a2

File tree

2 files changed

+230
-0
lines changed

2 files changed

+230
-0
lines changed

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "llvm/IR/Instruction.h"
4646
#include "llvm/IR/Instructions.h"
4747
#include "llvm/IR/IntrinsicInst.h"
48+
#include "llvm/IR/MDBuilder.h"
4849
#include "llvm/IR/Module.h"
4950
#include "llvm/IR/Operator.h"
5051
#include "llvm/IR/Type.h"
@@ -2498,6 +2499,102 @@ OptimizeGlobalAliases(Module &M,
24982499
return Changed;
24992500
}
25002501

2502+
static bool AddRangeMetadata(Module &M) {
2503+
const DataLayout &DL = M.getDataLayout();
2504+
bool Changed = false;
2505+
2506+
for (GlobalValue &Global : M.global_values()) {
2507+
2508+
auto *GV = dyn_cast<GlobalVariable>(&Global);
2509+
if (!GV || !GV->hasDefinitiveInitializer())
2510+
continue;
2511+
2512+
// To be able to go to the next GlobalVariable with a return
2513+
[&] {
2514+
uint64_t GlobalByteSize = DL.getTypeAllocSize(GV->getValueType());
2515+
unsigned BW = DL.getIndexTypeSizeInBits(GV->getType());
2516+
2517+
SmallVector<LoadInst *> ArrayLikeLoads;
2518+
Type *ElemTy = nullptr;
2519+
2520+
for (User *U : GV->users()) {
2521+
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
2522+
Type *GEPElemTy = GEP->getResultElementType();
2523+
if (!GEP->isInBounds() || !GEPElemTy->isIntegerTy())
2524+
continue;
2525+
2526+
// This restriction that all accesses use the same type could be
2527+
// lifted
2528+
if (!ElemTy)
2529+
ElemTy = GEPElemTy;
2530+
else if (ElemTy != GEPElemTy)
2531+
return;
2532+
2533+
SmallMapVector<Value *, APInt, 4> Index;
2534+
APInt CstOffset(BW, 0);
2535+
GEP->collectOffset(DL, BW, Index, CstOffset);
2536+
2537+
// This check is needed for correctness of the code below.
2538+
// Be we could only traverse the range starting at the constant offset
2539+
if (!CstOffset.isAligned(DL.getPrefTypeAlign(GEPElemTy)))
2540+
return;
2541+
2542+
// The restriction that this is a 1D array could be lifted
2543+
if (Index.size() != 1 ||
2544+
Index.front().second != DL.getTypeAllocSize(GEPElemTy))
2545+
return;
2546+
2547+
for (User *U : GEP->users()) {
2548+
if (auto *LI = dyn_cast<LoadInst>(U)) {
2549+
// This restriction that all accesses use the same type could be
2550+
// lifted
2551+
if (LI->getType() == GEPElemTy)
2552+
ArrayLikeLoads.push_back(LI);
2553+
else
2554+
return;
2555+
}
2556+
}
2557+
}
2558+
}
2559+
2560+
if (ArrayLikeLoads.empty())
2561+
return;
2562+
2563+
APInt Idx = APInt::getZero(64);
2564+
APInt Min = APInt::getSignedMaxValue(
2565+
ArrayLikeLoads[0]->getType()->getIntegerBitWidth());
2566+
APInt Max = APInt::getSignedMinValue(
2567+
ArrayLikeLoads[0]->getType()->getIntegerBitWidth());
2568+
2569+
uint64_t ElemSize = DL.getTypeStoreSize(ArrayLikeLoads[0]->getType());
2570+
uint64_t NumElem =
2571+
GlobalByteSize / DL.getTypeStoreSize(ArrayLikeLoads[0]->getType());
2572+
for (uint64_t i = 0; i < NumElem; i++) {
2573+
Constant *Cst = ConstantFoldLoadFromConstPtr(
2574+
GV, ArrayLikeLoads[0]->getType(), Idx, DL);
2575+
2576+
if (!Cst)
2577+
return;
2578+
2579+
Idx += ElemSize;
2580+
2581+
// MD_range data is expected in signed order, so we use smin and smax
2582+
// here
2583+
Min = APIntOps::smin(Min, Cst->getUniqueInteger());
2584+
Max = APIntOps::smax(Max, Cst->getUniqueInteger());
2585+
}
2586+
2587+
llvm::MDBuilder MDHelper(M.getContext());
2588+
// The Range is allowed to wrap
2589+
llvm::MDNode *RNode = MDHelper.createRange(Min, Max + 1);
2590+
for (LoadInst *LI : ArrayLikeLoads)
2591+
LI->setMetadata(LLVMContext::MD_range, RNode);
2592+
Changed = true;
2593+
}();
2594+
}
2595+
return Changed;
2596+
}
2597+
25012598
static Function *
25022599
FindAtExitLibFunc(Module &M,
25032600
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
@@ -2887,6 +2984,10 @@ optimizeGlobalsInModule(Module &M, const DataLayout &DL,
28872984
Changed |= LocalChange;
28882985
}
28892986

2987+
// Add range metadata to loads from constant global variables based on the
2988+
// values that could be loaded from the variable
2989+
Changed |= AddRangeMetadata(M);
2990+
28902991
// TODO: Move all global ctors functions to the end of the module for code
28912992
// layout.
28922993

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p globalopt -S %s | FileCheck %s
3+
4+
@gvar0 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 -5, i64 1, i64 10, [253 x i64] zeroinitializer }> }, align 8
5+
@gvar1 = constant { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 5, [253 x i64] zeroinitializer }> }, align 8
6+
@gvar2 = global { <{ i64, i64, i64, [253 x i64] }> } { <{ i64, i64, i64, [253 x i64] }> <{ i64 0, i64 1, i64 2, [253 x i64] zeroinitializer }> }, align 8
7+
@gvar3 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
8+
@gvar4 = constant [8 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 100, i32 -6789, i32 8388608], align 16
9+
@gvar5 = constant [2 x [6 x i8]] [[6 x i8] c"\01a_\02-0", [6 x i8] c" \0E\FF\07\08\09"], align 1
10+
11+
define i64 @test_basic0(i64 %3) {
12+
; CHECK-LABEL: define i64 @test_basic0(
13+
; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
14+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
15+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0:![0-9]+]]
16+
; CHECK-NEXT: ret i64 [[TMP2]]
17+
;
18+
%ptr = getelementptr inbounds [256 x i64], ptr @gvar0, i64 0, i64 %3
19+
%5 = load i64, ptr %ptr, align 8
20+
ret i64 %5
21+
}
22+
23+
define i64 @test_basic1(i64 %3) {
24+
; CHECK-LABEL: define i64 @test_basic1(
25+
; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
26+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 [[TMP0]]
27+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[PTR]], align 8, !range [[RNG0]]
28+
; CHECK-NEXT: ret i64 [[TMP2]]
29+
;
30+
%ptr = getelementptr inbounds [32 x i64], ptr @gvar0, i64 0, i64 %3
31+
%5 = load i64, ptr %ptr, align 8
32+
ret i64 %5
33+
}
34+
35+
define i32 @test_different_type(i64 %3) {
36+
; CHECK-LABEL: define i32 @test_different_type(
37+
; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
38+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 [[TMP0]]
39+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8, !range [[RNG1:![0-9]+]]
40+
; CHECK-NEXT: ret i32 [[TMP2]]
41+
;
42+
%ptr = getelementptr inbounds [512 x i32], ptr @gvar1, i64 0, i64 %3
43+
%5 = load i32, ptr %ptr, align 8
44+
ret i32 %5
45+
}
46+
47+
define i32 @test_non_constant(i64 %3) {
48+
; CHECK-LABEL: define i32 @test_non_constant(
49+
; CHECK-SAME: i64 [[TMP0:%.*]]) local_unnamed_addr {
50+
; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 [[TMP0]]
51+
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[PTR]], align 8
52+
; CHECK-NEXT: ret i32 [[TMP2]]
53+
;
54+
%ptr = getelementptr inbounds [512 x i32], ptr @gvar2, i64 0, i64 %3
55+
%5 = load i32, ptr %ptr, align 8
56+
ret i32 %5
57+
}
58+
59+
define i64 @test_other(i8 %first_idx) {
60+
; CHECK-LABEL: define i64 @test_other(
61+
; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
62+
; CHECK-NEXT: [[ENTRY:.*:]]
63+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
64+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar3, i64 [[IDXPROM]]
65+
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8, !range [[RNG2:![0-9]+]]
66+
; CHECK-NEXT: ret i64 [[TMP0]]
67+
;
68+
entry:
69+
%idxprom = zext i8 %first_idx to i64
70+
%arrayidx = getelementptr inbounds i64, ptr @gvar3, i64 %idxprom
71+
%0 = load i64, ptr %arrayidx, align 8
72+
ret i64 %0
73+
}
74+
75+
; This could be supported, but it is rare and more complex, so for now we don't process it.
76+
define i64 @test_multiple_types0(i8 %first_idx) {
77+
; CHECK-LABEL: define i64 @test_multiple_types0(
78+
; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
79+
; CHECK-NEXT: [[ENTRY:.*:]]
80+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
81+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr @gvar4, i64 [[IDXPROM]]
82+
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
83+
; CHECK-NEXT: ret i64 [[TMP0]]
84+
;
85+
entry:
86+
%idxprom = zext i8 %first_idx to i64
87+
%arrayidx = getelementptr inbounds i64, ptr @gvar4, i64 %idxprom
88+
%0 = load i64, ptr %arrayidx, align 8
89+
ret i64 %0
90+
}
91+
92+
define i32 @test_multiple_types1(i8 %first_idx) {
93+
; CHECK-LABEL: define i32 @test_multiple_types1(
94+
; CHECK-SAME: i8 [[FIRST_IDX:%.*]]) local_unnamed_addr {
95+
; CHECK-NEXT: [[ENTRY:.*:]]
96+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i8 [[FIRST_IDX]] to i64
97+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr @gvar4, i64 [[IDXPROM]]
98+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 8
99+
; CHECK-NEXT: ret i32 [[TMP0]]
100+
;
101+
entry:
102+
%idxprom = zext i8 %first_idx to i64
103+
%arrayidx = getelementptr inbounds i32, ptr @gvar4, i64 %idxprom
104+
%0 = load i32, ptr %arrayidx, align 8
105+
ret i32 %0
106+
}
107+
108+
; This could also be supported, but for now it is not.
109+
define dso_local noundef signext i8 @multi_dimentional(i8 noundef zeroext %0, i8 noundef zeroext %1) local_unnamed_addr #0 {
110+
; CHECK-LABEL: define dso_local noundef signext i8 @multi_dimentional(
111+
; CHECK-SAME: i8 noundef zeroext [[TMP0:%.*]], i8 noundef zeroext [[TMP1:%.*]]) local_unnamed_addr {
112+
; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
113+
; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP1]] to i64
114+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 [[TMP3]], i64 [[TMP4]]
115+
; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 1
116+
; CHECK-NEXT: ret i8 [[TMP6]]
117+
;
118+
%3 = zext i8 %0 to i64
119+
%4 = zext i8 %1 to i64
120+
%5 = getelementptr inbounds [2 x [6 x i8]], ptr @gvar5, i64 0, i64 %3, i64 %4
121+
%6 = load i8, ptr %5, align 1
122+
ret i8 %6
123+
}
124+
125+
;.
126+
; CHECK: [[RNG0]] = !{i64 -5, i64 11}
127+
; CHECK: [[RNG1]] = !{i32 0, i32 6}
128+
; CHECK: [[RNG2]] = !{i64 2, i64 36028801313924476}
129+
;.

0 commit comments

Comments
 (0)