- Notifications
You must be signed in to change notification settings - Fork 15.2k
[Attributor] Reordering bins of an allocation based on access patterns #95319
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
vidsinghal wants to merge 2 commits into llvm:main Choose a base branch from vidsinghal:reordering_bins
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline, and old review comments may become outdated.
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
Member
| @llvm/pr-subscribers-llvm-transforms Author: Vidush Singhal (vidsinghal) Changes: Patch is 81.36 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/95319.diff 17 Files Affected:
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index d3d3a9c43c84f..990c5ea7c7ca1 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -98,16 +98,20 @@ #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DirectedGraph.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/iterator.h" #include "llvm/Analysis/AssumeBundleQueries.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/DDG.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryLocation.h" @@ -140,6 +144,7 @@ #include <limits> #include <map> #include <optional> +#include <tuple> namespace llvm { @@ -5143,9 +5148,7 @@ struct DenormalFPMathState : public AbstractState { return Mode != Other.Mode || ModeF32 != Other.ModeF32; } - bool isValid() const { - return Mode.isValid() && ModeF32.isValid(); - } + bool isValid() const { return Mode.isValid() && ModeF32.isValid(); } static DenormalMode::DenormalModeKind unionDenormalKind(DenormalMode::DenormalModeKind Callee, @@ -5185,9 +5188,7 @@ struct DenormalFPMathState : public AbstractState { // state. DenormalState getAssumed() const { return Known; } - bool isValidState() const override { - return Known.isValid(); - } + bool isValidState() const override { return Known.isValid(); } /// Return true if there are no dynamic components to the denormal mode worth /// specializing. 
@@ -5198,9 +5199,7 @@ struct DenormalFPMathState : public AbstractState { Known.ModeF32.Output != DenormalMode::Dynamic; } - bool isAtFixpoint() const override { - return IsAtFixedpoint; - } + bool isAtFixpoint() const override { return IsAtFixedpoint; } ChangeStatus indicateFixpoint() { bool Changed = !IsAtFixedpoint; @@ -6112,6 +6111,56 @@ struct AAPointerInfo : public AbstractAttribute { Type *Ty; }; + /// A helper containing a list of offsets computed for a Use. Ideally this + /// list should be strictly ascending, but we ensure that only when we + /// actually translate the list of offsets to a RangeList. + struct OffsetInfo { + using VecTy = SmallVector<int64_t>; + using const_iterator = VecTy::const_iterator; + VecTy Offsets; + + const_iterator begin() const { return Offsets.begin(); } + const_iterator end() const { return Offsets.end(); } + + bool operator==(const OffsetInfo &RHS) const { + return Offsets == RHS.Offsets; + } + + bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); } + + void insert(int64_t Offset) { Offsets.push_back(Offset); } + bool isUnassigned() const { return Offsets.empty(); } + + bool isUnknown() const { + if (isUnassigned()) + return false; + if (Offsets.size() == 1) + return Offsets.front() == AA::RangeTy::Unknown; + return false; + } + + void setUnknown() { + Offsets.clear(); + Offsets.push_back(AA::RangeTy::Unknown); + } + + void addToAll(int64_t Inc) { + for (auto &Offset : Offsets) + Offset += Inc; + } + + /// Copy offsets from \p R into the current list. + /// + /// Ideally all lists should be strictly ascending, but we defer that to the + /// actual use of the list. So we just blindly append here. + void merge(const OffsetInfo &R) { + Offsets.append(R.Offsets); + // ensure elements are unique. + sort(Offsets.begin(), Offsets.end()); + Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end()); + } + }; + /// Create an abstract attribute view for the position \p IRP. 
static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A); @@ -6126,6 +6175,9 @@ struct AAPointerInfo : public AbstractAttribute { virtual const_bin_iterator begin() const = 0; virtual const_bin_iterator end() const = 0; virtual int64_t numOffsetBins() const = 0; + virtual void dumpState(raw_ostream &O) const = 0; + virtual const Access &getBinAccess(unsigned Index) const = 0; + virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0; /// Call \p CB on all accesses that might interfere with \p Range and return /// true if all such accesses were known and the callback returned true for @@ -6155,6 +6207,9 @@ struct AAPointerInfo : public AbstractAttribute { return (AA->getIdAddr() == &ID); } + /// Offsets Info Map + DenseMap<Value *, OffsetInfo> OffsetInfoMap; + /// Unique ID (due to the unique address) static const char ID; }; @@ -6291,12 +6346,139 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { return AbstractAttribute::isValidIRPositionForInit(A, IRP); } + // A helper function to check is simplified values exists for the current + // instruction. + bool simplifiedValuesExists(Attributor &A, Instruction *LocalInst) { + + // If there are potential values that replace the accessed instruction, we + // should use those instead + bool UsedAssumedInformation = false; + SmallVector<AA::ValueAndContext> Values; + if (A.getAssumedSimplifiedValues(IRPosition::inst(*LocalInst), *this, + Values, AA::AnyScope, + UsedAssumedInformation)) { + + for (auto &ValAndContext : Values) { + // don't modify instruction if any simplified value exists + if (ValAndContext.getValue() && ValAndContext.getValue() != LocalInst) { + return true; + } + } + } + + return false; + } + /// Create an abstract attribute view for the position \p IRP. 
static AAAllocationInfo &createForPosition(const IRPosition &IRP, Attributor &A); virtual std::optional<TypeSize> getAllocatedSize() const = 0; + using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>; + virtual const NewOffsetsTy &getNewOffsets() const = 0; + struct FieldAccessGraphEdge; + struct FieldAccessGraphNode; + + struct PriorityQueueGraphNode { + PriorityQueueGraphNode(int Priority, FieldAccessGraphNode *Node) + : Priority(Priority), Node(Node) {} + + public: + int Priority; + FieldAccessGraphNode *Node; + + int getPriority() { return Priority; } + FieldAccessGraphNode *getNode() { return Node; } + + bool operator<(const PriorityQueueGraphNode *A) { + return A->Priority > Priority; + } + + bool operator==(const PriorityQueueGraphNode *A) { + return A->Priority == Priority; + } + + bool operator>(const PriorityQueueGraphNode *A) { + return A->Priority > Priority; + } + }; + + // A Edge Type for the field access graph edge + struct FieldAccessGraphEdge + : public DGEdge<FieldAccessGraphNode, FieldAccessGraphEdge> { + FieldAccessGraphEdge(FieldAccessGraphNode &TargetNode, int EdgeWeight) + : DGEdge<FieldAccessGraphNode, FieldAccessGraphEdge>(TargetNode), + EdgeWeight(EdgeWeight) {} + + public: + FieldAccessGraphNode *SrcNode; + int EdgeWeight; + int getEdgeWeight() { return EdgeWeight; } + void setSrcNode(FieldAccessGraphNode *SourceNode) { SrcNode = SourceNode; } + FieldAccessGraphNode *getSourceNode() { return SrcNode; } + }; + + // A node type for the field access graph node + struct FieldAccessGraphNode + : public DGNode<FieldAccessGraphNode, FieldAccessGraphEdge> { + FieldAccessGraphNode(const AA::RangeTy &Node, FieldAccessGraphEdge &Edge) + : DGNode<FieldAccessGraphNode, FieldAccessGraphEdge>(Edge), + BinRange(Node) {} + FieldAccessGraphNode(const AA::RangeTy &Node) : BinRange(Node) {} + + public: + const AA::RangeTy BinRange; + const AA::RangeTy &getBinRange() const { return BinRange; } + }; + + struct FieldAccessGraph + : public 
DirectedGraph<FieldAccessGraphNode, FieldAccessGraphEdge> { + FieldAccessGraph() {} + + public: + FieldAccessGraphNode *getNode(const AA::RangeTy &Range) { + for (FieldAccessGraphNode *N : Nodes) { + if (N->getBinRange() == Range) { + return N; + } + } + return nullptr; + } + + bool findNode(const AA::RangeTy &Range) { + for (FieldAccessGraphNode *N : Nodes) { + if (N->getBinRange() == Range) { + return true; + } + } + return false; + } + + bool edgeExists(const AA::RangeTy &HeadNode, + FieldAccessGraphNode *TargetNode) { + for (FieldAccessGraphNode *N : Nodes) { + if (N->getBinRange() == HeadNode) { + return N->hasEdgeTo(*TargetNode); + } + } + return false; + } + + // return all nodes that have no incoming edges. + void getAllRoots(std::vector<FieldAccessGraphNode *> &Roots) { + assert(Roots.empty() && "Root set should be empty at the begining!"); + for (FieldAccessGraphNode *N : Nodes) { + SmallVector<FieldAccessGraphEdge *> EL; + if (!findIncomingEdgesToNode(*N, EL)) { + Roots.push_back(N); + } + } + } + }; + + virtual const FieldAccessGraph &getBinAccessGraph() const = 0; + /// See AbstractAttribute::getName() const std::string getName() const override { return "AAAllocationInfo"; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 9a5732dca5b79..7393939e81dc2 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -11,6 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DirectedGraph.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/ADT/APInt.h" @@ -72,9 +75,11 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include <cassert> +#include <climits> #include <numeric> #include <optional> #include <string> +#include <utility> using namespace llvm; @@ 
-419,7 +424,8 @@ struct AAReturnedFromReturnedValues : public BaseType { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { StateType S(StateType::getBestState(this->getState())); - clampReturnedValueStates<AAType, StateType, IRAttributeKind, RecurseForSelectAndPHI>( + clampReturnedValueStates<AAType, StateType, IRAttributeKind, + RecurseForSelectAndPHI>( A, *this, S, PropagateCallBaseContext ? this->getCallBaseContext() : nullptr); // TODO: If we know we visited all returned values, thus no are assumed @@ -1001,54 +1007,9 @@ ChangeStatus AA::PointerInfo::State::addAccess( namespace { -/// A helper containing a list of offsets computed for a Use. Ideally this -/// list should be strictly ascending, but we ensure that only when we -/// actually translate the list of offsets to a RangeList. -struct OffsetInfo { - using VecTy = SmallVector<int64_t>; - using const_iterator = VecTy::const_iterator; - VecTy Offsets; - - const_iterator begin() const { return Offsets.begin(); } - const_iterator end() const { return Offsets.end(); } - - bool operator==(const OffsetInfo &RHS) const { - return Offsets == RHS.Offsets; - } - - bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); } - - void insert(int64_t Offset) { Offsets.push_back(Offset); } - bool isUnassigned() const { return Offsets.size() == 0; } - - bool isUnknown() const { - if (isUnassigned()) - return false; - if (Offsets.size() == 1) - return Offsets.front() == AA::RangeTy::Unknown; - return false; - } - - void setUnknown() { - Offsets.clear(); - Offsets.push_back(AA::RangeTy::Unknown); - } - - void addToAll(int64_t Inc) { - for (auto &Offset : Offsets) { - Offset += Inc; - } - } - - /// Copy offsets from \p R into the current list. - /// - /// Ideally all lists should be strictly ascending, but we defer that to the - /// actual use of the list. So we just blindly append here. 
- void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); } -}; - #ifndef NDEBUG -static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) { +static raw_ostream &operator<<(raw_ostream &OS, + const AAPointerInfo::OffsetInfo &OI) { ListSeparator LS; OS << "["; for (auto Offset : OI) { @@ -1083,6 +1044,15 @@ struct AAPointerInfoImpl return State::numOffsetBins(); } + virtual const Access &getBinAccess(unsigned Index) const override { + return getAccess(Index); + } + + virtual const DenseMap<Value *, OffsetInfo> & + getOffsetInfoMap() const override { + return OffsetInfoMap; + } + bool forallInterferingAccesses( AA::RangeTy Range, function_ref<bool(const AAPointerInfo::Access &, bool)> CB) @@ -1429,7 +1399,7 @@ struct AAPointerInfoImpl void trackPointerInfoStatistics(const IRPosition &IRP) const {} /// Dump the state into \p O. - void dumpState(raw_ostream &O) { + virtual void dumpState(raw_ostream &O) const override { for (auto &It : OffsetBins) { O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size << "] : " << It.getSecond().size() << "\n"; @@ -1463,6 +1433,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl { std::optional<Value *> Content, AccessKind Kind, SmallVectorImpl<int64_t> &Offsets, ChangeStatus &Changed, Type &Ty) { + using namespace AA::PointerInfo; auto Size = AA::RangeTy::Unknown; const DataLayout &DL = A.getDataLayout(); @@ -1595,7 +1566,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { const DataLayout &DL = A.getDataLayout(); Value &AssociatedValue = getAssociatedValue(); - DenseMap<Value *, OffsetInfo> OffsetInfoMap; + OffsetInfoMap.clear(); OffsetInfoMap[&AssociatedValue].insert(0); auto HandlePassthroughUser = [&](Value *Usr, Value *CurPtr, bool &Follow) { @@ -1607,11 +1578,20 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { // // The RHS is a reference that may be invalidated by an insertion caused by // the LHS. 
So we ensure that the side-effect of the LHS happens first. + + if (!OffsetInfoMap.contains(Usr)) { + auto &UsrOI = OffsetInfoMap[Usr]; + auto &PtrOI = OffsetInfoMap[CurPtr]; + UsrOI = PtrOI; + Follow = true; + return true; + } + auto &UsrOI = OffsetInfoMap[Usr]; auto &PtrOI = OffsetInfoMap[CurPtr]; assert(!PtrOI.isUnassigned() && "Cannot pass through if the input Ptr was not visited!"); - UsrOI = PtrOI; + UsrOI.merge(PtrOI); Follow = true; return true; }; @@ -6973,10 +6953,9 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) { Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode(); if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) { - LLVM_DEBUG( - dbgs() - << "[H2S] unique free call might not be executed with the allocation " - << *UniqueFree << "\n"); + LLVM_DEBUG(dbgs() << "[H2S] unique free call might not be executed " + "with the allocation " + << *UniqueFree << "\n"); return false; } } @@ -10406,11 +10385,12 @@ struct AANoFPClassFloating : public AANoFPClassImpl { struct AANoFPClassReturned final : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl, - AANoFPClassImpl::StateType, false, Attribute::None, false> { + AANoFPClassImpl::StateType, false, + Attribute::None, false> { AANoFPClassReturned(const IRPosition &IRP, Attributor &A) : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl, - AANoFPClassImpl::StateType, false, Attribute::None, false>( - IRP, A) {} + AANoFPClassImpl::StateType, false, + Attribute::None, false>(IRP, A) {} /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { @@ -12653,11 +12633,42 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A) : AAAllocationInfo(IRP, A) {} + /// See AbstractAttribute::initialize(...). + void initialize(Attributor &A) override { + + // Map an instruction to its position in the module. 
+ // To get a relative sense of distance between instruction. + // Useful when we need a measure of + // a temporal access amongst instructions. + auto &IRP = getIRPosition(); + auto *M = IRP.getCtxI()->getModule(); + int InstructionPosition = 0; + for (const auto &F : *M) { + for (const auto &BB : F) { + for (const auto &I : BB) { + InstructionPositionMap.insert( + std::make_pair(&I, InstructionPosition)); + InstructionPosition++; + } + } + } + } + std::optional<TypeSize> getAllocatedSize() const override { assert(isValidState() && "the AA is invalid"); return AssumedAllocatedSize; } + const NewOffsetsTy &getNewOffsets() const override { + assert(isValidState() && "the AA is invalid"); + return NewComputedOffsets; + } + + const FieldAccessGraph &getBinAccessGraph() const override { + assert(isValidState() && "the AA is invalid"); + return BinAccessGraph; + } + std::optional<TypeSize> findInitialAllocationSize(Instruction *I, const DataLayout &DL) { @@ -12698,46 +12709,208 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { const DataLayout &DL = A.getDataLayout(); const auto AllocationSize = findInitialAllocationSize(I, DL); - // If allocation size is nullopt, we give up. + // If allocation size is nullopt, we give up if (!AllocationSize) return indicatePessimisticFixpoint(); - // For zero sized allocations, we give up. 
+ // For zero sized allocations, we give up // Since we can't reduce further if (*AllocationSize == 0) return indicatePessimisticFixpoint(); - int64_t BinSize = PI->numOffsetBins(); - - // TODO: implement for multiple bins - if (BinSize > 1) - return indicatePessimisticFixpoint(); + int64_t NumBins = PI->numOffsetBins(); - if (BinSize == 0) { + if (NumBins == 0) { auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false)); if (!changeAllocationSize(NewAllocationSize)) return ChangeStatus::UNCHANGED; return ChangeStatus::CHANGED; } - // TODO: refactor this to be part of multiple bin case - const auto &It = PI->begin(); + DenseMap<Instruction *, AA::RangeTy> MapAccessedInstToBins; + // map accessed instructions to bins + for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin(); + It != PI->end(); It++) { - // TODO: handle if Offset is not zero - if (It->first.Offset != 0) - return indicatePessimisticFixpoint(); + const AA::RangeTy &Range = It->getFirst(); + auto AccessedIndices = It->getSecond(); + for (auto AccIndex : AccessedIndices) { + const auto &AccessInstruction = PI->getBinAccess(AccIndex); + Instruction *LocalInst = AccessInstruction.getLocalInst(); + MapAccessedInstToBins.insert(std::make_pair(LocalInst, Range)); + } + } - uint64_t SizeOfBin = It->first.Offset + It->first.Size; + for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin(); + It != PI->end(); It++) { + const AA::RangeTy &OldRange = It->getFirst(); - if (SizeOfBin >= *AllocationSize) - return indicatePessimisticFixpoint(); + // If a... [truncated] |
ce804be to 9a91eb0 Compare 5b5bf59 to df3df3e Compare | ✅ With the latest revision this PR passed the C/C++ code formatter. |
df3df3e to d895f94 Compare ec07d08 to aa11cee Compare aa11cee to bd80cc4 Compare …a pointer allocation.
10f9e86 to 6d4c97c Compare Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.
No description provided.