3535#include  " llvm/CodeGen/PseudoSourceValueManager.h" 
3636#include  " llvm/CodeGen/SlotIndexes.h" 
3737#include  " llvm/CodeGen/TargetOpcodes.h" 
38+ #include  " llvm/CodeGen/TargetSubtargetInfo.h" 
3839#include  " llvm/CodeGen/WinEHFuncInfo.h" 
3940#include  " llvm/Config/llvm-config.h" 
4041#include  " llvm/IR/Constants.h" 
@@ -99,7 +100,6 @@ static cl::opt<unsigned> MaxCandidatesOpt(
99100 " Max number of candidates that will be evaluated, 0 means no limit" 
100101
101102STATISTIC (NumMarkerSeen, " Number of lifetime markers found." 
102- STATISTIC (GeneratedWorse, " Number of times worse layout were generated" 
103103STATISTIC (StackSpaceSaved, " Number of bytes saved due to merging slots." 
104104STATISTIC (StackSlotMerged, " Number of stack slot merged." 
105105STATISTIC (EscapedAllocas, " Number of allocas that escaped the lifetime region" 
@@ -400,7 +400,9 @@ class StackColoring {
400400 //  Used to make overlap queries faster
401401 SmallVector<unsigned , 4 > StartLiveness;
402402
403-  uint64_t  SlotPriority = 0 ;
403+  int64_t  SlotPriority = 0 ;
404+ 
405+  unsigned  UseCount = 0 ;
404406
405407 unsigned  Offset = InvalidIdx;
406408
@@ -653,9 +655,11 @@ StackColoring::SlotInfo::dump(const StackColoring *State) const {
653655 dbgs () << " :" 
654656 if  (Offset != InvalidIdx)
655657 dbgs () << "  offset=" 
658+  dbgs () << "  uses=" 
659+  dbgs () << "  prio=" 
656660 if  (State) {
657661 if  (State->MFI ->getObjectAllocation (Slot))
658-  dbgs () << "  \" " MFI ->getObjectAllocation (Slot)->getName ()
662+  dbgs () << "  alloca= \" " MFI ->getObjectAllocation (Slot)->getName ()
659663 << " \" " 
660664 if  (State->MFI ->isSpillSlotObjectIndex (Slot))
661665 dbgs () << "  spill" 
@@ -803,6 +807,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
803807 int  Slot = MO.getIndex ();
804808 if  (Slot < 0 )
805809 continue ;
810+  Slot2Info[Slot].UseCount ++;
806811 if  (!BetweenStartEnd.test (Slot)) {
807812 ConservativeSlots.set (Slot);
808813 }
@@ -1525,35 +1530,24 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
15251530 if  (SlotStack.size () <= 1 )
15261531 return  InvalidIdx;
15271532
1528-  //  This Whole block is only used to try and order the stack, such that the
1529-  //  Slots are processed in an order that helps getting good packing
1530-  {
1531-  //  Find how much usage of every livepoint there is.
1532-  SmallVector<unsigned > CumulatedUsage;
1533-  CumulatedUsage.resize (LivenessSize, 0 );
1534- 
1535-  for  (unsigned  Idx = 0 ; Idx < SlotStack.size (); Idx++) {
1536-  SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1537-  for  (unsigned  Pt : Info.Liveness .set_bits ()) {
1538-  CumulatedUsage[Pt] += Info.Size ;
1539-  }
1540-  }
1533+  //  This logic is optimized for x86_64, it probably needs to be adapted to
1534+  //  other targets to get good code-size/stack-size balance.
1535+  //  It is inspired by X86FrameLowering::orderFrameObjects, but with a
1536+  //  modified weight for alignment, which helps with stack size
1537+  auto  IsLower = [&](unsigned  Lhs, unsigned  Rhs) {
1538+  SlotInfo &L = Slot2Info[Lhs];
1539+  SlotInfo &R = Slot2Info[Rhs];
1540+  uint64_t  DensityLScaled = static_cast <uint64_t >(L.UseCount ) *
1541+  static_cast <uint64_t >(R.Size  + Log2 (R.Align ));
1542+  uint64_t  DensityRScaled = static_cast <uint64_t >(R.UseCount ) *
1543+  static_cast <uint64_t >(L.Size  + Log2 (L.Align ));
1544+  return  DensityLScaled < DensityRScaled;
1545+  };
1546+  std::stable_sort (SlotStack.begin (), SlotStack.end (), IsLower);
15411547
1542-  for  (unsigned  Idx = 0 ; Idx < SlotStack.size (); Idx++) {
1543-  SlotInfo &Info = Slot2Info[SlotStack[Idx]];
1544-  for  (unsigned  Pt : Info.Liveness .set_bits ()) {
1545-  //  Since the goal is to minimize the max usage, blocks that are in high
1546-  //  contention areas are given more priority
1547-  Info.SlotPriority  +=
1548-  (uint64_t )CumulatedUsage[Pt] * (uint64_t )CumulatedUsage[Pt] +
1549-  (uint64_t )Info.Size  * (uint64_t )Info.Align .value ();
1550-  }
1551-  }
1552-  std::stable_sort (
1553-  SlotStack.begin (), SlotStack.end (), [&](unsigned  Lhs, unsigned  Rhs) {
1554-  return  Slot2Info[Lhs].SlotPriority  < Slot2Info[Rhs].SlotPriority ;
1555-  });
1556-  }
1548+  int  Prio = 0 ;
1549+  for  (int  Slot : SlotStack)
1550+  Slot2Info[Slot].SlotPriority  = Prio++;
15571551
15581552 SlotInfo *LastQueryLhs = nullptr ;
15591553 SlotInfo *LastQueryRhs = nullptr ;
@@ -1666,24 +1660,27 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
16661660
16671661 Offset = alignTo (Offset, Info.Align );
16681662
1669-  LLVM_DEBUG (dbgs () << " fi#" " @" " ->" 
1670-  if  (PrevSlot == InvalidIdx) dbgs () << " bottom" 
1671-  else  dbgs () << " fi#" dbgs () << " , " 
1663+  LLVM_DEBUG ({
1664+  dbgs () << " fi#" " @" 
1665+  if  (PrevSlot != InvalidIdx)
1666+  dbgs () << " ->" " fi#" 
1667+  dbgs () << " , " 
1668+  });
16721669
16731670 bool  IsBetter = [&] {
1671+  if  (BestIdx == InvalidIdx)
1672+  return  true ;
1673+  SlotInfo &Best = Slot2Info[Candidates[BestIdx]];
16741674 if  (BestOffset != Offset)
16751675 return  BestOffset > Offset;
1676-  SlotInfo &Other = Slot2Info[Candidates[K]];
1677-  if  (Other.Align  != Info.Align )
1678-  return  Other.Align  < Info.Align ;
1679-  if  (Other.Size  != Info.Size )
1680-  return  Other.Size  < Info.Size ;
1681-  if  (Other.SlotPriority  != Info.SlotPriority )
1682-  return  Other.SlotPriority  < Info.SlotPriority ;
1676+  if  (Best.SlotPriority  != Info.SlotPriority )
1677+  return  Best.SlotPriority  < Info.SlotPriority ;
1678+  if  (Best.Align  != Info.Align )
1679+  return  Best.Align  < Info.Align ;
16831680
16841681 //  Both are always stored in Slot2Info, so this is equivalent to
16851682 //  FrameIndex comparison
1686-  return  &Other  < &Info;
1683+  return  &Best  < &Info;
16871684 }();
16881685
16891686 if  (IsBetter) {
@@ -1726,7 +1723,6 @@ unsigned StackColoring::doMerging(unsigned NumSlots) {
17261723 LLVM_DEBUG (dbgs () << " MergedSize=" "  OrigPesSize=" 
17271724 << OrigPesSize << "  OrigOptSize" " \n " 
17281725 if  (FinalSize >= OrigPesSize) {
1729-  GeneratedWorse++;
17301726 return  InvalidIdx;
17311727 }
17321728
@@ -1774,6 +1770,7 @@ bool StackColoring::run(MachineFunction &Func) {
17741770 Intervals.reserve (NumSlots);
17751771 LiveStarts.resize (NumSlots);
17761772
1773+  Slot2Info.resize (NumSlots);
17771774 unsigned  NumMarkers = collectMarkers (NumSlots);
17781775
17791776 unsigned  TotalSize = 0 ;
@@ -1792,7 +1789,6 @@ bool StackColoring::run(MachineFunction &Func) {
17921789 return  removeAllMarkers ();
17931790 }
17941791
1795-  Slot2Info.resize (NumSlots);
17961792 for  (unsigned  i = 0 ; i < NumSlots; ++i) {
17971793 std::unique_ptr<LiveRange> LI (new  LiveRange ());
17981794 LI->getNextValue (Indexes->getZeroIndex (), VNInfoAllocator);
0 commit comments