Skip to content

Commit 86bdbf7

Browse files
Combine free and busy register allocation (#45135)
* Combine free and busy allocation Perform register allocation for a given RefPosition with a single traversal of the registers, whether free or busy. * More refactoring * More refactoring to separate evaluation of heuristics. * More refactoring to simplify configuration. Fix one source of arm32 double diffs. * Delete unused `registerIsAvailable` method Fix `nextPhysRefLocation` computation for Arm32 * Use `UINT_MAX` not `MAXUINT` * clear constantReg when interval is moved * Fix merge issues * Fix weight * Fix spill cost update for UpperVectors * Fix a bug in reload of multireg call on Arm64 * FixedReg fixes * Review feedback and other cleanup * Call clearConstantReg before nulling out the interval, so that we can correctly handle doubles on ARM * Minor cleanup * Formatting changes * Other minor pending fixes Co-authored-by: Kunal Pathak <Kunal.Pathak@microsoft.com>
1 parent 216f96b commit 86bdbf7

File tree

9 files changed

+1631
-1249
lines changed

9 files changed

+1631
-1249
lines changed

src/coreclr/src/jit/codegenarmarch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1405,7 +1405,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode)
14051405
for (int i = regCount - 1; i >= 0; --i)
14061406
{
14071407
var_types type = op1->gtSkipReloadOrCopy()->GetRegTypeByIndex(i);
1408-
regNumber reg = op1->GetRegByIndex(i);
1408+
regNumber reg = actualOp1->GetRegByIndex(i);
14091409
if (op1->IsCopyOrReload())
14101410
{
14111411
// GT_COPY/GT_RELOAD will have valid reg for those positions

src/coreclr/src/jit/lsra.cpp

Lines changed: 1257 additions & 1173 deletions
Large diffs are not rendered by default.

src/coreclr/src/jit/lsra.h

Lines changed: 220 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -471,12 +471,11 @@ class RegRecord : public Referenceable
471471
public:
472472
RegRecord()
473473
{
474-
assignedInterval = nullptr;
475-
previousInterval = nullptr;
476-
regNum = REG_NA;
477-
isCalleeSave = false;
478-
registerType = IntRegisterType;
479-
isBusyUntilNextKill = false;
474+
assignedInterval = nullptr;
475+
previousInterval = nullptr;
476+
regNum = REG_NA;
477+
isCalleeSave = false;
478+
registerType = IntRegisterType;
480479
}
481480

482481
void init(regNumber reg)
@@ -511,8 +510,6 @@ class RegRecord : public Referenceable
511510
void tinyDump();
512511
#endif // DEBUG
513512

514-
bool isFree();
515-
516513
// RefPosition * getNextRefPosition();
517514
// LsraLocation getNextRefLocation();
518515

@@ -528,15 +525,10 @@ class RegRecord : public Referenceable
528525
// assignedInterval becomes inactive.
529526
Interval* previousInterval;
530527

531-
regNumber regNum;
532-
bool isCalleeSave;
533-
RegisterType registerType;
534-
// This register must be considered busy until the next time it is explicitly killed.
535-
// This is used so that putarg_reg can avoid killing its lclVar source, while avoiding
536-
// the problem with the reg becoming free if the last-use is encountered before the call.
537-
bool isBusyUntilNextKill;
538-
539-
bool conflictingFixedRegReference(RefPosition* refPosition);
528+
regNumber regNum;
529+
bool isCalleeSave;
530+
RegisterType registerType;
531+
unsigned char regOrder;
540532
};
541533

542534
inline bool leafInRange(GenTree* leaf, int lower, int upper)
@@ -976,18 +968,16 @@ class LinearScan : public LinearScanInterface
976968
bool isSecondHalfReg(RegRecord* regRec, Interval* interval);
977969
RegRecord* getSecondHalfRegRec(RegRecord* regRec);
978970
RegRecord* findAnotherHalfRegRec(RegRecord* regRec);
979-
bool canSpillDoubleReg(RegRecord* physRegRecord,
980-
LsraLocation refLocation,
981-
BasicBlock::weight_t* recentAssignedRefWeight);
971+
bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation);
982972
void unassignDoublePhysReg(RegRecord* doubleRegRecord);
983973
#endif
984974
void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType);
985975
void updatePreviousInterval(RegRecord* reg, Interval* interval, RegisterType regType);
986976
bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval);
987977
bool isAssignedToInterval(Interval* interval, RegRecord* regRec);
988978
bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation);
989-
bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, BasicBlock::weight_t* recentAssignedRefWeight);
990-
bool isRegInUse(RegRecord* regRec, RefPosition* refPosition);
979+
bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation);
980+
float getSpillWeight(RegRecord* physRegRecord);
991981

992982
// insert refpositions representing prolog zero-inits which will be added later
993983
void insertZeroInitRefPositions();
@@ -1056,11 +1046,7 @@ class LinearScan : public LinearScanInterface
10561046
regMaskTP allSIMDRegs();
10571047
regMaskTP internalFloatRegCandidates();
10581048

1059-
bool registerIsFree(regNumber regNum, RegisterType regType);
1060-
bool registerIsAvailable(RegRecord* physRegRecord,
1061-
LsraLocation currentLoc,
1062-
LsraLocation* nextRefLocationPtr,
1063-
RegisterType regType);
1049+
void makeRegisterInactive(RegRecord* physRegRecord);
10641050
void freeRegister(RegRecord* physRegRecord);
10651051
void freeRegisters(regMaskTP regsToFree);
10661052

@@ -1143,15 +1129,12 @@ class LinearScan : public LinearScanInterface
11431129
* Register management
11441130
****************************************************************************/
11451131
RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition);
1146-
regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition);
1147-
regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable);
1132+
1133+
regNumber allocateReg(Interval* current, RefPosition* refPosition);
11481134
regNumber assignCopyReg(RefPosition* refPosition);
11491135

11501136
bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition);
1151-
bool isSpillCandidate(Interval* current,
1152-
RefPosition* refPosition,
1153-
RegRecord* physRegRecord,
1154-
LsraLocation& nextLocation);
1137+
bool isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord);
11551138
void checkAndAssignInterval(RegRecord* regRec, Interval* interval);
11561139
void assignPhysReg(RegRecord* regRec, Interval* interval);
11571140
void assignPhysReg(regNumber reg, Interval* interval)
@@ -1160,7 +1143,6 @@ class LinearScan : public LinearScanInterface
11601143
}
11611144

11621145
bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType));
1163-
bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType));
11641146
void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition);
11651147
void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType));
11661148
void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition);
@@ -1176,6 +1158,59 @@ class LinearScan : public LinearScanInterface
11761158

11771159
void spillGCRefs(RefPosition* killRefPosition);
11781160

1161+
/*****************************************************************************
1162+
* Register selection
1163+
****************************************************************************/
1164+
regMaskTP getFreeCandidates(regMaskTP candidates, var_types regType)
1165+
{
1166+
regMaskTP result = candidates & m_AvailableRegs;
1167+
#ifdef TARGET_ARM
1168+
// For TYP_DOUBLE on ARM, we can only use a register for which the odd half is
1169+
// also available.
1170+
if (regType == TYP_DOUBLE)
1171+
{
1172+
result &= (m_AvailableRegs >> 1);
1173+
}
1174+
#endif // TARGET_ARM
1175+
return result;
1176+
}
1177+
1178+
struct registerSelector
1179+
{
1180+
regMaskTP candidates;
1181+
int score;
1182+
#ifdef TARGET_ARM
1183+
var_types regType;
1184+
#endif // TARGET_ARM
1185+
1186+
// Apply a simple mask-based selection heuristic, and return 'true' if we now have a single candidate.
1187+
bool applySelection(int selectionScore, regMaskTP selectionCandidates)
1188+
{
1189+
regMaskTP newCandidates = candidates & selectionCandidates;
1190+
if (newCandidates != RBM_NONE)
1191+
{
1192+
score += selectionScore;
1193+
candidates = newCandidates;
1194+
return isSingleRegister(candidates);
1195+
}
1196+
return false;
1197+
}
1198+
1199+
// Select a single register, if it is in the candidate set.
1200+
// Return true if so.
1201+
bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate)
1202+
{
1203+
assert(isSingleRegister(selectionCandidate));
1204+
regMaskTP newCandidates = candidates & selectionCandidate;
1205+
if (newCandidates != RBM_NONE)
1206+
{
1207+
candidates = newCandidates;
1208+
return true;
1209+
}
1210+
return false;
1211+
}
1212+
};
1213+
11791214
/*****************************************************************************
11801215
* For Resolution phase
11811216
****************************************************************************/
@@ -1316,8 +1351,7 @@ class LinearScan : public LinearScanInterface
13161351
// Allocation decisions
13171352
LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED,
13181353
LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG,
1319-
LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG,
1320-
LSRA_EVENT_REUSE_REG,
1354+
LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, LSRA_EVENT_REUSE_REG,
13211355
};
13221356
void dumpLsraAllocationEvent(LsraDumpEvent event,
13231357
Interval* interval = nullptr,
@@ -1488,6 +1522,154 @@ class LinearScan : public LinearScanInterface
14881522
VARSET_TP largeVectorCalleeSaveCandidateVars;
14891523
#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE
14901524

1525+
//-----------------------------------------------------------------------
1526+
// Register status
1527+
//-----------------------------------------------------------------------
1528+
1529+
regMaskTP m_AvailableRegs;
1530+
regNumber getRegForType(regNumber reg, var_types regType)
1531+
{
1532+
#ifdef TARGET_ARM
1533+
if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg))
1534+
{
1535+
reg = REG_PREV(reg);
1536+
}
1537+
#endif // TARGET_ARM
1538+
return reg;
1539+
}
1540+
1541+
regMaskTP getRegMask(regNumber reg, var_types regType)
1542+
{
1543+
reg = getRegForType(reg, regType);
1544+
regMaskTP regMask = genRegMask(reg);
1545+
#ifdef TARGET_ARM
1546+
if (regType == TYP_DOUBLE)
1547+
{
1548+
assert(genIsValidDoubleReg(reg));
1549+
regMask |= (regMask << 1);
1550+
}
1551+
#endif // TARGET_ARM
1552+
return regMask;
1553+
}
1554+
1555+
void resetAvailableRegs()
1556+
{
1557+
m_AvailableRegs = (availableIntRegs | availableFloatRegs);
1558+
m_RegistersWithConstants = RBM_NONE;
1559+
}
1560+
1561+
bool isRegAvailable(regNumber reg, var_types regType)
1562+
{
1563+
regMaskTP regMask = getRegMask(reg, regType);
1564+
return (m_AvailableRegs & regMask) == regMask;
1565+
}
1566+
void setRegsInUse(regMaskTP regMask)
1567+
{
1568+
m_AvailableRegs &= ~regMask;
1569+
}
1570+
void setRegInUse(regNumber reg, var_types regType)
1571+
{
1572+
regMaskTP regMask = getRegMask(reg, regType);
1573+
setRegsInUse(regMask);
1574+
}
1575+
void makeRegsAvailable(regMaskTP regMask)
1576+
{
1577+
m_AvailableRegs |= regMask;
1578+
}
1579+
void makeRegAvailable(regNumber reg, var_types regType)
1580+
{
1581+
regMaskTP regMask = getRegMask(reg, regType);
1582+
makeRegsAvailable(regMask);
1583+
}
1584+
1585+
void clearNextIntervalRef(regNumber reg, var_types regType);
1586+
void updateNextIntervalRef(regNumber reg, Interval* interval);
1587+
1588+
void clearSpillCost(regNumber reg, var_types regType);
1589+
void updateSpillCost(regNumber reg, Interval* interval);
1590+
1591+
regMaskTP m_RegistersWithConstants;
1592+
void clearConstantReg(regNumber reg, var_types regType)
1593+
{
1594+
m_RegistersWithConstants &= ~getRegMask(reg, regType);
1595+
}
1596+
void setConstantReg(regNumber reg, var_types regType)
1597+
{
1598+
m_RegistersWithConstants |= getRegMask(reg, regType);
1599+
}
1600+
bool isRegConstant(regNumber reg, var_types regType)
1601+
{
1602+
reg = getRegForType(reg, regType);
1603+
regMaskTP regMask = getRegMask(reg, regType);
1604+
return (m_RegistersWithConstants & regMask) == regMask;
1605+
}
1606+
regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition);
1607+
1608+
regMaskTP fixedRegs;
1609+
LsraLocation nextFixedRef[REG_COUNT];
1610+
void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition);
1611+
LsraLocation getNextFixedRef(regNumber regNum, var_types regType)
1612+
{
1613+
LsraLocation loc = nextFixedRef[regNum];
1614+
#ifdef TARGET_ARM
1615+
if (regType == TYP_DOUBLE)
1616+
{
1617+
loc = Min(loc, nextFixedRef[regNum + 1]);
1618+
}
1619+
#endif
1620+
return loc;
1621+
}
1622+
1623+
LsraLocation nextIntervalRef[REG_COUNT];
1624+
LsraLocation getNextIntervalRef(regNumber regNum, var_types regType)
1625+
{
1626+
LsraLocation loc = nextIntervalRef[regNum];
1627+
#ifdef TARGET_ARM
1628+
if (regType == TYP_DOUBLE)
1629+
{
1630+
loc = Min(loc, nextIntervalRef[regNum + 1]);
1631+
}
1632+
#endif
1633+
return loc;
1634+
}
1635+
float spillCost[REG_COUNT];
1636+
1637+
regMaskTP regsBusyUntilKill;
1638+
regMaskTP regsInUseThisLocation;
1639+
regMaskTP regsInUseNextLocation;
1640+
bool isRegBusy(regNumber reg, var_types regType)
1641+
{
1642+
regMaskTP regMask = getRegMask(reg, regType);
1643+
return (regsBusyUntilKill & regMask) != RBM_NONE;
1644+
}
1645+
void setRegBusyUntilKill(regNumber reg, var_types regType)
1646+
{
1647+
regsBusyUntilKill |= getRegMask(reg, regType);
1648+
}
1649+
void clearRegBusyUntilKill(regNumber reg)
1650+
{
1651+
regsBusyUntilKill &= ~genRegMask(reg);
1652+
}
1653+
1654+
bool isRegInUse(regNumber reg, var_types regType)
1655+
{
1656+
regMaskTP regMask = getRegMask(reg, regType);
1657+
return (regsInUseThisLocation & regMask) != RBM_NONE;
1658+
}
1659+
1660+
void resetRegState()
1661+
{
1662+
resetAvailableRegs();
1663+
regsBusyUntilKill = RBM_NONE;
1664+
}
1665+
1666+
bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition);
1667+
1668+
// This method should not be used and is here to retain old behavior.
1669+
// It should be replaced by isRegAvailable().
1670+
// See comment in allocateReg();
1671+
bool isFree(RegRecord* regRecord);
1672+
14911673
//-----------------------------------------------------------------------
14921674
// Build methods
14931675
//-----------------------------------------------------------------------
@@ -1551,7 +1733,7 @@ class LinearScan : public LinearScanInterface
15511733

15521734
int BuildSimple(GenTree* tree);
15531735
int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE);
1554-
int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE);
1736+
int BuildDelayFreeUses(GenTree* node, GenTree* rmwNode = nullptr, regMaskTP candidates = RBM_NONE);
15551737
int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE);
15561738
int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE);
15571739
void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs);
@@ -1728,7 +1910,7 @@ class Interval : public Referenceable
17281910
// True if this interval is defined by a putArg, whose source is a non-last-use lclVar.
17291911
// During allocation, this flag will be cleared if the source is not already in the required register.
17301912
// Otherwise, we will leave the register allocated to the lclVar, but mark the RegRecord as
1731-
// isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call.
1913+
// isBusyUntilKill, so that it won't be reused if the lclVar goes dead before the call.
17321914
bool isSpecialPutArg : 1;
17331915

17341916
// True if this interval interferes with a call.

src/coreclr/src/jit/lsraarm64.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,8 +1098,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
10981098
{
10991099
if (isRMW)
11001100
{
1101-
srcCount += BuildDelayFreeUses(intrin.op2);
1102-
srcCount += BuildDelayFreeUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS);
1101+
srcCount += BuildDelayFreeUses(intrin.op2, nullptr);
1102+
srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS);
11031103
}
11041104
else
11051105
{

0 commit comments

Comments
 (0)