Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -671,12 +671,21 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
}

// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
// memory. They should have been removed by now.
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
// memory. They should have been removed by now, except CFI Saved Reg spills.
static bool allStackObjectsAreDead(const MachineFunction &MF) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
I != E; ++I) {
if (!MFI.isDeadObjectIndex(I))
if (!MFI.isDeadObjectIndex(I)) {
// determineCalleeSaves() might have added the SGPRSpill stack IDs for
// CFI saves into scratch VGPR, ignore them
if (MFI.getStackID(I) == TargetStackID::SGPRSpill &&
FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
continue;
}
return false;
}
}

return true;
Expand All @@ -696,8 +705,8 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(

Register ScratchRsrcReg = MFI->getScratchRSrcReg();

if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
allStackObjectsAreDead(MF.getFrameInfo())))
if (!ScratchRsrcReg ||
(!MRI.isPhysRegUsed(ScratchRsrcReg) && allStackObjectsAreDead(MF)))
return Register();

if (ST.hasSGPRInitBug() ||
Expand Down Expand Up @@ -925,7 +934,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
bool NeedsFlatScratchInit =
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
(!allStackObjectsAreDead(MF) && ST.enableFlatScratch()));

if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
Expand Down Expand Up @@ -1306,6 +1315,11 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
LiveUnits.addReg(Reg);
}
}

// Remove the spill entry created for EXEC. It is needed only for CFISaves in
// the prologue.
if (TRI.isCFISavedRegsSpillEnabled())
FuncInfo->removePrologEpilogSGPRSpillEntry(TRI.getExec());
}

void SIFrameLowering::emitCSRSpillRestores(
Expand Down Expand Up @@ -1789,14 +1803,14 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// can. Any remaining SGPR spills will go to memory, so move them back to the
// default stack.
bool HaveSGPRToVMemSpill =
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ true);
assert(allSGPRSpillsAreDead(MF) &&
"SGPR spill should have been removed in SILowerSGPRSpills");

// FIXME: The other checks should be redundant with allStackObjectsAreDead,
// but currently hasNonSpillStackObjects is set only from source
// allocas. Stack temps produced from legalization are not counted currently.
if (!allStackObjectsAreDead(MFI)) {
if (!allStackObjectsAreDead(MF)) {
assert(RS && "RegScavenger required if spilling");

// Add an emergency spill slot
Expand Down Expand Up @@ -1896,6 +1910,18 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
}

if (TRI->isCFISavedRegsSpillEnabled()) {
Register Exec = TRI->getExec();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(Exec) &&
"Re-reserving spill slot for EXEC");
// FIXME: Machine Copy Propagation currently optimizes away the EXEC copy to
// the scratch SGPR because we emit it only in the prolog. This optimization
// should not happen for frame-related instructions. Until this is fixed, do
// not consider a copy to a scratch SGPR.
getVGPRSpillLaneOrTempRegister(MF, LiveUnits, Exec, RC,
/*IncludeScratchCopy=*/false);
}

// hasFP only knows about stack objects that already exist. We're now
// determining the stack slots that will be created, so we have to predict
// them. Stack objects force FP usage with calls.
Expand All @@ -1905,8 +1931,7 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
//
// FIXME: Is this really hasReservedCallFrame?
const bool WillHaveFP =
FrameInfo.hasCalls() &&
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
FrameInfo.hasCalls() && (SavedVGPRs.any() || !allStackObjectsAreDead(MF));

if (WillHaveFP || hasFP(MF)) {
Register FramePtrReg = MFI->getFrameOffsetReg();
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
public:
bool requiresStackPointerReference(const MachineFunction &MF) const;

/// If '-amdgpu-spill-cfi-saved-regs' is enabled, emit RA/EXEC spills to
/// a free VGPR (lanes) or memory, along with the corresponding CFI rules.
///
/// NOTE(review): presumably \p MBB and \p MBBI give the insertion point and
/// \p emitSpillsToMem selects memory over VGPR lanes as the spill target; the
/// definition is not visible here, so confirm against the implementation.
void emitCFISavedRegSpills(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
LiveRegUnits &LiveRegs,
bool emitSpillsToMem) const;

/// Create a CFI index for CFIInst and build a MachineInstr around it.
MachineInstr *
buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
// free frame index ids by the later pass(es) like "stack slot coloring",
// which in turn could mess up the bookkeeping of "frame index to VGPR
// lane".
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ false);

MadeChange = true;
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
}

bool SIMachineFunctionInfo::removeDeadFrameIndices(
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
MachineFunction &MF, bool ResetSGPRSpillStackIDs) {
MachineFrameInfo &MFI = MF.getFrameInfo();
// Remove dead frame indices from function frame, however keep FP & BP since
// spills for them haven't been inserted yet. And also make sure to remove the
// frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,16 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
}) != PrologEpilogSGPRSpills.end();
}

// Erase the prolog/epilog SGPR spill entry recorded for \p Reg, if one
// exists. Does nothing when \p Reg has no entry.
void removePrologEpilogSGPRSpillEntry(Register Reg) {
  auto Entry = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Candidate) {
    return Candidate.first == Reg;
  });
  if (Entry != PrologEpilogSGPRSpills.end())
    PrologEpilogSGPRSpills.erase(Entry);
}

const PrologEpilogSGPRSaveRestoreInfo &
getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
Expand Down Expand Up @@ -830,8 +840,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,

/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
/// to the default stack.
bool removeDeadFrameIndices(MachineFrameInfo &MFI,
bool ResetSGPRSpillStackIDs);
bool removeDeadFrameIndices(MachineFunction &MF, bool ResetSGPRSpillStackIDs);

int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::ReallyHidden,
cl::init(true));

// Off by default. When set, the registers required for CFI emission are
// spilled (queried through SIRegisterInfo::isCFISavedRegsSpillEnabled()).
static cl::opt<bool> EnableSpillCFISavedRegs(
    "amdgpu-spill-cfi-saved-regs",
    cl::desc("Enable spilling the registers required for CFI emission"),
    cl::ReallyHidden, cl::init(false));

std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

Expand Down Expand Up @@ -559,6 +564,10 @@ unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}

// Reports the current value of the 'amdgpu-spill-cfi-saved-regs' command-line
// option (EnableSpillCFISavedRegs), which defaults to false.
bool SIRegisterInfo::isCFISavedRegsSpillEnabled() const {
return EnableSpillCFISavedRegs;
}

MCRegister
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
const unsigned Align,
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
return SpillSGPRToVGPR;
}

/// \returns true if the '-amdgpu-spill-cfi-saved-regs' option is enabled.
bool isCFISavedRegsSpillEnabled() const;

/// Return the largest available SGPR aligned to \p Align for the register
/// class \p RC.
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
Expand Down
Loading
Loading