- Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU][NPM] Port AMDGPUMarkLastScratchLoad to NPM #131738
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
optimisan merged 2 commits into main from users/optimisan/03-18-_amdgpu_npm_port_amdgpumarklastscratchload_to_npm Mar 19, 2025
Merged
[AMDGPU][NPM] Port AMDGPUMarkLastScratchLoad to NPM #131738
optimisan merged 2 commits into main from users/optimisan/03-18-_amdgpu_npm_port_amdgpumarklastscratchload_to_npm Mar 19, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
| @llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes: This finishes porting all passes on the optimized regalloc path to the new pass manager. Full diff: https://github.com/llvm/llvm-project/pull/131738.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index f5c2b09c84806..eebc33aea7a86 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -195,7 +195,7 @@ extern char &AMDGPURegBankSelectID; void initializeAMDGPURegBankLegalizePass(PassRegistry &); extern char &AMDGPURegBankLegalizeID; -void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &); +void initializeAMDGPUMarkLastScratchLoadLegacyPass(PassRegistry &); extern char &AMDGPUMarkLastScratchLoadID; void initializeSILowerSGPRSpillsLegacyPass(PassRegistry &); @@ -371,6 +371,13 @@ class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> { MachineFunctionAnalysisManager &AM); }; +class AMDGPUMarkLastScratchLoadPass + : public PassInfoMixin<AMDGPUMarkLastScratchLoadPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &AM); +}; + FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp index ba35a1d417173..218b2bff6bd56 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMarkLastScratchLoad.cpp @@ -25,18 +25,26 @@ using namespace llvm; namespace { -class AMDGPUMarkLastScratchLoad : public MachineFunctionPass { +class AMDGPUMarkLastScratchLoad { private: LiveStacks *LS = nullptr; LiveIntervals *LIS = nullptr; SlotIndexes *SI = nullptr; const SIInstrInfo *SII = nullptr; +public: + AMDGPUMarkLastScratchLoad(LiveStacks *LS, LiveIntervals *LIS, SlotIndexes *SI) + : LS(LS), LIS(LIS), SI(SI) {} + bool run(MachineFunction &MF); +}; + +class AMDGPUMarkLastScratchLoadLegacy : public MachineFunctionPass { public: static char ID; - AMDGPUMarkLastScratchLoad() : MachineFunctionPass(ID) { - initializeAMDGPUMarkLastScratchLoadPass(*PassRegistry::getPassRegistry()); + AMDGPUMarkLastScratchLoadLegacy() : 
MachineFunctionPass(ID) { + initializeAMDGPUMarkLastScratchLoadLegacyPass( + *PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -56,17 +64,34 @@ class AMDGPUMarkLastScratchLoad : public MachineFunctionPass { } // end anonymous namespace -bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) { +bool AMDGPUMarkLastScratchLoadLegacy::runOnMachineFunction( + MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; + auto *LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS(); + auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + auto *SI = &getAnalysis<SlotIndexesWrapperPass>().getSI(); + + return AMDGPUMarkLastScratchLoad(LS, LIS, SI).run(MF); +} + +PreservedAnalyses +AMDGPUMarkLastScratchLoadPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto &LS = MFAM.getResult<LiveStacksAnalysis>(MF); + auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF); + auto &SI = MFAM.getResult<SlotIndexesAnalysis>(MF); + + AMDGPUMarkLastScratchLoad(&LS, &LIS, &SI).run(MF); + return PreservedAnalyses::all(); +} + +bool AMDGPUMarkLastScratchLoad::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); if (ST.getGeneration() < AMDGPUSubtarget::GFX12) return false; - LS = &getAnalysis<LiveStacksWrapperLegacy>().getLS(); - LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); - SI = &getAnalysis<SlotIndexesWrapperPass>().getSI(); SII = ST.getInstrInfo(); SlotIndexes &Slots = *LIS->getSlotIndexes(); @@ -130,13 +155,13 @@ bool AMDGPUMarkLastScratchLoad::runOnMachineFunction(MachineFunction &MF) { return Changed; } -char AMDGPUMarkLastScratchLoad::ID = 0; +char AMDGPUMarkLastScratchLoadLegacy::ID = 0; -char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoad::ID; +char &llvm::AMDGPUMarkLastScratchLoadID = AMDGPUMarkLastScratchLoadLegacy::ID; -INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoad, DEBUG_TYPE, 
+INITIALIZE_PASS_BEGIN(AMDGPUMarkLastScratchLoadLegacy, DEBUG_TYPE, "AMDGPU Mark last scratch load", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveStacksWrapperLegacy) -INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoad, DEBUG_TYPE, +INITIALIZE_PASS_END(AMDGPUMarkLastScratchLoadLegacy, DEBUG_TYPE, "AMDGPU Mark last scratch load", false, false) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 6832a17c37177..388c390edad6a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS( #endif MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass()) MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this)) +MACHINE_FUNCTION_PASS("amdgpu-mark-last-scratch-load", AMDGPUMarkLastScratchLoadPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass()) MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass()) MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d0454cce15756..827216f8fde59 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -497,7 +497,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURegBankSelectPass(*PR); initializeAMDGPURegBankLegalizePass(*PR); initializeSILowerWWMCopiesLegacyPass(*PR); - initializeAMDGPUMarkLastScratchLoadPass(*PR); + initializeAMDGPUMarkLastScratchLoadLegacyPass(*PR); initializeSILowerSGPRSpillsLegacyPass(*PR); initializeSIFixSGPRCopiesLegacyPass(*PR); initializeSIFixVGPRCopiesLegacyPass(*PR); diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir 
b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir index cee45216968df..0a2222522b6a6 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-mark-last-scratch-load.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load -verify-machineinstrs | FileCheck -check-prefix=CHECK %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -passes=greedy,amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s --- | define amdgpu_cs void @test_spill_12x32() "amdgpu-num-vgpr"="12" { |
arsenm approved these changes Mar 18, 2025
| @@ -1,5 +1,6 @@ | |||
| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 | |||
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load -verify-machineinstrs | FileCheck -check-prefix=CHECK %s | |||
| # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -passes=greedy,amdgpu-mark-last-scratch-load | FileCheck -check-prefix=CHECK %s | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically, this isn't running the right greedy, but the other RUN line is doing the same.
Co-authored-by: Matt Arsenault <Matthew.Arsenault@amd.com>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Add this suggestion to a batch that can be applied as a single commit. This suggestion is invalid because no changes were made to the code. Suggestions cannot be applied while the pull request is closed. Suggestions cannot be applied while viewing a subset of changes. Only one suggestion per line can be applied in a batch. Add this suggestion to a batch that can be applied as a single commit. Applying suggestions on deleted lines is not supported. You must change the existing code in this line in order to create a valid suggestion. Outdated suggestions cannot be applied. This suggestion has been applied or marked resolved. Suggestions cannot be applied from pending reviews. Suggestions cannot be applied on multi-line comments. Suggestions cannot be applied while the pull request is queued to merge. Suggestion cannot be applied right now. Please check back later.

This finishes porting all passes on the optimized regalloc path to the new pass manager (NPM).