@@ -63,6 +63,7 @@ enum class SIAtomicScope {
6363 SINGLETHREAD,
6464 WAVEFRONT,
6565 WORKGROUP,
66+ CLUSTER, // Promoted to AGENT on targets without workgroup clusters.
6667 AGENT,
6768 SYSTEM
6869};
@@ -106,6 +107,7 @@ class SIMemOpInfo final {
106107 bool IsCooperative = false ;
107108
108109 SIMemOpInfo (
110+ const GCNSubtarget &ST,
109111 AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
110112 SIAtomicScope Scope = SIAtomicScope::SYSTEM,
111113 SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
@@ -156,6 +158,11 @@ class SIMemOpInfo final {
156158 SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
157159 this ->Scope = std::min (Scope, SIAtomicScope::AGENT);
158160 }
161+
162+ // On targets that have no concept of a workgroup cluster, use
163+ // AGENT scope as a conservatively correct alternative.
164+ if (this ->Scope == SIAtomicScope::CLUSTER && !ST.hasClusters ())
165+ this ->Scope = SIAtomicScope::AGENT;
159166 }
160167
161168public:
@@ -225,6 +232,7 @@ class SIMemOpInfo final {
225232class SIMemOpAccess final {
226233private:
227234 const AMDGPUMachineModuleInfo *MMI = nullptr ;
235+ const GCNSubtarget &ST;
228236
229237 // / Reports unsupported message \p Msg for \p MI to LLVM context.
230238 void reportUnsupported (const MachineBasicBlock::iterator &MI,
@@ -248,7 +256,7 @@ class SIMemOpAccess final {
248256public:
249257 // / Construct class to support accessing the machine memory operands
250258 // / of instructions in the machine function \p MF.
251- SIMemOpAccess (const AMDGPUMachineModuleInfo &MMI);
259+ SIMemOpAccess (const AMDGPUMachineModuleInfo &MMI, const GCNSubtarget &ST );
252260
253261 // / \returns Load info if \p MI is a load operation, "std::nullopt" otherwise.
254262 std::optional<SIMemOpInfo>
@@ -773,6 +781,8 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
773781 return std::tuple (SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true );
774782 if (SSID == MMI->getAgentSSID ())
775783 return std::tuple (SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true );
784+ if (SSID == MMI->getClusterSSID ())
785+ return std::tuple (SIAtomicScope::CLUSTER, SIAtomicAddrSpace::ATOMIC, true );
776786 if (SSID == MMI->getWorkgroupSSID ())
777787 return std::tuple (SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
778788 true );
@@ -788,6 +798,9 @@ SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
788798 if (SSID == MMI->getAgentOneAddressSpaceSSID ())
789799 return std::tuple (SIAtomicScope::AGENT,
790800 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false );
801+ if (SSID == MMI->getClusterOneAddressSpaceSSID ())
802+ return std::tuple (SIAtomicScope::CLUSTER,
803+ SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false );
791804 if (SSID == MMI->getWorkgroupOneAddressSpaceSSID ())
792805 return std::tuple (SIAtomicScope::WORKGROUP,
793806 SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false );
@@ -815,8 +828,9 @@ SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
815828 return SIAtomicAddrSpace::OTHER;
816829}
817830
818- SIMemOpAccess::SIMemOpAccess (const AMDGPUMachineModuleInfo &MMI_)
819- : MMI(&MMI_) {}
831+ SIMemOpAccess::SIMemOpAccess (const AMDGPUMachineModuleInfo &MMI_,
832+ const GCNSubtarget &ST)
833+ : MMI(&MMI_), ST(ST) {}
820834
821835std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO (
822836 const MachineBasicBlock::iterator &MI) const {
@@ -877,7 +891,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
877891 return std::nullopt ;
878892 }
879893 }
880- return SIMemOpInfo (Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
894+ return SIMemOpInfo (ST, Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
881895 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
882896 IsNonTemporal, IsLastUse, IsCooperative);
883897}
@@ -891,7 +905,7 @@ SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
891905
892906 // Be conservative if there are no memory operands.
893907 if (MI->getNumMemOperands () == 0 )
894- return SIMemOpInfo ();
908+ return SIMemOpInfo (ST );
895909
896910 return constructFromMIWithMMO (MI);
897911}
@@ -905,7 +919,7 @@ SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
905919
906920 // Be conservative if there are no memory operands.
907921 if (MI->getNumMemOperands () == 0 )
908- return SIMemOpInfo ();
922+ return SIMemOpInfo (ST );
909923
910924 return constructFromMIWithMMO (MI);
911925}
@@ -946,8 +960,9 @@ SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
946960 if (SynchronizeAS)
947961 OrderingAddrSpace = *SynchronizeAS;
948962
949- return SIMemOpInfo (Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
950- IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
963+ return SIMemOpInfo (ST, Ordering, Scope, OrderingAddrSpace,
964+ SIAtomicAddrSpace::ATOMIC, IsCrossAddressSpaceOrdering,
965+ AtomicOrdering::NotAtomic);
951966}
952967
953968std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo (
@@ -959,7 +974,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
959974
960975 // Be conservative if there are no memory operands.
961976 if (MI->getNumMemOperands () == 0 )
962- return SIMemOpInfo ();
977+ return SIMemOpInfo (ST );
963978
964979 return constructFromMIWithMMO (MI);
965980}
@@ -2377,6 +2392,7 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
23772392 switch (Scope) {
23782393 case SIAtomicScope::SYSTEM:
23792394 case SIAtomicScope::AGENT:
2395+ case SIAtomicScope::CLUSTER:
23802396 if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
23812397 LOADCnt |= true ;
23822398 if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
@@ -2413,6 +2429,7 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
24132429 switch (Scope) {
24142430 case SIAtomicScope::SYSTEM:
24152431 case SIAtomicScope::AGENT:
2432+ case SIAtomicScope::CLUSTER:
24162433 case SIAtomicScope::WORKGROUP:
24172434 // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
24182435 // not needed as LDS operations for all waves are executed in a total
@@ -2495,6 +2512,9 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
24952512 case SIAtomicScope::AGENT:
24962513 ScopeImm = AMDGPU::CPol::SCOPE_DEV;
24972514 break ;
2515+ case SIAtomicScope::CLUSTER:
2516+ ScopeImm = AMDGPU::CPol::SCOPE_SE;
2517+ break ;
24982518 case SIAtomicScope::WORKGROUP:
24992519 // GFX12.0:
25002520 // In WGP mode the waves of a work-group can be executing on either CU of
@@ -2565,6 +2585,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
25652585 .addImm (AMDGPU::CPol::SCOPE_DEV);
25662586 }
25672587 break ;
2588+ case SIAtomicScope::CLUSTER:
25682589 case SIAtomicScope::WORKGROUP:
25692590 // No WB necessary, but we still have to wait.
25702591 break ;
@@ -2649,11 +2670,8 @@ bool SIGfx12CacheControl::finalizeStore(MachineInstr &MI, bool Atomic) const {
26492670 const unsigned Scope = CPol->getImm () & CPol::SCOPE;
26502671
26512672 // GFX12.0 only: Extra waits needed before system scope stores.
2652- if (!ST.hasGFX1250Insts ()) {
2653- if (!Atomic && Scope == CPol::SCOPE_SYS)
2654- return insertWaitsBeforeSystemScopeStore (MI);
2655- return Changed;
2656- }
2673+ if (!ST.hasGFX1250Insts () && !Atomic && Scope == CPol::SCOPE_SYS)
2674+ Changed |= insertWaitsBeforeSystemScopeStore (MI.getIterator ());
26572675
26582676 return Changed;
26592677}
@@ -2684,6 +2702,9 @@ bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
26842702 case SIAtomicScope::AGENT:
26852703 Changed |= setScope (MI, AMDGPU::CPol::SCOPE_DEV);
26862704 break ;
2705+ case SIAtomicScope::CLUSTER:
2706+ Changed |= setScope (MI, AMDGPU::CPol::SCOPE_SE);
2707+ break ;
26872708 case SIAtomicScope::WORKGROUP:
26882709 // In workgroup mode, SCOPE_SE is needed as waves can execute on
26892710 // different CUs that access different L0s.
@@ -2930,8 +2951,8 @@ SIMemoryLegalizerPass::run(MachineFunction &MF,
29302951bool SIMemoryLegalizer::run (MachineFunction &MF) {
29312952 bool Changed = false ;
29322953
2933- SIMemOpAccess MOA (MMI.getObjFileInfo <AMDGPUMachineModuleInfo>());
29342954 const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
2955+ SIMemOpAccess MOA (MMI.getObjFileInfo <AMDGPUMachineModuleInfo>(), ST);
29352956 CC = SICacheControl::create (ST);
29362957
29372958 for (auto &MBB : MF) {
0 commit comments