Skip to content

Commit 6f7c77f

Browse files
authored
[AMDGPU] Check noalias.addrspace in mayAccessScratchThroughFlat (#151319)
PR #149247 made the `!noalias.addrspace` metadata accessible to the backend, so we can now leverage it in the memory model. The first use case here is detecting whether a flat op can access scratch memory. This benefits both the MemoryLegalizer and InsertWaitCnt.
1 parent bb3066d commit 6f7c77f

File tree

6 files changed

+43
-43
lines changed

6 files changed

+43
-43
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -17793,23 +17793,9 @@ atomicSupportedIfLegalIntType(const AtomicRMWInst *RMW) {
1779317793

1779417794
/// Return if a flat address space atomicrmw can access private memory.
1779517795
static bool flatInstrMayAccessPrivate(const Instruction *I) {
17796-
const MDNode *NoaliasAddrSpaceMD =
17797-
I->getMetadata(LLVMContext::MD_noalias_addrspace);
17798-
if (!NoaliasAddrSpaceMD)
17799-
return true;
17800-
17801-
for (unsigned I = 0, E = NoaliasAddrSpaceMD->getNumOperands() / 2; I != E;
17802-
++I) {
17803-
auto *Low = mdconst::extract<ConstantInt>(
17804-
NoaliasAddrSpaceMD->getOperand(2 * I + 0));
17805-
if (Low->getValue().uge(AMDGPUAS::PRIVATE_ADDRESS)) {
17806-
auto *High = mdconst::extract<ConstantInt>(
17807-
NoaliasAddrSpaceMD->getOperand(2 * I + 1));
17808-
return High->getValue().ule(AMDGPUAS::PRIVATE_ADDRESS);
17809-
}
17810-
}
17811-
17812-
return true;
17796+
const MDNode *MD = I->getMetadata(LLVMContext::MD_noalias_addrspace);
17797+
return !MD ||
17798+
!AMDGPU::hasValueInRangeLikeMetadata(*MD, AMDGPUAS::PRIVATE_ADDRESS);
1781317799
}
1781417800

1781517801
TargetLowering::AtomicExpansionKind

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4267,12 +4267,15 @@ bool SIInstrInfo::mayAccessScratchThroughFlat(const MachineInstr &MI) const {
42674267
if (MI.memoperands_empty())
42684268
return true;
42694269

4270-
// TODO (?): Does this need to be taught how to read noalias.addrspace ?
4271-
42724270
// See if any memory operand specifies an address space that involves scratch.
42734271
return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
42744272
unsigned AS = Memop->getAddrSpace();
4275-
return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
4273+
if (AS == AMDGPUAS::FLAT_ADDRESS) {
4274+
const MDNode *MD = Memop->getAAInfo().NoAliasAddrSpace;
4275+
return !MD || !AMDGPU::hasValueInRangeLikeMetadata(
4276+
*MD, AMDGPUAS::PRIVATE_ADDRESS);
4277+
}
4278+
return AS == AMDGPUAS::PRIVATE_ADDRESS;
42764279
});
42774280
}
42784281

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/IR/IntrinsicsAMDGPU.h"
2222
#include "llvm/IR/IntrinsicsR600.h"
2323
#include "llvm/IR/LLVMContext.h"
24+
#include "llvm/IR/Metadata.h"
2425
#include "llvm/MC/MCInstrInfo.h"
2526
#include "llvm/MC/MCRegisterInfo.h"
2627
#include "llvm/MC/MCSubtargetInfo.h"
@@ -1677,6 +1678,29 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size) {
16771678
return Vals;
16781679
}
16791680

1681+
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
1682+
assert((MD.getNumOperands() % 2 == 0) && "invalid number of operands!");
1683+
for (unsigned I = 0, E = MD.getNumOperands() / 2; I != E; ++I) {
1684+
auto Low =
1685+
mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 0))->getValue();
1686+
auto High =
1687+
mdconst::extract<ConstantInt>(MD.getOperand(2 * I + 1))->getValue();
1688+
// There are two types of [A; B) ranges:
1689+
// A < B, e.g. [4; 5) which is a range that only includes 4.
1690+
// A > B, e.g. [5; 4) which is a range that wraps around and includes
1691+
// everything except 4.
1692+
if (Low.ult(High)) {
1693+
if (Low.ule(Val) && High.ugt(Val))
1694+
return true;
1695+
} else {
1696+
if (Low.uge(Val) && High.ult(Val))
1697+
return true;
1698+
}
1699+
}
1700+
1701+
return false;
1702+
}
1703+
16801704
unsigned getVmcntBitMask(const IsaVersion &Version) {
16811705
return (1 << (getVmcntBitWidthLo(Version.Major) +
16821706
getVmcntBitWidthHi(Version.Major))) -

llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class MCInstrInfo;
3535
class MCRegisterClass;
3636
class MCRegisterInfo;
3737
class MCSubtargetInfo;
38+
class MDNode;
3839
class StringRef;
3940
class Triple;
4041
class raw_ostream;
@@ -1064,6 +1065,9 @@ SmallVector<unsigned> getIntegerVecAttribute(const Function &F, StringRef Name,
10641065
std::optional<SmallVector<unsigned>>
10651066
getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
10661067

1068+
/// Checks if \p Val is inside \p MD, a !range-like metadata.
1069+
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1070+
10671071
/// Represents the counter values to wait for in an s_waitcnt instruction.
10681072
///
10691073
/// Large values (including the maximum possible integer) can be used to

llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,20 +39,19 @@ define void @test_flat_store_no_scratch_alloc(ptr %ptr, i32 %val) #0 {
3939
ret void
4040
}
4141

42-
; TODO: handle
4342
define void @test_flat_store_noalias_addrspace(ptr %ptr, i32 %val) {
4443
; GCN-LABEL: test_flat_store_noalias_addrspace:
4544
; GCN: ; %bb.0:
4645
; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
4746
; GCN-NEXT: s_wait_kmcnt 0x0
48-
; GCN-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
47+
; GCN-NEXT: flat_store_b32 v[0:1], v2
4948
; GCN-NEXT: s_wait_dscnt 0x0
5049
; GCN-NEXT: s_set_pc_i64 s[30:31]
5150
store i32 %val, ptr %ptr, !noalias.addrspace !{i32 5, i32 6}
5251
ret void
5352
}
5453

55-
; TODO: would be nice to handle too
54+
; TODO: would be nice to handle
5655
define void @test_flat_store_select(ptr addrspace(1) %a, ptr addrspace(3) %b, i1 %cond, i32 %val) {
5756
; GCN-SDAG-LABEL: test_flat_store_select:
5857
; GCN-SDAG: ; %bb.0:

llvm/test/Transforms/AtomicExpand/AMDGPU/expand-cmpxchg-flat-maybe-private.ll

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -308,24 +308,7 @@ define { i64, i1 } @cmpxchg_flat_agent_i64__noalias_addrspace_edge_case0(ptr %pt
308308
define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_6(ptr %ptr, i64 %val, i64 %swap) {
309309
; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_6(
310310
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
311-
; CHECK-NEXT: [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
312-
; CHECK-NEXT: br i1 [[IS_PRIVATE]], label %[[ATOMICRMW_PRIVATE:.*]], label %[[ATOMICRMW_GLOBAL:.*]]
313-
; CHECK: [[ATOMICRMW_PRIVATE]]:
314-
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
315-
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 8
316-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], [[VAL]]
317-
; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 [[SWAP]], i64 [[TMP2]]
318-
; CHECK-NEXT: store i64 [[TMP4]], ptr addrspace(5) [[TMP1]], align 8
319-
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP2]], 0
320-
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue { i64, i1 } [[TMP5]], i1 [[TMP3]], 1
321-
; CHECK-NEXT: br label %[[ATOMICRMW_PHI:.*]]
322-
; CHECK: [[ATOMICRMW_GLOBAL]]:
323-
; CHECK-NEXT: [[TMP7:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META0]]
324-
; CHECK-NEXT: br label %[[ATOMICRMW_PHI]]
325-
; CHECK: [[ATOMICRMW_PHI]]:
326-
; CHECK-NEXT: [[RESULT:%.*]] = phi { i64, i1 } [ [[TMP6]], %[[ATOMICRMW_PRIVATE]] ], [ [[TMP7]], %[[ATOMICRMW_GLOBAL]] ]
327-
; CHECK-NEXT: br label %[[ATOMICRMW_END:.*]]
328-
; CHECK: [[ATOMICRMW_END]]:
311+
; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META4:![0-9]+]]
329312
; CHECK-NEXT: ret { i64, i1 } [[RESULT]]
330313
;
331314
%result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !7
@@ -335,7 +318,7 @@ define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_6(ptr %ptr, i64 %val, i64 %swap
335318
define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_3_5(ptr %ptr, i64 %val, i64 %swap) {
336319
; CHECK-LABEL: define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_3_5(
337320
; CHECK-SAME: ptr [[PTR:%.*]], i64 [[VAL:%.*]], i64 [[SWAP:%.*]]) {
338-
; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META4:![0-9]+]]
321+
; CHECK-NEXT: [[RESULT:%.*]] = cmpxchg ptr [[PTR]], i64 [[VAL]], i64 [[SWAP]] syncscope("agent") monotonic seq_cst, align 8, !noalias.addrspace [[META5:![0-9]+]]
339322
; CHECK-NEXT: ret { i64, i1 } [[RESULT]]
340323
;
341324
%result = cmpxchg ptr %ptr, i64 %val, i64 %swap syncscope("agent") monotonic seq_cst, !noalias.addrspace !8
@@ -357,5 +340,6 @@ define { i64, i1 } @cmpxchg_flat_agent_i64__no_2_3_5(ptr %ptr, i64 %val, i64 %sw
357340
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META3:![0-9]+]]}
358341
; CHECK: [[META2]] = !{!"foo", !"bar"}
359342
; CHECK: [[META3]] = !{!"bux", !"baz"}
360-
; CHECK: [[META4]] = !{i32 2, i32 4, i32 5, i32 6}
343+
; CHECK: [[META4]] = !{i32 2, i32 6}
344+
; CHECK: [[META5]] = !{i32 2, i32 4, i32 5, i32 6}
361345
;.

0 commit comments

Comments
 (0)