Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3359,8 +3359,8 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
mlir::Value barrier = convertPtrToNVVMSpace(
builder, loc, fir::getBase(args[0]), mlir::NVVM::NVVMMemorySpace::Shared);
mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier,
fir::getBase(args[1]), {});
mlir::NVVM::MBarrierInitOp::create(builder, loc, barrier,
fir::getBase(args[1]), {});
auto kind = mlir::NVVM::ProxyKindAttr::get(
builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
auto space = mlir::NVVM::SharedSpaceAttr::get(
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/CUDA/cuda-device-proc.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ end subroutine
! CHECK: %[[COUNT:.*]] = arith.constant 256 : i32
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
! CHECK: nvvm.mbarrier.init %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}

! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
Expand Down
71 changes: 28 additions & 43 deletions mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,8 @@ def NVVM_PMEventOp : NVVM_PTXBuilder_Op<"pmevent">,

/// mbarrier.init instruction with generic pointer type
def NVVM_MBarrierInitOp : NVVM_PTXBuilder_Op<"mbarrier.init">,
Arguments<(ins LLVM_AnyPointer:$addr, I32:$count, PtxPredicate:$predicate)> {
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
I32:$count, PtxPredicate:$predicate)> {
let summary = "MBarrier Initialization Op";
let description = [{
The `nvvm.mbarrier.init` operation initializes an *mbarrier object* at the specified
Expand All @@ -592,48 +593,35 @@ def NVVM_MBarrierInitOp : NVVM_PTXBuilder_Op<"mbarrier.init">,
- Transaction count (tx-count): 0

The operation takes the following operands:
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
addressing, but the address must still be in the shared memory space.
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
must be a pointer to generic or shared::cta memory. When it is generic, the
underlying address must be within the shared::cta memory space; otherwise
the behavior is undefined.
- `count`: Integer specifying the number of threads that will participate in barrier
synchronization. Must be in the range [1, 2²⁰ - 1].
- `predicate`: Optional predicate for conditional execution.

[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-init)
}];
string llvmBuilder = [{
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init, {$addr, $count});
}];
let assemblyFormat = "$addr `,` $count (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)";

let extraClassDeclaration = [{
bool hasIntrinsic() { if(getPredicate()) return false; return true; }
}];
let extraClassDefinition = [{
std::string $cppClass::getPtx() { return std::string("mbarrier.init.b64 [%0], %1;"); }
}];
}

/// mbarrier.init instruction with shared pointer type
def NVVM_MBarrierInitSharedOp : NVVM_PTXBuilder_Op<"mbarrier.init.shared", [NVVMRequiresSM<80>, DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>,
Arguments<(ins LLVM_PointerShared:$addr, I32:$count, PtxPredicate:$predicate)> {
let summary = "Shared MBarrier Initialization Op";
let description = [{
This Op is the same as `nvvm.mbarrier.init` except that the *mbarrier object*
should be accessed using a shared-memory pointer instead of a generic-memory pointer.

[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-init)
static mlir::NVVM::IDArgPair
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase& builder);
}];

string llvmBuilder = [{
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_init_shared, {$addr, $count});
}];
let assemblyFormat = "$addr `,` $count (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)";
let extraClassDeclaration = "bool hasIntrinsic() { return !getPredicate(); }";
let extraClassDefinition = [{
std::string $cppClass::getPtx() { return std::string("mbarrier.init.shared.b64 [%0], %1;"); }
auto [id, args] = NVVM::MBarrierInitOp::getIntrinsicIDAndArgs(
*op, moduleTranslation, builder);
createIntrinsicCall(builder, id, args);
}];
}

def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
Arguments<(ins LLVM_AnyPointer:$addr)> {
Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> {
let summary = "MBarrier Invalidation Operation";
let description = [{
The `nvvm.mbarrier.inval` operation invalidates an *mbarrier object* at the
Expand All @@ -644,30 +632,27 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
It is undefined behavior if the *mbarrier object* is already invalid.

The operation takes the following operand:
- `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
addressing, but the address must still be in the shared memory space.
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
must be a pointer to generic or shared::cta memory. When it is generic, the
underlying address must be within the shared::cta memory space; otherwise
the behavior is undefined.

[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-inval)
}];
string llvmBuilder = [{
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_inval, {$addr});
}];
let assemblyFormat = "$addr attr-dict `:` type(operands)";
}

def NVVM_MBarrierInvalSharedOp : NVVM_Op<"mbarrier.inval.shared">,
Arguments<(ins LLVM_PointerShared:$addr)> {
let summary = "Shared MBarrier Invalidation Operation";
let description = [{
This Op is the same as `nvvm.mbarrier.inval` except that the *mbarrier object*
should be accessed using a shared-memory pointer instead of a generic-memory pointer.
let assemblyFormat = "$addr attr-dict `:` type(operands)";

[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-inval)
let extraClassDeclaration = [{
static mlir::NVVM::IDArgPair
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
llvm::IRBuilderBase& builder);
}];

string llvmBuilder = [{
createIntrinsicCall(builder, llvm::Intrinsic::nvvm_mbarrier_inval_shared, {$addr});
auto [id, args] = NVVM::MBarrierInvalOp::getIntrinsicIDAndArgs(
*op, moduleTranslation, builder);
createIntrinsicCall(builder, id, args);
}];
let assemblyFormat = "$addr attr-dict `:` type(operands)";
}

def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
Expand Down
9 changes: 2 additions & 7 deletions mlir/lib/Conversion/NVGPUToNVVM/NVGPUToNVVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -846,13 +846,8 @@ struct NVGPUMBarrierInitLowering
Value barrier = getMbarrierPtr(b, mbarrierType, adaptor.getBarriers(),
adaptor.getMbarId(), rewriter);
Value count = truncToI32(b, adaptor.getCount());
if (isMbarrierShared(mbarrierType)) {
rewriter.replaceOpWithNewOp<NVVM::MBarrierInitSharedOp>(
op, barrier, count, adaptor.getPredicate());
} else {
rewriter.replaceOpWithNewOp<NVVM::MBarrierInitOp>(op, barrier, count,
adaptor.getPredicate());
}
rewriter.replaceOpWithNewOp<NVVM::MBarrierInitOp>(op, barrier, count,
adaptor.getPredicate());
return success();
}
};
Expand Down
43 changes: 43 additions & 0 deletions mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1607,10 +1607,53 @@ void Tcgen05MmaSmemDescOp::createSmemDescriptor(Operation &op,
mt.mapValue(thisOp.getRes()) = smemDesc;
}

//===----------------------------------------------------------------------===//
// getPtx methods
//===----------------------------------------------------------------------===//

/// Returns the PTX template string used when this op is emitted as PTX text.
/// The `.shared` form of `mbarrier.init` is chosen when the `addr` operand's
/// LLVM pointer type is in the NVVM shared address space; any other address
/// space gets the unqualified form.
std::string NVVM::MBarrierInitOp::getPtx() {
  auto ptrType = llvm::cast<LLVM::LLVMPointerType>(getAddr().getType());
  unsigned ptrAddrSpace = ptrType.getAddressSpace();
  if (ptrAddrSpace == NVVMMemorySpace::Shared)
    return std::string("mbarrier.init.shared.b64 [%0], %1;");
  return std::string("mbarrier.init.b64 [%0], %1;");
}

//===----------------------------------------------------------------------===//
// getIntrinsicID/getIntrinsicIDAndArgs methods
//===----------------------------------------------------------------------===//

/// Picks the LLVM intrinsic for an `nvvm.mbarrier.init` op and gathers its
/// call arguments.
///
/// The intrinsic is `nvvm_mbarrier_init_shared` when the `addr` operand's
/// pointer type is in the NVVM shared address space, and `nvvm_mbarrier_init`
/// otherwise. The returned argument list holds the translated LLVM values of
/// `addr` followed by `count`; the op's predicate operand is not read here.
/// `builder` is not used by this implementation.
mlir::NVVM::IDArgPair MBarrierInitOp::getIntrinsicIDAndArgs(
    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
  auto initOp = cast<NVVM::MBarrierInitOp>(op);

  // Decide between the shared and the generic intrinsic from the pointer type.
  auto ptrType = llvm::cast<LLVM::LLVMPointerType>(initOp.getAddr().getType());
  unsigned ptrAddrSpace = ptrType.getAddressSpace();
  llvm::Intrinsic::ID id;
  if (ptrAddrSpace == NVVMMemorySpace::Shared)
    id = llvm::Intrinsic::nvvm_mbarrier_init_shared;
  else
    id = llvm::Intrinsic::nvvm_mbarrier_init;

  // Intrinsic operands, in order: barrier address, then arrival count.
  llvm::Value *addrValue = mt.lookupValue(initOp.getAddr());
  llvm::Value *countValue = mt.lookupValue(initOp.getCount());
  llvm::SmallVector<llvm::Value *> intrinsicArgs;
  intrinsicArgs.push_back(addrValue);
  intrinsicArgs.push_back(countValue);

  return {id, std::move(intrinsicArgs)};
}

/// Picks the LLVM intrinsic for an `nvvm.mbarrier.inval` op and gathers its
/// call arguments.
///
/// The intrinsic is `nvvm_mbarrier_inval_shared` when the `addr` operand's
/// pointer type is in the NVVM shared address space, and `nvvm_mbarrier_inval`
/// otherwise. The only argument is the translated LLVM value of `addr`.
/// `builder` is not used by this implementation.
mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs(
    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
  auto invalOp = cast<NVVM::MBarrierInvalOp>(op);

  // Decide between the shared and the generic intrinsic from the pointer type.
  auto ptrType = llvm::cast<LLVM::LLVMPointerType>(invalOp.getAddr().getType());
  unsigned ptrAddrSpace = ptrType.getAddressSpace();
  llvm::Intrinsic::ID id;
  if (ptrAddrSpace == NVVMMemorySpace::Shared)
    id = llvm::Intrinsic::nvvm_mbarrier_inval_shared;
  else
    id = llvm::Intrinsic::nvvm_mbarrier_inval;

  return {id, {mt.lookupValue(invalOp.getAddr())}};
}

#define CP_ASYNC_ID_IMPL(mod, size, suffix) \
llvm::Intrinsic::nvvm_cp_async_##mod##_shared_global_##size##suffix

Expand Down
8 changes: 4 additions & 4 deletions mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ func.func @mbarrier() {
// CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[base:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr:.+]] = llvm.getelementptr %[[base]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
// CHECK: nvvm.mbarrier.init.shared %[[barPtr]]
// CHECK: nvvm.mbarrier.init %[[barPtr]]
nvgpu.mbarrier.init %barrier[%c0], %num_threads : !barrierType

// CHECK: %[[base2:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
Expand Down Expand Up @@ -516,7 +516,7 @@ func.func @mbarrier_nocomplete() {
// CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[base:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr:.+]] = llvm.getelementptr %[[base]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
// CHECK: nvvm.mbarrier.init.shared %[[barPtr]]
// CHECK: nvvm.mbarrier.init %[[barPtr]]
nvgpu.mbarrier.init %barrier[%c0], %num_threads : !barrierType

// CHECK: %[[base2:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
Expand Down Expand Up @@ -592,7 +592,7 @@ func.func @mbarrier_txcount() {
// CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[base:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr:.+]] = llvm.getelementptr %[[base]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
// CHECK: nvvm.mbarrier.init.shared %[[barPtr]]
// CHECK: nvvm.mbarrier.init %[[barPtr]]
nvgpu.mbarrier.init %barrier[%c0], %num_threads : !barrierType

%tidxreg = nvvm.read.ptx.sreg.tid.x : i32
Expand Down Expand Up @@ -643,7 +643,7 @@ func.func @mbarrier_txcount_pred() {
// CHECK: %[[barStr:.+]] = builtin.unrealized_conversion_cast %[[barMemref]] : memref<1xi64, 3> to !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[base:.+]] = llvm.extractvalue %[[barStr]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK: %[[barPtr:.+]] = llvm.getelementptr %[[base]][%[[mid]]] : (!llvm.ptr<3>, i64) -> !llvm.ptr<3>, i64
// CHECK: nvvm.mbarrier.init.shared %[[barPtr]], {{.*}}, predicate = %[[P]]
// CHECK: nvvm.mbarrier.init %[[barPtr]], {{.*}}, predicate = %[[P]]
nvgpu.mbarrier.init %barrier[%c0], %mine, predicate = %pred : !barrierType

%txcount = arith.constant 256 : index
Expand Down
2 changes: 1 addition & 1 deletion mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
// CHECK-LABEL: @init_mbarrier
llvm.func @init_mbarrier(%barrier_gen : !llvm.ptr, %barrier : !llvm.ptr<3>, %count : i32, %pred : i1) {
//CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.shared.b64 [$0], $1;", "r,r,b"
nvvm.mbarrier.init.shared %barrier, %count, predicate = %pred : !llvm.ptr<3>, i32, i1
nvvm.mbarrier.init %barrier, %count, predicate = %pred : !llvm.ptr<3>, i32, i1
//CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.init.b64 [$0], $1;", "l,r,b"
nvvm.mbarrier.init %barrier_gen, %count, predicate = %pred : !llvm.ptr, i32, i1
llvm.return
Expand Down
8 changes: 4 additions & 4 deletions mlir/test/Dialect/LLVMIR/nvvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -419,8 +419,8 @@ llvm.func private @mbarrier_init_generic(%barrier: !llvm.ptr) {

llvm.func private @mbarrier_init_shared(%barrier: !llvm.ptr<3>) {
%count = nvvm.read.ptx.sreg.ntid.x : i32
// CHECK: nvvm.mbarrier.init.shared %{{.*}}, %{{.*}} : !llvm.ptr<3>, i32
nvvm.mbarrier.init.shared %barrier, %count : !llvm.ptr<3>, i32
// CHECK: nvvm.mbarrier.init %{{.*}}, %{{.*}} : !llvm.ptr<3>, i32
nvvm.mbarrier.init %barrier, %count : !llvm.ptr<3>, i32
llvm.return
}

Expand All @@ -433,8 +433,8 @@ llvm.func private @mbarrier_inval_generic(%barrier: !llvm.ptr) {


llvm.func private @mbarrier_inval_shared(%barrier: !llvm.ptr<3>) {
// CHECK: nvvm.mbarrier.inval.shared %{{.*}} : !llvm.ptr<3>
nvvm.mbarrier.inval.shared %barrier : !llvm.ptr<3>
// CHECK: nvvm.mbarrier.inval %{{.*}} : !llvm.ptr<3>
nvvm.mbarrier.inval %barrier : !llvm.ptr<3>
llvm.return
}

Expand Down