I was experimenting with the following MLIR code:
module { func @matmul_linalg(%A: memref<8x8xf32>, %B: memref<8x8xf32>, %C: memref<8x8xf32>) { linalg.matmul ins(%A, %B : memref<8x8xf32>, memref<8x8xf32>) outs(%C: memref<8x8xf32>) return } func @main() { %A = memref.alloc() : memref<8x8xf32> %B = memref.alloc() : memref<8x8xf32> %C = memref.alloc() : memref<8x8xf32> %cf1 = constant 1.0 : f32 linalg.fill(%A, %cf1) : memref<8x8xf32>, f32 linalg.fill(%B, %cf1) : memref<8x8xf32>, f32 linalg.fill(%C, %cf1) : memref<8x8xf32>, f32 call @matmul_linalg(%A, %B, %C) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> () return } } and this is my pass pipeline:
mlir-opt matmul-gpu-02.mlir.in \ --linalg-tile-to-parallel-loops="linalg-tile-sizes=4,2" \ --convert-linalg-to-parallel-loops \ --test-gpu-greedy-parallel-loop-mapping \ --convert-parallel-loops-to-gpu \ --gpu-kernel-outlining \ --lower-affine \ --convert-scf-to-std \ --canonicalize \ --pass-pipeline="gpu.module(strip-debuginfo, convert-gpu-to-nvvm, gpu-to-cubin)" \ --gpu-to-llvm 2>&1 >matmul-gpu-02.mlir.out and this is how I’m generating the object:
mlir-translate matmul-gpu-02.mlir.out --mlir-to-llvmir | opt -O3 -S | llc -O3 | as - -o matmul-gpu-02.mlir.o I didn’t get any complaints up to this point, but when I tried to generate the executable –
clang++-11 matmul-gpu-02.mlir.o -lcuda \ $HOME/opt/llvm/lib/libmlir_cuda_runtime.so \ $HOME/opt/llvm/lib/libmlir_runner_utils.so \ -o matmul-gpu-02 I get these errors –
'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuModuleUnload(module)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuModuleGetFunction(&function, module, name)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, smem, stream, params, extra)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuModuleUnload(module)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuModuleGetFunction(&function, module, name)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, smem, stream, params, extra)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuModuleUnload(module)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuModuleLoadData(&module, data)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuModuleGetFunction(&function, module, name)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING)' failed with 'CUDA_ERROR_ILLEGAL_ADDRESS' 'cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, smem, stream, params, extra)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamSynchronize(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuStreamDestroy(stream)' failed with 'CUDA_ERROR_INVALID_HANDLE' 'cuModuleUnload(module)' failed with 'CUDA_ERROR_INVALID_HANDLE' Any idea?