typedef CUresult CUDAAPI tcuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
typedef CUresult CUDAAPI tcuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
typedef CUresult CUDAAPI tcuMemFree_v2(CUdeviceptr dptr);
+typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t bytesize);
+typedef CUresult CUDAAPI tcuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t bytesize, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy);
typedef CUresult CUDAAPI tcuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pcopy, CUstream hStream);
typedef CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr);
tcuMemAllocPitch_v2 *cuMemAllocPitch;
tcuMemsetD8Async *cuMemsetD8Async;
tcuMemFree_v2 *cuMemFree;
+ tcuMemcpy *cuMemcpy;
+ tcuMemcpyAsync *cuMemcpyAsync;
tcuMemcpy2D_v2 *cuMemcpy2D;
tcuMemcpy2DAsync_v2 *cuMemcpy2DAsync;
tcuGetErrorName *cuGetErrorName;
LOAD_SYMBOL(cuMemAllocPitch, tcuMemAllocPitch_v2, "cuMemAllocPitch_v2");
LOAD_SYMBOL(cuMemsetD8Async, tcuMemsetD8Async, "cuMemsetD8Async");
LOAD_SYMBOL(cuMemFree, tcuMemFree_v2, "cuMemFree_v2");
+ LOAD_SYMBOL(cuMemcpy, tcuMemcpy, "cuMemcpy");
+ LOAD_SYMBOL(cuMemcpyAsync, tcuMemcpyAsync, "cuMemcpyAsync");
LOAD_SYMBOL(cuMemcpy2D, tcuMemcpy2D_v2, "cuMemcpy2D_v2");
LOAD_SYMBOL(cuMemcpy2DAsync, tcuMemcpy2DAsync_v2, "cuMemcpy2DAsync_v2");
LOAD_SYMBOL(cuGetErrorName, tcuGetErrorName, "cuGetErrorName");