Hi Guys,
I am trying to write a CUDA program to solve a PDE. I first need to copy a 3D array from host memory to device memory.
I have the following code:
```
float *phi = new float[DimX*DimY*DimZ];
// some computations on phi ;

extern "C" void ReinitializeCUDA(float *phi, const char *obj_cpu, const char *movobj_cpu, const char *source, bool init, int iterations, int I, int J, int K, float delta, float dtau, float eps, float limit, int dim[]){
    // dim[0] = DimX, dim[1] = DimY, dim[2] = DimZ
    cudaPitchedPtr phi_gpu1;
    cudaExtent ca_extent = make_cudaExtent(dim[0]*sizeof(float), dim[1], dim[2]);
    cudaMalloc3D( &phi_gpu1, ca_extent);
    cudaMemset3D( phi_gpu1, 0, ca_extent);
    cudaMemcpy3DParms cpy_params = {0};
    *****cpy_params.srcPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
    cpy_params.dstPtr = phi_gpu1;
    cpy_params.extent = ca_extent;
    cpy_params.kind = cudaMemcpyHostToDevice;
    cudaMemcpy3D( &cpy_params );
}
```
Later on, I will copy the results obtained from a kernel back to phi, as below:
```
cudaMemcpy3DParms dhcpy_params = {0};
dhcpy_params.srcPtr = phi_gpu1;
*****dhcpy_params.dstPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
dhcpy_params.extent = ca_extent;
dhcpy_params.kind = cudaMemcpyDeviceToHost;
cudaMemcpy3D( &dhcpy_params );
printf("cudaMemcpy3D: %s\n", cudaGetErrorString(cudaGetLastError()));
```
My question is about the two lines of code beginning with *****. Should I use
```
cpy_params.srcPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
// ....
dhcpy_params.dstPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[1], dim[2] );
```
or
```
cpy_params.srcPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[0], dim[1] );
// ....
dhcpy_params.dstPtr = make_cudaPitchedPtr( (void*)phi, dim[0] * sizeof(float), dim[0], dim[1] );
```
Or, to put it another way: how do I properly copy a 3D array from the host to device memory that is pointed to by a cudaPitchedPtr?
Thanks.
merlin