Hi all
After a week of frustration I have the following question.
I am building a static library from my CUDA raytracer, which I then want to link into my programs. Linking against the static library appears to work, but my printf() statements no longer produce any output.
My library is built from several files:
GPURaytracer.h
#ifndef __GPURAYTRACER_H #define __GPURAYTRACER_H #include "raytracer_kernel.h" #include <vector> #include "Dose/Photon/cube/cube.h" #include "Dose/Photon/cube/cubeinfo.h" #include "Dose/Photon/cube/cubecalc.h" #include "CUDA_SDK/cutil.h" #include "cuda.h" #include "cuda_runtime.h" #include "constants.h" #include <iostream> #include <cmath> template <class VType> class GPURaytracer { public: GPURaytracer(const float x1, const float y1, const float z1, const std::vector<float> & x_plane, const std::vector<float> & y_plane, const std::vector<float> & z_plane, const short int dimx, const short int dimy, const short int dimz, const float dx, const float dy, const float dz, const VType *sData); ~GPURaytracer(); void DoTrace(); void RadiologicalDepth(float *radiologicalDepth_); void offSet(int offX, int offY); void blockSize(int blockX, int blockY); private: // Memory size of the array holding the RD. unsigned int memSize_; // Array holding radiologicalDepth. float* d_radiologicalDepth_; // The CT dataset. VType *d_CT_dataset_; // Grid dimension in the y-direction (?). int gridX_; short int dimx_, dimy_, dimz_; unsigned int blockSize_x_, blockSize_y_, offset_x_, offset_y_; }; template <class VType> GPURaytracer<VType>::GPURaytracer(const float x1, const float y1, const float z1, const std::vector<float> & x_plane, const std::vector<float> & y_plane, const std::vector<float> & z_plane, const short int dimx, const short int dimy, const short int dimz, const float dx, const float dy, const float dz, const VType *sData) { some cudaMemcpyToSymbols... } //destructor template <class VType> GPURaytracer<VType>::~GPURaytracer() { std::cout << "Destructor called." << std::endl; } template <class VType> void GPURaytracer<VType>::RadiologicalDepth(float* h_radiologicalDepth){ std::cout << "Returning radiological depth..." 
<< std::endl; CUDA_SAFE_CALL(cudaMemcpy(h_radiologicalDepth, d_radiologicalDepth_, memSize_, cudaMemcpyDeviceToHost) ); } template <class VType> void GPURaytracer<VType>::offSet(int offX, int offY) { offset_x_ = offX; offset_y_ = offY; } template <class VType> void GPURaytracer<VType>::blockSize(int blockX, int blockY) { blockSize_x_ = blockX; blockSize_y_ = blockY; } template <class VType> void GPURaytracer<VType>::DoTrace() { //XXX TODO //Implementatie grid met generieke afmetingen != veelvoud 16 // std::cout << "Raytracing..." << std::endl; int a = floor(dimx_ / blockSize_x_); int b = floor(dimy_ / blockSize_y_); int blockSize_x_temp = blockSize_x_; std::cout << "a: " << a << "\t" << "b: " << b << std::endl; if(a > 0 && b > 0) { printf("line 133\n"); dim3 dimBlock(blockSize_x_, blockSize_y_); dim3 dimGrid(a*dimz_, b); gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_, (dimGrid.x / dimz_), blockSize_x_, blockSize_y_, offset_x_, offset_y_, dimBlock, dimGrid); blockSize_x_ = dimx_ - blockSize_x_ * a; offset_x_ = blockSize_x_ * a; if(blockSize_x_ > 0) { dim3 dimBlock(blockSize_x_, blockSize_y_); dim3 dimGrid(1*dimz_, b); gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_, (dimGrid.x / dimz_), blockSize_x_, blockSize_y_, offset_x_, offset_y_, dimBlock, dimGrid); } blockSize_y_ = dimy_ - blockSize_y_ * b; offset_y_ = blockSize_y_ * b; if(blockSize_x_ > 0 && blockSize_y_ > 0) { dim3 dimBlock(blockSize_x_, blockSize_y_); dim3 dimGrid(1*dimz_, 1); gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_, (dimGrid.x / dimz_), blockSize_x_, blockSize_y_, offset_x_, offset_y_, dimBlock, dimGrid); } offset_x_ = 0; blockSize_x_ = blockSize_x_temp; if(blockSize_y_ > 0) { dim3 dimBlock(blockSize_x_, blockSize_y_); dim3 dimGrid(a*dimz_, 1); gpu_raytracer(d_radiologicalDepth_, d_CT_dataset_, (dimGrid.x / dimz_), blockSize_x_, blockSize_y_, offset_x_, offset_y_, dimBlock, dimGrid); } } } #endif this calls raytracer_kernel.h
#ifndef __RAYTRACER_KERNEL__
#define __RAYTRACER_KERNEL__
//#ifdef __cplusplus
#include "cuda.h"
#include "cuda_runtime.h"

// Host-callable wrapper around the raytracer kernel, so that translation
// units not compiled as CUDA code can trigger a launch. The definition in
// raytracer_kernel.cu launches raytracer_kernel with execution
// configuration <<<dimGrid, dimBlock>>> and forwards the first seven
// arguments unchanged.
//
// radiologicalPath and rho are passed straight to the kernel.
// NOTE(review): presumably both must be device pointers; confirm against
// the kernel body, which is not shown.
void gpu_raytracer(float *radiologicalPath, short int *rho, int gridX, unsigned int blockSize_x, unsigned int blockSize_y, unsigned int offset_x, unsigned int offset_y, dim3 dimBlock, dim3 dimGrid);
#endif
raytracer_kernel.cu
//calculate for all voxels the delta x, y and z
#include <stdio.h>
#include <cstdio>
#include <cassert>
#include "constants.h"
#include "raytracer_kernel.h"
#include "compute_RD_shell.cu"

// Raytracer kernel. Launched by gpu_raytracer() with the grid and block
// dimensions supplied by the caller.
__global__ void raytracer_kernel(float *radiologicalPath, short int *rho, int gridX,
                                 unsigned int blockSize_x, unsigned int blockSize_y,
                                 unsigned int offset_x, unsigned int offset_y)
{
    // Some fancy stuff happens here... and also the printf()s
}

// Host wrapper: launches raytracer_kernel with <<<dimGrid, dimBlock>>> and
// forwards the remaining arguments unchanged.
//
// A kernel launch returns no status of its own, so an invalid execution
// configuration (for example a zero-sized or oversized block/grid) would
// otherwise fail silently and the kernel would never run. The launch status
// is therefore read back and reported on stderr. The function still returns
// normally so existing callers are unaffected.
void gpu_raytracer(float *radiologicalPath, short int *rho, int gridX,
                   unsigned int blockSize_x, unsigned int blockSize_y,
                   unsigned int offset_x, unsigned int offset_y,
                   dim3 dimBlock, dim3 dimGrid)
{
    //printf("Calling raytracer kernel...\n");
    raytracer_kernel<<<dimGrid,dimBlock>>>(radiologicalPath, rho, gridX, blockSize_x, blockSize_y, offset_x, offset_y);

    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        fprintf(stderr, "raytracer_kernel launch failed: %s\n", cudaGetErrorString(launchStatus));
    }
}
And the raytracer_kernel.cu calls a function in compute_RD_shell.cu which calls some functions in computeRD.cu
If I compile these files with nvcc -c -deviceemu
and then combine all the object files into a .a archive with ar,
everything appears to go fine.
But when I then build my TestRayTracer.C file and link it against the right libraries, I don't get any errors, but it's also not showing any of the printf() output.
I hope someone can comment on this strange behavior.
Thanks,
Jordy