//+------------------------------------------------------------------+
//| Pi estimation on an OpenCL device.                               |
//| The integral of 4/(1+x^2) over [0,1) is approximated with the    |
//| midpoint rule using _num_steps sub-intervals. The work is split  |
//| into _divisor work items, each summing _intrnCnt consecutive     |
//| terms on the device; the host adds the partial sums together.    |
//+------------------------------------------------------------------+
#define _num_steps 1000000000
#define _divisor   40000
//--- derived constants are parenthesized so that they expand safely
//--- inside larger expressions; _num_steps must be a multiple of
//--- _divisor, otherwise the integer division drops trailing terms
#define _step      (1.0 / _num_steps)
#define _intrnCnt  (_num_steps / _divisor)

//+------------------------------------------------------------------+
//| Short aliases for the number-to-string conversions               |
//+------------------------------------------------------------------+
string D2S(double arg, int digits) { return DoubleToString(arg, digits); }
string I2S(int arg)                { return IntegerToString(arg); }

//--- OpenCL program code; _step and _intrnCnt are pasted into the
//--- kernel source as text, so the device uses the 12-digit decimal
//--- rendering of _step
//--- NOTE(review): the kernel uses 'double'; the context is created
//--- with CL_USE_GPU_ONLY, so a GPU without double support would
//--- presumably fail at program creation - confirm whether
//--- CL_USE_GPU_DOUBLE_ONLY is the better flag here
const string clSource=
   "#define _step "+D2S(_step, 12)+" \r\n"
   "#define _intrnCnt "+I2S(_intrnCnt)+" \r\n"
   " \r\n"
   "__kernel void Pi( __global double *out ) \r\n"
   "{ \r\n"
   " int i = get_global_id( 0 ); \r\n"
   " double partsum = 0.0; \r\n"
   " double x = 0.0; \r\n"
   " long from = (long)i * _intrnCnt; \r\n"   // widen before multiplying to avoid 32-bit overflow
   " long to = from + _intrnCnt; \r\n"
   " for( long j = from; j < to; j ++ ) \r\n"
   " { \r\n"
   " x = ( j + 0.5 ) * _step; \r\n"
   " partsum += 4.0 / ( 1. + x * x ); \r\n"
   " } \r\n"
   " out[ i ] = partsum; \r\n"
   "} \r\n";

//+------------------------------------------------------------------+
//| Script program start function                                    |
//| Returns 0 on success and -1 if any OpenCL step fails.            |
//+------------------------------------------------------------------+
int OnStart()
  {
   Print("Pi Calculation: step = "+D2S(_step, 12)+"; _intrnCnt = "+I2S(_intrnCnt));
//--- prepare the OpenCL context
   int clCtx=CLContextCreate(CL_USE_GPU_ONLY);
   if(clCtx==INVALID_HANDLE)
     {
      Print("OpenCL not found");
      return(-1);
     }
//--- handles start out invalid so CLFreeAll can be used on every exit path
   int clPrg=INVALID_HANDLE;
   int clKrn=INVALID_HANDLE;
   int clMem=INVALID_HANDLE;
//--- build the program
   clPrg=CLProgramCreate(clCtx, clSource);
   if(clPrg==INVALID_HANDLE)
     {
      Print("CLProgramCreate(clCtx, clSource) failed! Error ", GetLastError());
      CLFreeAll(clMem, clKrn, clPrg, clCtx);
      return(-1);
     }
//--- obtain the kernel entry point
   clKrn=CLKernelCreate(clPrg, "Pi");
   if(clKrn==INVALID_HANDLE)
     {
      Print("CLKernelCreate(clPrg, \"Pi\") failed! Error ", GetLastError());
      CLFreeAll(clMem, clKrn, clPrg, clCtx);
      return(-1);
     }
//--- one double per work item receives its partial sum
   clMem=CLBufferCreate(clCtx, _divisor*sizeof(double), CL_MEM_READ_WRITE);
   if(clMem==INVALID_HANDLE)
     {
      Print("CLBufferCreate(clCtx, ...) failed! Error ", GetLastError());
      CLFreeAll(clMem, clKrn, clPrg, clCtx);
      return(-1);
     }
   if(!CLSetKernelArgMem(clKrn, 0, clMem))
     {
      Print("CLSetKernelArgMem(clKrn, 0, clMem) failed! Error ", GetLastError());
      CLFreeAll(clMem, clKrn, clPrg, clCtx);
      return(-1);
     }
   const uint offs[1]  = {0};
   const uint works[1] = {_divisor};
//--- launch the OpenCL program
   ulong start=GetMicrosecondCount();
   if(!CLExecute(clKrn, 1, offs, works))
     {
      Print("CLExecute(clKrn, 1, offs, works) failed! Error ", GetLastError());
      CLFreeAll(clMem, clKrn, clPrg, clCtx);
      return(-1);
     }
//--- read the results back from the OpenCL device
   vector buffer(_divisor);
   if(!CLBufferRead(clMem, 0, buffer))
     {
      Print("CLBufferRead(clMem, 0, buffer) failed! Error ", GetLastError());
      CLFreeAll(clMem, clKrn, clPrg, clCtx);
      return(-1);
     }
//--- add up all partial sums and scale by the step width to obtain Pi
   double Pi=buffer.Sum()*_step;
   double time=(GetMicrosecondCount()-start)/1000.;
   Print("OpenCL: Pi calculated for "+D2S(time, 2)+" ms");
   Print("Pi = "+DoubleToString(Pi, 12));
//--- free memory
   CLFreeAll(clMem, clKrn, clPrg, clCtx);
//--- success
   return(0);
  }
/*
  Pi Calculation: step = 0.000000001000; _intrnCnt = 25000
  OpenCL: GPU device 'Ellesmere' selected
  OpenCL: Pi calculated for 99.98 ms
  Pi = 3.141592653590
*/
//+------------------------------------------------------------------+
//| Auxiliary routine that releases the OpenCL objects.              |
//| Objects are released in the order buffer, kernel, program,       |
//| context. Handles equal to INVALID_HANDLE are skipped so the      |
//| routine can be called from any failure path.                     |
//+------------------------------------------------------------------+
void CLFreeAll(const int clMem, const int clKrn, const int clPrg, const int clCtx)
  {
   if(clMem!=INVALID_HANDLE)
      CLBufferFree(clMem);
   if(clKrn!=INVALID_HANDLE)
      CLKernelFree(clKrn);
   if(clPrg!=INVALID_HANDLE)
      CLProgramFree(clPrg);
   if(clCtx!=INVALID_HANDLE)
      CLContextFree(clCtx);
  }