@@ -264,20 +264,47 @@ if get_option('USE_SYCL')
264264 mlink_args = [' -fsycl' ]
265265 has_backends = true
266266 message (' Building SYCL' )
267- add_project_arguments (' -O3' , language : ' cpp' )
268- add_project_arguments (' -fsycl' , language : ' cpp' )
269- add_project_arguments (' -ffast-math' , language : ' cpp' )
270- add_project_arguments (' -fsycl-unnamed-lambda' , language : ' cpp' )
271- add_project_arguments (' -Wall' , language : ' cpp' )
272- add_project_arguments (' -Wextra' , language : ' cpp' )
273267
274268 files += ' src/neural/sycl/layers.cc.dp.cpp'
275269 files += ' src/neural/sycl/network_sycl.cc.dp.cpp'
276270 files += ' src/neural/sycl/common_kernels.dp.cpp'
277271
272+
# Default SYCL compile flags for each GPU vendor. Every vendor has a
# "GENERAL" list (optimisation, SYCL enablement, warnings) and a "WL"
# (workload) list of preprocessor defines.
DEF_INTEL_GENERAL_CXX_FLAGS = ['-O3', '-fsycl', '-ffast-math', '-fsycl-unnamed-lambda', '-Wall', '-Wextra']
DEF_INTEL_WL_CXX_FLAGS = ['-DDEFAULT_MINIBATCH_SIZE=248', '-DMKL_ILP64']
DEF_AMD_GENERAL_CXX_FLAGS = ['-O3', '-fsycl', '-ffast-math', '-fsycl-unnamed-lambda', '-Wall', '-Wextra']
DEF_AMD_WL_CXX_FLAGS = ['-DUSE_HIPBLAS', '-DINLINE', '-D__HIP_PLATFORM_AMD__']
DEF_NVIDIA_GENERAL_CXX_FLAGS = ['-O3', '-fsycl', '-ffast-math', '-fsycl-unnamed-lambda', '-Wall', '-Wextra']
DEF_NVIDIA_WL_CXX_FLAGS = ['-DUSE_CUBLAS', '-DINLINE', '-DNVIDIABE']

# Pick the per-vendor flag lists:
#   neither option set          -> vendor defaults (general + workload)
#   OVERRIDE_GENERAL_CXX_FLAGS  -> replaces the general flags only
#   CMAKE_CXX_FLAGS             -> replaces general and workload flags
# Passing both options is rejected with error(): that case assigns none of
# the *_GPU_CXX_FLAGS variables, so the '+=' lines below would otherwise fail
# on an undefined variable instead of reporting the real problem.
if get_option('CMAKE_CXX_FLAGS') != [] and get_option('OVERRIDE_GENERAL_CXX_FLAGS') != []
  error('Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together')
elif get_option('CMAKE_CXX_FLAGS') == [] and get_option('OVERRIDE_GENERAL_CXX_FLAGS') == []
  message('Using DEFAULT compilation flags')
  INTEL_GPU_CXX_FLAGS = DEF_INTEL_GENERAL_CXX_FLAGS + DEF_INTEL_WL_CXX_FLAGS
  NVIDIA_GPU_CXX_FLAGS = DEF_NVIDIA_GENERAL_CXX_FLAGS + DEF_NVIDIA_WL_CXX_FLAGS
  AMD_GPU_CXX_FLAGS = DEF_AMD_GENERAL_CXX_FLAGS + DEF_AMD_WL_CXX_FLAGS
elif get_option('OVERRIDE_GENERAL_CXX_FLAGS') != []
  message('OVERRIDING GENERAL compilation flags')
  INTEL_GPU_CXX_FLAGS = get_option('OVERRIDE_GENERAL_CXX_FLAGS') + DEF_INTEL_WL_CXX_FLAGS
  NVIDIA_GPU_CXX_FLAGS = get_option('OVERRIDE_GENERAL_CXX_FLAGS') + DEF_NVIDIA_WL_CXX_FLAGS
  AMD_GPU_CXX_FLAGS = get_option('OVERRIDE_GENERAL_CXX_FLAGS') + DEF_AMD_WL_CXX_FLAGS
else
  # Only CMAKE_CXX_FLAGS is non-empty here (the other three combinations are
  # handled above), so it is used verbatim for every vendor.
  message('OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags')
  INTEL_GPU_CXX_FLAGS = get_option('CMAKE_CXX_FLAGS')
  NVIDIA_GPU_CXX_FLAGS = get_option('CMAKE_CXX_FLAGS')
  AMD_GPU_CXX_FLAGS = get_option('CMAKE_CXX_FLAGS')
endif

# Device-target flags are appended after the selection above, so they are
# present whichever branch was taken.
INTEL_GPU_CXX_FLAGS += [get_option('GPU_AOT')]
NVIDIA_GPU_CXX_FLAGS += ['-fsycl-targets=nvidia_gpu_sm_' + get_option('USE_SM')]
AMD_GPU_CXX_FLAGS += ['-fsycl-targets=amd_gpu_gfx' + get_option('USE_SM')]
303+
304+
278305 if (get_option (' USE_L0_BACKEND' ) == true )
279306 message (' Building SYCL for the L0 backend' )
280- add_project_arguments (' -DMKL_ILP64 ' , language : ' cpp' )
307+ add_project_arguments (INTEL_GPU_CXX_FLAGS , language : ' cpp' )
281308 deps += cc.find_library (' sycl' , required : true )
282309 deps += cc.find_library (' mkl_sycl' , required : true )
283310 deps += cc.find_library (' mkl_intel_ilp64' , required : true )
@@ -286,39 +313,48 @@ if get_option('USE_SYCL')
286313 deps += cc.find_library (' OpenCL' , required : true )
287314 deps += cc.find_library (' dl' , required : true )
288315 deps += cc.find_library (' m' , required : true )
289- add_project_arguments (' -DDEFAULT_MINIBATCH_SIZE=248' , language : ' cpp' )
290- add_project_arguments (get_option (' GPU_AOT' ), language : ' cpp' )
291- mlink_args += get_option (' GPU_AOT' )
316+ mlink_args += INTEL_GPU_CXX_FLAGS
292317 elif (get_option (' USE_AMD_BACKEND' ) == true )
293318 message (' Building SYCL for AMD backend' )
294- sm_level = ' amd_gpu_' + get_option (' USE_SM' )
295- add_project_arguments (' -fsycl-targets=' + sm_level , language : ' cpp' )
296- add_project_arguments (' -DUSE_HIPBLAS' , language : ' cpp' )
297- add_project_arguments (' -D__HIP_PLATFORM_AMD__' , language : ' cpp' )
298- add_project_arguments (' -DINLINE' , language : ' cpp' )
319+ add_project_arguments (AMD_GPU_CXX_FLAGS, language : ' cpp' )
299320 hip_blas = cc.find_library (' hipblas' , required : true )
300321 hip_dart = cc.find_library (' amdhip64' , required : true )
301322 deps += [hip_blas, hip_dart]
302323 deps += cc.find_library (' sycl' , required : true )
303- mlink_args+= [ ' -fsycl ' , ' -fsycl-targets= ' + sm_level]
324+ mlink_args+= AMD_GPU_CXX_FLAGS
304325 else
305- sm_level = ' nvidia_gpu_sm_' + get_option (' USE_SM' )
306326 message (' Building SYCL for the NVIDIA backend' )
307- add_project_arguments (' -fsycl-targets=' + sm_level, language : ' cpp' )
308- add_project_arguments (' -DUSE_CUBLAS' , language : ' cpp' )
309- add_project_arguments (' -DINLINE' , language : ' cpp' )
310- add_project_arguments (' -DNVIDIABE' , language : ' cpp' )
327+ add_project_arguments (NVIDIA_GPU_CXX_FLAGS, language : ' cpp' )
311328 cu_blas = cc.find_library (' cublas' , required : true )
312329 cu_dart = cc.find_library (' cudart' , required : true )
313- deps += [cu_blas, cu_dart]
330+ cu_da = cc.find_library (' cuda' , required : true )
331+ deps += [cu_blas, cu_dart, cu_da]
314332 deps += cc.find_library (' sycl' , required : true )
315333 deps += cc.find_library (' pthread' , required : true )
316- mlink_args+= [ ' -fsycl ' , ' -fsycl-targets= ' + sm_level]
334+ mlink_args+= NVIDIA_GPU_CXX_FLAGS
317335 endif
318-
319- #message('Using link arguements ' + mlink_args)
320- executable ( ' lc0_sycl ' , ' src/main.cc ' , files, include_directories : includes, dependencies : deps, install : true , link_args : mlink_args)
336+
337+ executable ( ' lc0_sycl ' , ' src/main.cc ' , files, include_directories : includes, dependencies : deps, install : true , link_args : mlink_args)
338+
321339elif get_option (' USE_CUDA' )
340+
# Default flags for the CUDA backend: workload ("WL") flags plus the general
# optimisation level.
DEF_WL_CXX_FLAGS = ['-Xcompiler', '-fPIC']
DEF_GENERAL_CXX_FLAGS = ['-O2']
DEF_COMBINED_CXX_FLAGS = DEF_WL_CXX_FLAGS + DEF_GENERAL_CXX_FLAGS

# Resolve the local CMAKE_CXX_FLAGS list that is later appended to
# cuda_arguments:
#   neither option set          -> combined defaults
#   OVERRIDE_GENERAL_CXX_FLAGS  -> user general flags + default workload flags
#   CMAKE_CXX_FLAGS option      -> user flags only
# Every surviving branch assigns CMAKE_CXX_FLAGS, so the later use cannot hit
# an undefined variable; passing both options aborts configuration.
if get_option('CMAKE_CXX_FLAGS') != [] and get_option('OVERRIDE_GENERAL_CXX_FLAGS') != []
  error('Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together')
elif get_option('CMAKE_CXX_FLAGS') == [] and get_option('OVERRIDE_GENERAL_CXX_FLAGS') == []
  message('Using DEFAULT compilation flags')
  CMAKE_CXX_FLAGS = DEF_COMBINED_CXX_FLAGS
elif get_option('OVERRIDE_GENERAL_CXX_FLAGS') != []
  message('OVERRIDING GENERAL compilation flags')
  CMAKE_CXX_FLAGS = get_option('OVERRIDE_GENERAL_CXX_FLAGS') + DEF_WL_CXX_FLAGS
else
  # Only the CMAKE_CXX_FLAGS option is non-empty here; take it verbatim.
  message('OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags')
  CMAKE_CXX_FLAGS = get_option('CMAKE_CXX_FLAGS')
endif
356+
357+
322358 cudnn_libdirs = get_option (' cudnn_libdirs' )
323359 cu_blas = cc.find_library (' cublas' , dirs : cudnn_libdirs, required : false )
324360 cu_dnn = cc.find_library (' cudnn' , dirs : cudnn_libdirs, required : false )
@@ -356,7 +392,8 @@ elif get_option('USE_CUDA')
356392 cuda_arguments += [' -Xcompiler' , ' -MD' ]
357393 endif
358394 else
359- cuda_arguments += [' --std=c++14' , ' -Xcompiler' , ' -fPIC' ]
395+ cuda_arguments += CMAKE_CXX_FLAGS
396+ #cuda_arguments += ['--std=c++14', '-Xcompiler', '-fPIC']
360397 endif
361398 if get_option (' nvcc_ccbin' ) != ''
362399 cuda_arguments += [' -ccbin=' + get_option (' nvcc_ccbin' )]
@@ -374,6 +411,7 @@ elif get_option('USE_CUDA')
374411 else
375412 outputname = ' @BASENAME@.o'
376413 endif
414+ nvcc_extra_args += get_option (' CUDA_NVCC_FLAGS' )
377415 files += cuda_files
378416 files += custom_target (' cuda fp32 code' ,
379417 input : ' src/neural/cuda/common_kernels.cu' ,
@@ -383,18 +421,19 @@ elif get_option('USE_CUDA')
383421 )
384422
385423 # Handling of fp16 cuda code.
386- nvcc_arch = ' -arch=compute_' + get_option (' USE_SM' )
424+ # nvcc_arch = '-arch=compute_' + get_option('USE_SM')
387425 nvcc_sm_list = [' sm_' + get_option (' USE_SM' )]
388426 # Ignore the given CC for fp16 when it is not in the supported list.
389427 if cuda_cc == '' or not nvcc_sm_list.contains(' sm_' + cuda_cc)
390- nvcc_extra_args = [nvcc_arch ]
428+ nvcc_extra_args = []
391429 nvcc_help = run_command (nvcc, ' -h' ).stdout()
392430 foreach x : nvcc_sm_list
393431 if nvcc_help.contains(x)
394- nvcc_extra_args += ' -code =' + x
432+ nvcc_extra_args += ' -arch =' + x
395433 endif
396434 endforeach
397435 endif
436+ nvcc_extra_args += get_option (' CUDA_NVCC_FLAGS' )
398437 files += custom_target (' cuda fp16 code' ,
399438 input : ' src/neural/cuda/fp16_kernels.cu' ,
400439 output : outputname,
@@ -411,8 +450,23 @@ elif get_option('USE_AMD')
411450 files += ' src/neural/amd/network_amd.cpp'
412451 files += ' src/neural/amd/common_kernels.cpp'
413452
414- add_project_arguments (' -D__HIP_PLATFORM_AMD__' , language : ' cpp' )
415- add_project_arguments (' -O3' , language : ' cpp' )
# Default flags for the AMD backend: the HIP platform define (workload flag)
# plus the general optimisation level.
DEF_WL_CXX_FLAGS = ['-D__HIP_PLATFORM_AMD__']
DEF_GENERAL_CXX_FLAGS = ['-O3']
DEF_COMBINED_CXX_FLAGS = DEF_WL_CXX_FLAGS + DEF_GENERAL_CXX_FLAGS

# Resolve the local CMAKE_CXX_FLAGS list passed to add_project_arguments():
#   neither option set          -> combined defaults
#   OVERRIDE_GENERAL_CXX_FLAGS  -> user general flags + default workload flags
#   CMAKE_CXX_FLAGS option      -> user flags only
# Every surviving branch assigns CMAKE_CXX_FLAGS, so the call below cannot hit
# an undefined variable; passing both options aborts configuration.
if get_option('CMAKE_CXX_FLAGS') != [] and get_option('OVERRIDE_GENERAL_CXX_FLAGS') != []
  error('Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together')
elif get_option('CMAKE_CXX_FLAGS') == [] and get_option('OVERRIDE_GENERAL_CXX_FLAGS') == []
  message('Using DEFAULT compilation flags')
  CMAKE_CXX_FLAGS = DEF_COMBINED_CXX_FLAGS
elif get_option('OVERRIDE_GENERAL_CXX_FLAGS') != []
  message('OVERRIDING GENERAL compilation flags')
  CMAKE_CXX_FLAGS = get_option('OVERRIDE_GENERAL_CXX_FLAGS') + DEF_WL_CXX_FLAGS
else
  # Only the CMAKE_CXX_FLAGS option is non-empty here; take it verbatim.
  message('OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags')
  CMAKE_CXX_FLAGS = get_option('CMAKE_CXX_FLAGS')
endif

add_project_arguments(CMAKE_CXX_FLAGS, language : 'cpp')
416470
417471 hip_blas_lib = cc.find_library (' hipblas' , required : true )
418472 hip_blas_runtime = cc.find_library (' hipblas' , required : true )
427481endif
428482
429483
484+
430485
431486
0 commit comments