@@ -68,13 +68,15 @@ event __SYCL_EXPORT submit_kernel_direct_with_event_impl(
6868 const queue &Queue, const nd_range<Dims> &Range,
6969 detail::HostKernelRefBase &HostKernel,
7070 detail::DeviceKernelInfo *DeviceKernelInfo,
71+ const detail::KernelPropertyHolderStructTy &Props,
7172 const detail::code_location &CodeLoc, bool IsTopCodeLoc);
7273
7374template <int Dims>
7475void __SYCL_EXPORT submit_kernel_direct_without_event_impl (
7576 const queue &Queue, const nd_range<Dims> &Range,
7677 detail::HostKernelRefBase &HostKernel,
7778 detail::DeviceKernelInfo *DeviceKernelInfo,
79+ const detail::KernelPropertyHolderStructTy &Props,
7880 const detail::code_location &CodeLoc, bool IsTopCodeLoc);
7981
8082namespace detail {
@@ -159,16 +161,14 @@ class __SYCL_EXPORT SubmissionInfo {
159161
160162template <detail::WrapAs WrapAs, typename LambdaArgType,
161163 typename KernelName = detail::auto_name, bool EventNeeded = false ,
162- typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
164+ typename PropertiesT = ext::oneapi::experimental::empty_properties_t ,
165+ typename KernelTypeUniversalRef, int Dims>
163166auto submit_kernel_direct (
164- const queue &Queue, [[maybe_unused]] PropertiesT Props,
165- const nd_range<Dims> &Range, KernelTypeUniversalRef &&KernelFunc,
167+ const queue &Queue, const nd_range<Dims> &Range,
168+ KernelTypeUniversalRef &&KernelFunc,
169+ const PropertiesT &ExtraProps =
170+ ext::oneapi::experimental::empty_properties_t {},
166171 const detail::code_location &CodeLoc = detail::code_location::current()) {
167- // TODO Properties not supported yet
168- static_assert (
169- std::is_same_v<PropertiesT,
170- ext::oneapi::experimental::empty_properties_t >,
171- " Setting properties not supported yet for no-CGH kernel submit." );
172172 detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
173173
174174 using KernelType =
@@ -210,22 +210,42 @@ auto submit_kernel_direct(
210210 " -fsycl-host-compiler-options='/std:c++latest' "
211211 " might also help." );
212212
213+ detail::KernelPropertyHolderStructTy ParsedProperties;
214+ if constexpr (ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
215+ const KernelType &>::value) {
216+ // Merge properties via get() and manually specified properties.
217+ // get() method is used for specifying kernel properties but properties
218+ // passed via launch_config (ExtraProps) should be kernel launch properties.
219+ // They are mutually exclusive, so there should not be any conflict when
220+ // merging properties. merge_properties() throws if there's a conflict.
221+ auto MergedProps =
222+ sycl::ext::oneapi::experimental::detail::merge_properties (
223+ ExtraProps,
224+ KernelFunc.get (ext::oneapi::experimental::properties_tag{}));
225+
226+ ParsedProperties = extractKernelProperties (MergedProps);
227+ } else {
228+ ParsedProperties = extractKernelProperties (ExtraProps);
229+ }
230+
213231 if constexpr (EventNeeded) {
214232 return submit_kernel_direct_with_event_impl (
215- Queue, Range, HostKernel, DeviceKernelInfoPtr,
233+ Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
216234 TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
217235 } else {
218236 submit_kernel_direct_without_event_impl (
219- Queue, Range, HostKernel, DeviceKernelInfoPtr,
237+ Queue, Range, HostKernel, DeviceKernelInfoPtr, ParsedProperties,
220238 TlsCodeLocCapture.query (), TlsCodeLocCapture.isToplevel ());
221239 }
222240}
223241
224242template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
225- typename PropertiesT, typename KernelTypeUniversalRef, int Dims>
243+ typename PropertiesT = ext::oneapi::experimental::empty_properties_t ,
244+ typename KernelTypeUniversalRef, int Dims>
226245auto submit_kernel_direct_parallel_for (
227- const queue &Queue, PropertiesT Props, const nd_range<Dims> &Range,
246+ const queue &Queue, const nd_range<Dims> &Range,
228247 KernelTypeUniversalRef &&KernelFunc,
248+ const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t {},
229249 const detail::code_location &CodeLoc = detail::code_location::current()) {
230250
231251 using KernelType =
@@ -246,21 +266,23 @@ auto submit_kernel_direct_parallel_for(
246266 return submit_kernel_direct<detail::WrapAs::parallel_for, TransformedArgType,
247267 KernelName, EventNeeded, PropertiesT,
248268 KernelTypeUniversalRef, Dims>(
249- Queue, Props, Range, std::forward<KernelTypeUniversalRef>(KernelFunc),
269+ Queue, Range, std::forward<KernelTypeUniversalRef>(KernelFunc), Props ,
250270 CodeLoc);
251271}
252272
253273template <typename KernelName = detail::auto_name, bool EventNeeded = false ,
254- typename PropertiesT, typename KernelTypeUniversalRef>
274+ typename PropertiesT = ext::oneapi::experimental::empty_properties_t ,
275+ typename KernelTypeUniversalRef>
255276auto submit_kernel_direct_single_task (
256- const queue &Queue, PropertiesT Props, KernelTypeUniversalRef &&KernelFunc,
277+ const queue &Queue, KernelTypeUniversalRef &&KernelFunc,
278+ const PropertiesT &Props = ext::oneapi::experimental::empty_properties_t {},
257279 const detail::code_location &CodeLoc = detail::code_location::current()) {
258280
259281 return submit_kernel_direct<detail::WrapAs::single_task, void , KernelName,
260282 EventNeeded, PropertiesT, KernelTypeUniversalRef,
261283 1 >(
262- Queue, Props, nd_range<1 >{1 , 1 },
263- std::forward<KernelTypeUniversalRef>(KernelFunc), CodeLoc);
284+ Queue, nd_range<1 >{1 , 1 },
285+ std::forward<KernelTypeUniversalRef>(KernelFunc), Props, CodeLoc);
264286}
265287
266288} // namespace detail
@@ -2775,18 +2797,12 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
27752797
27762798 detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
27772799
2778- // TODO The handler-less path does not support kernel
2779- // function properties and kernel functions with the kernel_handler
2780- // type argument yet.
2781- if constexpr (
2782- std::is_same_v<PropertiesT,
2783- ext::oneapi::experimental::empty_properties_t > &&
2784- !(ext::oneapi::experimental::detail::HasKernelPropertiesGetMethod<
2785- const KernelType &>::value) &&
2786- !(detail::KernelLambdaHasKernelHandlerArgT<KernelType, void >::value)) {
2800+ // TODO The handler-less path does not support kernel functions
2801+ // with the kernel_handler type argument yet.
2802+ if constexpr (!(detail::KernelLambdaHasKernelHandlerArgT<KernelType,
2803+ void >::value)) {
27872804 return detail::submit_kernel_direct_single_task<KernelName, true >(
2788- *this , ext::oneapi::experimental::empty_properties_t {}, KernelFunc,
2789- TlsCodeLocCapture.query ());
2805+ *this , KernelFunc, Properties, TlsCodeLocCapture.query ());
27902806 } else {
27912807 return submit (
27922808 [&](handler &CGH) {
@@ -3323,11 +3339,22 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
33233339 RestT &&...Rest) {
33243340 constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
33253341 detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
3326- return submit (
3327- [&](handler &CGH) {
3328- CGH.template parallel_for <KernelName>(Range, Properties, Rest...);
3329- },
3330- TlsCodeLocCapture.query ());
3342+ using KernelType = std::tuple_element_t <0 , std::tuple<RestT...>>;
3343+
3344+ // TODO The handler-less path does not support reductions, and
3345+ // kernel functions with the kernel_handler type argument yet.
3346+ if constexpr (sizeof ...(RestT) == 1 &&
3347+ !(detail::KernelLambdaHasKernelHandlerArgT<
3348+ KernelType, sycl::nd_item<Dims>>::value)) {
3349+
3350+ return detail::submit_kernel_direct_parallel_for<KernelName, true >(
3351+ *this , Range, Rest..., Properties, TlsCodeLocCapture.query ());
3352+ } else
3353+ return submit (
3354+ [&](handler &CGH) {
3355+ CGH.template parallel_for <KernelName>(Range, Properties, Rest...);
3356+ },
3357+ TlsCodeLocCapture.query ());
33313358 }
33323359
33333360 // / parallel_for version with a kernel represented as a lambda + nd_range that
@@ -3344,18 +3371,15 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
33443371 detail::tls_code_loc_t TlsCodeLocCapture (CodeLoc);
33453372 using KernelType = std::tuple_element_t <0 , std::tuple<RestT...>>;
33463373
3347- // TODO The handler-less path does not support reductions, kernel
3348- // function properties and kernel functions with the kernel_handler
3349- // type argument yet.
3374+ // TODO The handler-less path does not support reductions, and
3375+ // kernel functions with the kernel_handler type argument yet.
33503376 if constexpr (sizeof ...(RestT) == 1 &&
3351- !(ext::oneapi::experimental::detail::
3352- HasKernelPropertiesGetMethod<
3353- const KernelType &>::value) &&
33543377 !(detail::KernelLambdaHasKernelHandlerArgT<
33553378 KernelType, sycl::nd_item<Dims>>::value)) {
33563379 return detail::submit_kernel_direct_parallel_for<KernelName, true >(
3357- *this , ext::oneapi::experimental::empty_properties_t {}, Range,
3358- Rest..., TlsCodeLocCapture.query ());
3380+ *this , Range, Rest...,
3381+ ext::oneapi::experimental::empty_properties_t {},
3382+ TlsCodeLocCapture.query ());
33593383 } else {
33603384 return submit (
33613385 [&](handler &CGH) {
0 commit comments