@@ -18,7 +18,7 @@ function(kernel_declare TARGET_LIST)
1818 file (READ ${kernel_path} kernel_impl)
1919 # TODO(chenweihang): rename PT_REGISTER_CTX_KERNEL to PT_REGISTER_KERNEL
2020 # NOTE(chenweihang): for now we don't recommend using digits in kernel names
21- string (REGEX MATCH "(PT_REGISTER_CTX_KERNEL|PT_REGISTER_GENERAL_KERNEL)\\ ([ \t\r\n ]*[a-z_ ]*," first_registry "${kernel_impl} " )
21+ string (REGEX MATCH "(PT_REGISTER_CTX_KERNEL|PT_REGISTER_GENERAL_KERNEL)\\ ([ \t\r\n ]*[a-z0-9_ ]*," first_registry "${kernel_impl} " )
2222 if (NOT first_registry STREQUAL "" )
2323 # parse the first kernel name
2424 string (REPLACE "PT_REGISTER_CTX_KERNEL(" "" kernel_name "${first_registry} " )
@@ -33,8 +33,6 @@ function(kernel_declare TARGET_LIST)
3333 file (APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name} , GPU, ALL_LAYOUT);\n " )
3434 elseif (${kernel_path} MATCHES "./xpu\/ " )
3535 file (APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name} , XPU, ALL_LAYOUT);\n " )
36- elseif (${kernel_path} MATCHES "./npu\/ *" )
37- file (APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name} , NPU, ALL_LAYOUT);\n " )
3836 else ()
3937 # deal with device independent kernel, now we use CPU temporarily
4038 file (APPEND ${kernel_declare_file} "PT_DECLARE_KERNEL(${kernel_name} , CPU, ALL_LAYOUT);\n " )
@@ -48,7 +46,9 @@ function(kernel_library TARGET)
4846 set (cpu_srcs)
4947 set (gpu_srcs)
5048 set (xpu_srcs)
51- set (npu_srcs)
49+ # parse and save the deps kernel targets
50+ set (all_srcs)
51+ set (kernel_deps)
5252
5353 set (oneValueArgs "" )
5454 set (multiValueArgs SRCS DEPS)
@@ -57,7 +57,6 @@ function(kernel_library TARGET)
5757
5858 list (LENGTH kernel_library_SRCS kernel_library_SRCS_len)
5959 # one kernel only match one impl file in each backend
60- # TODO(chenweihang): parse compile deps by include headers
6160 if (${kernel_library_SRCS_len} EQUAL 0)
6261 if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR} /${TARGET} .cc)
6362 list (APPEND common_srcs ${CMAKE_CURRENT_SOURCE_DIR} /${TARGET} .cc)
@@ -75,57 +74,68 @@ function(kernel_library TARGET)
7574 list (APPEND xpu_srcs ${CMAKE_CURRENT_SOURCE_DIR} /xpu/${TARGET} .cc)
7675 endif ()
7776 endif ()
78- if (WITH_ASCEND_CL)
79- if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR} /npu/${TARGET} .cc)
80- list (APPEND npu_srcs ${CMAKE_CURRENT_SOURCE_DIR} /npu/${TARGET} .cc)
81- endif ()
82- endif ()
8377 else ()
8478 # TODO(chenweihang): impl compile by source later
8579 endif ()
8680
81+ list (APPEND all_srcs ${CMAKE_CURRENT_SOURCE_DIR} /${TARGET} .h)
82+ list (APPEND all_srcs ${common_srcs} )
83+ list (APPEND all_srcs ${cpu_srcs} )
84+ list (APPEND all_srcs ${gpu_srcs} )
85+ list (APPEND all_srcs ${xpu_srcs} )
86+ foreach (src ${all_srcs} )
87+ file (READ ${src} target_content)
88+ string (REGEX MATCHALL "#include \" paddle\/ pten\/ kernels\/ [a-z0-9_]+_kernel.h\" " include_kernels ${target_content} )
89+ foreach (include_kernel ${include_kernels} )
90+ string (REGEX REPLACE "#include \" paddle\/ pten\/ kernels\/ " "" kernel_name ${include_kernel} )
91+ string (REGEX REPLACE ".h\" " "" kernel_name ${kernel_name} )
92+ list (APPEND kernel_deps ${kernel_name} )
93+ endforeach ()
94+ endforeach ()
95+ list (REMOVE_DUPLICATES kernel_deps)
96+ list (REMOVE_ITEM kernel_deps ${TARGET} )
97+
8798 list (LENGTH common_srcs common_srcs_len)
8899 list (LENGTH cpu_srcs cpu_srcs_len)
89100 list (LENGTH gpu_srcs gpu_srcs_len)
90101 list (LENGTH xpu_srcs xpu_srcs_len)
91- list (LENGTH npu_srcs npu_srcs_len)
92102
93103 if (${common_srcs_len} GREATER 0)
94104 # If the kernel has a device independent public implementation,
95105 # we will use this implementation and will not adopt the implementation
96106 # under specific devices
97107 if (WITH_GPU)
98- nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} )
108+ nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
99109 elseif (WITH_ROCM)
100- hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} )
110+ hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
101111 else ()
102- cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} )
112+ cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
103113 endif ()
104114 else ()
105115 # If the kernel has a header file declaration, but no corresponding
106116 # implementation can be found, this is not allowed
107117 if (${cpu_srcs_len} EQUAL 0 AND ${gpu_srcs_len} EQUAL 0 AND
108- ${xpu_srcs_len} EQUAL 0 AND ${npu_srcs_len} EQUAL 0 )
118+ ${xpu_srcs_len} EQUAL 0)
109119 message (FATAL_ERROR "Cannot find any implementation for ${TARGET} " )
110120 else ()
111121 if (WITH_GPU)
112122 if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
113- nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} )
123+ nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
114124 endif ()
115125 elseif (WITH_ROCM)
116126 if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0)
117- hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} )
127+ hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} )
118128 endif ()
119129 else ()
120- if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${npu_srcs_len} GREATER 0 )
121- cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} ${npu_srcs} DEPS ${kernel_library_DEPS } )
130+ if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
131+ cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps } )
122132 endif ()
123133 endif ()
124134 endif ()
125135 endif ()
126136
127- if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
128- ${xpu_srcs_len } GREATER 0 OR ${npu_srcs_len } GREATER 0)
137+ if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR
138+ ${gpu_srcs_len } GREATER 0 OR ${xpu_srcs_len } GREATER 0)
129139 # append target into PTEN_KERNELS property
130140 get_property (pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
131141 set (pten_kernels ${pten_kernels} ${TARGET} )
@@ -147,9 +157,6 @@ function(kernel_library TARGET)
147157 if (${xpu_srcs_len} GREATER 0)
148158 kernel_declare(${xpu_srcs} )
149159 endif ()
150- if (${npu_srcs_len} GREATER 0)
151- kernel_declare(${npu_srcs} )
152- endif ()
153160endfunction ()
154161
155162function (register_kernels)
0 commit comments