18 changes: 13 additions & 5 deletions cmake/external/xpu.cmake
@@ -28,21 +28,22 @@ set(XPU_XPTI_LIB_NAME "libxpti.so")
 set(XPU_XBLAS_LIB_NAME "libxpu_blas.so")
 set(XPU_XFA_LIB_NAME "libxpu_flash_attention.so")
 set(XPU_XPUDNN_LIB_NAME "libxpu_dnn.so")
+set(XPU_XPUDNN_OMP_LIB_NAME "libomp.so")
 set(XPU_FFT_LIB_NAME "libcufft.so")
 # Avoid deprecated int32 apis:
 add_compile_definitions(XPUAPI_NOT_INCLUDE_DEPRECATED)

 if(NOT DEFINED XPU_XHPC_BASE_DATE)
-  set(XPU_XHPC_BASE_DATE "dev/20990602")
+  set(XPU_XHPC_BASE_DATE "dev/20250722")
 endif()
-set(XPU_XCCL_BASE_VERSION "3.0.2.7") # For XRE5
+set(XPU_XCCL_BASE_VERSION "3.0.3.1") # For XRE5
 if(NOT DEFINED XPU_XFT_BASE_VERSION)
   set(XPU_XFT_BASE_VERSION "20250507/xpu3")
 endif()

 if(NOT DEFINED XPU_XRE_BASE_VERSION)
   if(WITH_XPU_XRE5)
-    set(XPU_XRE_BASE_VERSION "5.0.21.19")
+    set(XPU_XRE_BASE_VERSION "5.0.21.26")
   else()
     set(XPU_XRE_BASE_VERSION "4.32.0.1")
   endif()
@@ -183,6 +184,7 @@ set(XPU_CUDA_RT_LIB "${XPU_LIB_DIR}/${XPU_CUDA_RT_LIB_NAME}")
 set(XPU_ML_LIB "${XPU_LIB_DIR}/${XPU_ML_LIB_NAME}")
 set(XPU_XFA_LIB "${XPU_LIB_DIR}/${XPU_XFA_LIB_NAME}")
 set(XPU_XPUDNN_LIB "${XPU_LIB_DIR}/${XPU_XPUDNN_LIB_NAME}")
+set(XPU_XPUDNN_OMP_LIB "${XPU_LIB_DIR}/${XPU_XPUDNN_OMP_LIB_NAME}")

 set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${XPU_INSTALL_DIR}/lib")

@@ -251,8 +253,9 @@ if(WITH_XPU_XRE5)
     DOWNLOAD_COMMAND
       bash ${CMAKE_SOURCE_DIR}/tools/xpu/pack_paddle_dependence.sh
       ${XPU_XRE_URL} ${XPU_XRE_DIR_NAME} ${XPU_XHPC_URL} ${XPU_XHPC_DIR_NAME}
-      ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} 1 && wget ${XPU_XFT_GET_DEPENCE_URL}
-      && bash ${XFT_COMMAND} ${XPU_XFT_URL} ${XPU_XFT_DIR_NAME} && bash
+      ${XPU_XCCL_URL} ${XPU_XCCL_DIR_NAME} 1 ${WITH_MKL}
+      "${CMAKE_SOURCE_DIR}/build" && wget ${XPU_XFT_GET_DEPENCE_URL} && bash
+      ${XFT_COMMAND} ${XPU_XFT_URL} ${XPU_XFT_DIR_NAME} && bash
       ${CMAKE_SOURCE_DIR}/tools/xpu/get_xpti_dependence.sh ${XPU_XPTI_URL}
       ${XPU_XPTI_DIR_NAME} && bash
       ${CMAKE_SOURCE_DIR}/tools/xpu/get_xpufft_dependence.sh ${XPU_FFT_URL}
@@ -263,6 +266,7 @@ if(WITH_XPU_XRE5)
     BUILD_BYPRODUCTS ${XPU_API_LIB}
     BUILD_BYPRODUCTS ${XPU_XBLAS_LIB}
     BUILD_BYPRODUCTS ${XPU_XPUDNN_LIB}
+    BUILD_BYPRODUCTS ${XPU_XPUDNN_OMP_LIB}
     BUILD_BYPRODUCTS ${XPU_XFA_LIB}
     BUILD_BYPRODUCTS ${XPU_RT_LIB}
     BUILD_BYPRODUCTS ${XPU_CUDA_RT_LIB}
@@ -360,6 +364,10 @@ if(WITH_XPU_XRE5)
     ${XPU_XFA_LIB}
     ${XPU_XPUDNN_LIB}
     ${XPU_ML_LIB})
+
+  if(NOT WITH_MKL)
+    target_link_libraries(xpulib ${XPU_XPUDNN_OMP_LIB})
+  endif()
 else()
   target_link_libraries(xpulib ${XPU_RT_LIB} ${XPU_API_LIB})
 endif()
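Note: taken together, the build changes above pin newer toolkit versions (XHPC dev/20250722, XCCL 3.0.3.1, XRE 5.0.21.26 for XRE5 builds) and start shipping the libomp.so bundled with xpu_dnn: the library gets a name and path, pack_paddle_dependence.sh now receives ${WITH_MKL} and the build directory, the file is declared a build byproduct, and it is linked into xpulib only when WITH_MKL is off, presumably because MKL builds already provide an OpenMP runtime.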
18 changes: 9 additions & 9 deletions paddle/phi/kernels/fusion/xpu/fast_where_xpu_kernel.cc
@@ -45,15 +45,15 @@ void FastWhereXPUKernel(const Context& dev_ctx,
 #ifndef PADDLE_WITH_XPU_PLUGIN
   LOG(INFO)
       << "Add -DWITH_XPU_PLUGIN=ON to build xpu::plugin::fast_where(), or use "
-         "xpu::select() instead, which leads low performance.";
-  int r = xpu::select<XPUType>(dev_ctx.x_context(),
-                               condition_data,
-                               x_data,
-                               y_data,
-                               out_data,
-                               condition_dims,
-                               x_dims);
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "select");
+         "xpu::where() instead, which leads to low performance.";
+  int r = xpu::where<XPUType>(dev_ctx.x_context(),
+                              condition_data,
+                              x_data,
+                              y_data,
+                              out_data,
+                              condition_dims,
+                              x_dims);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "where");
 #else
   xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
   if (condition_dims != x_dims) {
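For reference, a minimal standalone sketch of the renamed fallback call, under the assumption that the bumped XHPC exposes xpu::where with the same argument order the old xpu::select used (the header name and namespace alias follow Paddle's usual conventions; where_fallback is a made-up wrapper):

#include <cstdint>
#include <vector>
#include "xpu/xdnn.h"  // XDNN header from XHPC dev/20250722 or newer (assumption)

namespace xpu = baidu::xpu::api;

// out[i] = cond[i] ? x[i] : y[i], with numpy-style broadcasting between
// cond_dims and x_dims, mirroring the kernel above.
int where_fallback(xpu::Context* ctx,
                   const bool* cond, const float* x, const float* y,
                   float* out,
                   const std::vector<int64_t>& cond_dims,
                   const std::vector<int64_t>& x_dims) {
  return xpu::where<float>(ctx, cond, x, y, out, cond_dims, x_dims);
}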
18 changes: 9 additions & 9 deletions paddle/phi/kernels/xpu/activation_grad_kernel.cc
@@ -368,21 +368,21 @@ struct XPUSiluGradFunctor : public funcs::BaseActivationFunctor<T> {

     if (std::getenv("XPU_PADDLE_ACT_LUT") != nullptr) {
       if (!std::is_same<T, ::phi::dtype::bfloat16>::value) {
-        // use fast_swish_grad if NOT bf16
-        int r = xpu::fast_swish_grad(
+        // use fast_silu_grad if NOT bf16
+        int r = xpu::fast_silu_grad(
             dev_ctx.x_context(), x_data, y_grad, x_grad, dx->numel());
-        PADDLE_ENFORCE_XDNN_SUCCESS(r, "fast_swish_grad");
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "fast_silu_grad");
       } else {
-        // use plain swish_grad
-        int r = xpu::swish_grad(
+        // use plain silu_grad
+        int r = xpu::silu_grad(
             dev_ctx.x_context(), x_data, y_grad, x_grad, dx->numel());
-        PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish_grad");
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "silu_grad");
       }
     } else {
-      // use plain swish_grad
-      int r = xpu::swish_grad(
+      // use plain silu_grad
+      int r = xpu::silu_grad(
           dev_ctx.x_context(), x_data, y_grad, x_grad, dx->numel());
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish_grad");
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "silu_grad");
     }
   }
 };
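The rename tracks the math: silu(x) = x * sigmoid(x) is swish with beta = 1, so the swish entry points become silu ones here. A hedged sketch of the forward/backward pair, with signatures inferred from the call sites above (the helper name is made up):

#include <cstdint>
#include "xpu/xdnn.h"

namespace xpu = baidu::xpu::api;

// Forward y = silu(x), then dx = silu_grad(x, dy); n is the element count.
int silu_fwd_bwd(xpu::Context* ctx, const float* x, float* y,
                 const float* dy, float* dx, int64_t n) {
  // The two trailing nullptr arguments mirror the kernel above (unused here).
  int r = xpu::silu(ctx, x, y, n, nullptr, nullptr);
  if (r != 0) return r;
  return xpu::silu_grad(ctx, x, dy, dx, n);
}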
27 changes: 14 additions & 13 deletions paddle/phi/kernels/xpu/activation_kernel.cc
@@ -205,12 +205,13 @@ struct XPURoundFunctor : public funcs::BaseActivationFunctor<T> {
                   const DenseTensor& x,
                   DenseTensor* out) const {
     using XPUType = typename XPUTypeTrait<T>::Type;
-    int r = xpu::round<XPUType>(dev_ctx.x_context(),
-                                reinterpret_cast<const XPUType*>(x.data<T>()),
-                                reinterpret_cast<XPUType*>(out->data<T>()),
-                                x.numel(),
-                                decimals);
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "round");
+    int r = xpu::paddle_round<XPUType>(
+        dev_ctx.x_context(),
+        reinterpret_cast<const XPUType*>(x.data<T>()),
+        reinterpret_cast<XPUType*>(out->data<T>()),
+        x.numel(),
+        decimals);
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_round");
   }
 };

@@ -344,20 +345,20 @@ struct XPUSiluFunctor : public funcs::BaseActivationFunctor<T> {
     if (std::getenv("XPU_PADDLE_ACT_LUT") != nullptr) {
       if (!std::is_same<T, ::phi::dtype::bfloat16>::value) {
         // use fast_swish if NOT bf16
-        int r = xpu::fast_swish(
+        int r = xpu::fast_silu(
             xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
-        PADDLE_ENFORCE_XDNN_SUCCESS(r, "fast_swish");
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "fast_silu");
       } else {
         // use plain swish
-        int r = xpu::swish(
-            xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
-        PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
+        int r =
+            xpu::silu(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
+        PADDLE_ENFORCE_XDNN_SUCCESS(r, "silu");
       }
     } else {
       // use plain swish
       int r =
-          xpu::swish(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
-      PADDLE_ENFORCE_XDNN_SUCCESS(r, "swish");
+          xpu::silu(xpu_context, x_data, y_data, x.numel(), nullptr, nullptr);
+      PADDLE_ENFORCE_XDNN_SUCCESS(r, "silu");
     }
   }
 };
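The round change is a pure rename: the old xpu::round already took a decimals argument, and xpu::paddle_round presumably exists to distinguish Paddle's round-half-away-from-zero semantics from IEEE round-to-even. A sketch with the signature taken from the call site (round_to_decimals is a made-up wrapper):

#include <cstdint>
#include "xpu/xdnn.h"

namespace xpu = baidu::xpu::api;

// y[i] = x[i] rounded to `decimals` decimal places (0 = round to integers).
int round_to_decimals(xpu::Context* ctx, const float* x, float* y,
                      int64_t n, int decimals) {
  return xpu::paddle_round<float>(ctx, x, y, n, decimals);
}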
22 changes: 11 additions & 11 deletions paddle/phi/kernels/xpu/add_n_kernel.cc
@@ -76,8 +76,8 @@ void AddNKernel(const Context& dev_ctx,
   } else if (ptrs.size() < x.size()) {
     xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
     XPUType* out_t = RAII_GUARD.alloc_l3_or_gm<XPUType>(out->numel());
-    int r = xpu::sum(dev_ctx.x_context(), ptrs, out_t, out->numel());
-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sum");
+    int r = xpu::add_n(dev_ctx.x_context(), ptrs, out_t, out->numel());
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add_n");

     r = xpu::add(dev_ctx.x_context(),
                  reinterpret_cast<const XPUType*>(out->data<T>()),
@@ -86,12 +86,12 @@
                  out->numel());
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "add");
   } else {
-    int r = xpu::sum(dev_ctx.x_context(),
-                     ptrs,
-                     reinterpret_cast<XPUType*>(out->data<T>()),
-                     out->numel());
+    int r = xpu::add_n(dev_ctx.x_context(),
+                       ptrs,
+                       reinterpret_cast<XPUType*>(out->data<T>()),
+                       out->numel());

-    PADDLE_ENFORCE_XDNN_SUCCESS(r, "sum");
+    PADDLE_ENFORCE_XDNN_SUCCESS(r, "add_n");
   }
 }

@@ -149,10 +149,10 @@ void AddNArrayKernel(const Context& dev_ctx,

       // int add_n(Context* xpu_ctx, const std::vector<const T*>& x_list, T*
       // y, int64_t len);
-      int r = xpu::sum(dev_ctx.x_context(),
-                       ptrs,
-                       reinterpret_cast<XPUType*>(out->at(j).data<T>()),
-                       out->at(j).numel());
+      int r = xpu::add_n(dev_ctx.x_context(),
+                         ptrs,
+                         reinterpret_cast<XPUType*>(out->at(j).data<T>()),
+                         out->at(j).numel());
       PADDLE_ENFORCE_XDNN_SUCCESS(r, "add_n");
     }
   }
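The xpu::sum call sites become xpu::add_n, matching the commented XDNN signature above; a minimal sketch (accumulate is a made-up wrapper):

#include <cstdint>
#include <vector>
#include "xpu/xdnn.h"

namespace xpu = baidu::xpu::api;

// out[i] = inputs[0][i] + inputs[1][i] + ... for i in [0, len).
int accumulate(xpu::Context* ctx, const std::vector<const float*>& inputs,
               float* out, int64_t len) {
  return xpu::add_n(ctx, inputs, out, len);
}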
4 changes: 2 additions & 2 deletions paddle/phi/kernels/xpu/c_softmax_with_cross_entropy_kernel.cc
@@ -125,15 +125,15 @@ void FixLossAccordingToIgnoreIndex(const phi::XPUContext& dev_ctx,
   // int where(Context* xpu_ctx, const bool* condition, const T* x, const T* y,
   // T* z, const std::vector<int64_t>& condition_shape, const
   // std::vector<int64_t>& xshape);
-  ret = xpu::select(
+  ret = xpu::where(
       dev_ctx.x_context(),
       reinterpret_cast<const bool*>(bool_tensor_for_mask_label.data<bool>()),
       reinterpret_cast<const XPUType*>(zeros_constant.data<T>()),
       reinterpret_cast<const XPUType*>(loss->data<T>()),
       reinterpret_cast<XPUType*>(loss->data<T>()),
       common::vectorize(predicted_logits->dims()),
       common::vectorize(predicted_logits->dims()));
-  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "select");
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "where");
 }
 template <typename T>
 struct CSoftmaxWithCrossEntropyFunctor<phi::XPUContext, T> {
3 changes: 2 additions & 1 deletion paddle/phi/kernels/xpu/flash_attn_kernel.cc
@@ -213,7 +213,8 @@ void FlashAttnKernelBase(
             : 0,                                    // flash_mask_head_num
         nullptr,                                    // flashmask_maxmin
         is_flashmask ? flashmask_stream : nullptr,  // side_stream
-        0                                           // fixlen_batch_num
+        0,                                          // fixlen_batch_num
+        false                                       // unpadded_lse
     );
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "mha_varlen_fwd");
     if (is_flashmask && flashmask_stream != nullptr) {
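Note: the only change here is the new trailing unpadded_lse argument to mha_varlen_fwd; passing false keeps the existing padded log-sum-exp layout. The parameter appears to have been added in the bumped XHPC release.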
6 changes: 3 additions & 3 deletions paddle/phi/kernels/xpu/index_select_kernel.cc
@@ -73,7 +73,7 @@ void IndexSelectKernel(const Context& dev_ctx,
     const int64_t* index_data =
         index_ptr ? reinterpret_cast<const int64_t*>(index_ptr)
                   : index.template data<int64_t>();
-    r = xpu::paddle_gather<XPUType, int64_t>(
+    r = xpu::index_select<XPUType, int64_t>(
         dev_ctx.x_context(),
         reinterpret_cast<const XPUType*>(in_data),
         reinterpret_cast<const int64_t*>(index_data),
@@ -84,7 +84,7 @@
   } else {
     const int* index_data = index_ptr ? reinterpret_cast<const int*>(index_ptr)
                                       : index.template data<int>();
-    r = xpu::paddle_gather<XPUType, int>(
+    r = xpu::index_select<XPUType, int>(
         dev_ctx.x_context(),
         reinterpret_cast<const XPUType*>(in_data),
         reinterpret_cast<const int*>(index_data),
@@ -93,7 +93,7 @@ void IndexSelectKernel(const Context& dev_ctx,
         index_len,
         dim);
   }
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "paddle_gather");
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "index_select");
 }

 } // namespace phi
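A sketch of the renamed call. The template parameters (data type, index type) and the trailing index_len/dim arguments are visible above, but the middle arguments are folded out of the diff, so the output pointer and input-shape parameters here are assumptions:

#include <cstdint>
#include <vector>
#include "xpu/xdnn.h"

namespace xpu = baidu::xpu::api;

// Copy x's slices along `dim` at the positions given by `index` into `out`.
int select_rows(xpu::Context* ctx, const float* x, const int64_t* index,
                float* out, const std::vector<int64_t>& x_shape,
                int64_t index_len, int dim) {
  return xpu::index_select<float, int64_t>(ctx, x, index, out, x_shape,
                                           index_len, dim);
}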
16 changes: 8 additions & 8 deletions paddle/phi/kernels/xpu/reduce_max_grad_kernel.cc
@@ -109,14 +109,14 @@ void ReduceMaxGradKernel(const Context& dev_ctx,
   r = xpu::constant(
       dev_ctx.x_context(), broadcast1, x.numel(), static_cast<XPUDataType>(0));
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-  r = xpu::select(dev_ctx.x_context(),
-                  equal,
-                  broadcast2,
-                  broadcast1,
-                  x_grad_data,
-                  xdims,
-                  xdims);
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "select");
+  r = xpu::where(dev_ctx.x_context(),
+                 equal,
+                 broadcast2,
+                 broadcast1,
+                 x_grad_data,
+                 xdims,
+                 xdims);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "where");
 }

 } // namespace phi
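Note: the kernel computes the max-reduction gradient by broadcasting the reduced result and upstream gradient back to x's shape, comparing with x for equality, then routing with the renamed primitive: where equal holds, the broadcast gradient (broadcast2) flows through; elsewhere the zero-filled buffer (broadcast1) is taken. The min-grad kernel below follows the same pattern.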
16 changes: 8 additions & 8 deletions paddle/phi/kernels/xpu/reduce_min_grad_kernel.cc
@@ -105,14 +105,14 @@ void ReduceMinGradKernel(const Context& dev_ctx,
   // step 3. get x_grad
   r = xpu::constant<T>(dev_ctx.x_context(), broadcast1, x.numel(), 0);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "constant");
-  r = xpu::select<T>(dev_ctx.x_context(),
-                     equal,
-                     broadcast2,
-                     broadcast1,
-                     x_grad_data,
-                     xdims,
-                     xdims);
-  PADDLE_ENFORCE_XDNN_SUCCESS(r, "select");
+  r = xpu::where<T>(dev_ctx.x_context(),
+                    equal,
+                    broadcast2,
+                    broadcast1,
+                    x_grad_data,
+                    xdims,
+                    xdims);
+  PADDLE_ENFORCE_XDNN_SUCCESS(r, "where");
 }

 } // namespace phi
6 changes: 3 additions & 3 deletions paddle/phi/kernels/xpu/swiglu_grad_kernel.cc
@@ -76,13 +76,13 @@ void SwiGluGradKernel(const Context& dev_ctx,
   }
   int ret = xpu::swiglu_grad(dev_ctx.x_context(),
                              reinterpret_cast<const XPUType*>(x_data),
+                             y_ptr,
                              reinterpret_cast<const XPUType*>(dz_data),
                              reinterpret_cast<XPUType*>(dx_data),
+                             dy_ptr,
                              dims_vec,
                              axis,
-                             true,
-                             y_ptr,
-                             dy_ptr);
+                             true);
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "swiglu_grad");
 }
 } // namespace phi
7 changes: 2 additions & 5 deletions paddle/phi/kernels/xpu/swiglu_kernel.cc
@@ -30,7 +30,6 @@ void SwiGluKernel(const Context& dev_ctx,
   const auto& dims = x.dims();
   int64_t axis = dims.size() - 1;
   auto dims_vec = common::vectorize<int64_t>(dims);
-  const XPUTypefp32* const_nullptr = nullptr;
   const XPUType* y_ptr = nullptr;

   if (y) {
@@ -48,13 +47,11 @@
   }
   int ret = xpu::swiglu(dev_ctx.x_context(),
                         reinterpret_cast<const XPUType*>(x_data),
+                        y_ptr,
                         reinterpret_cast<XPUType*>(z_data),
                         dims_vec,
                         axis,
-                        true,
-                        const_nullptr,
-                        nullptr,
-                        y_ptr);
+                        true);
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "swiglu");
 }
 } // namespace phi
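Note: in both swiglu kernels the optional second operand y (and its gradient dy in the backward pass) moves from a trailing optional-pointer position into a positional slot right after x (and dz), and the forward kernel drops its const_nullptr placeholder, matching what appears to be a reordered swiglu signature in the bumped XHPC.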
16 changes: 8 additions & 8 deletions paddle/phi/kernels/xpu/where_kernel.cc
@@ -46,15 +46,15 @@ void WhereKernel(const Context& dev_ctx,
     x_dims = std::vector<int64_t>({1});
   }

-  int ret = xpu::select(dev_ctx.x_context(),
-                        cond_data,
-                        x_data,
-                        y_data,
-                        out_data,
-                        cond_dims,
-                        x_dims);
+  int ret = xpu::where(dev_ctx.x_context(),
+                       cond_data,
+                       x_data,
+                       y_data,
+                       out_data,
+                       cond_dims,
+                       x_dims);

-  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "xpu::select");
+  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "where");
 }

 } // namespace phi
2 changes: 2 additions & 0 deletions python/env_dict.py.in
@@ -90,6 +90,8 @@ env_dict={
     'XPU_XFA_LIB_NAME':'@XPU_XFA_LIB_NAME@',
     'XPU_XPUDNN_LIB':'@XPU_XPUDNN_LIB@',
     'XPU_XPUDNN_LIB_NAME':'@XPU_XPUDNN_LIB_NAME@',
+    'XPU_XPUDNN_OMP_LIB':'@XPU_XPUDNN_OMP_LIB@',
+    'XPU_XPUDNN_OMP_LIB_NAME':'@XPU_XPUDNN_OMP_LIB_NAME@',
     'THIRD_PARTY_PATH':'@THIRD_PARTY_PATH@',
     'SETUP_LOG_FILE':'@SETUP_LOG_FILE@',
     'WITH_STRIP':'@WITH_STRIP@',
3 changes: 3 additions & 0 deletions python/setup.py.in
@@ -1243,6 +1243,9 @@ if '${WITH_XPU}' == 'ON':
         package_data['paddle.libs'] += ['${XPU_XFA_LIB_NAME}']
         shutil.copy('${XPU_XPUDNN_LIB}', libs_path)
         package_data['paddle.libs'] += ['${XPU_XPUDNN_LIB_NAME}']
+        shutil.copy('${XPU_XPUDNN_OMP_LIB}', libs_path)
+        package_data['paddle.libs'] += ['${XPU_XPUDNN_OMP_LIB_NAME}']
+

     if '${WITH_XPU_BKCL}' == 'ON':
         shutil.copy('${XPU_BKCL_LIB}', libs_path)