Skip to content

Commit 0d3d805

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into transpose_conv1d
test=develop
2 parents 924eb5f + b6eb37f commit 0d3d805

File tree

202 files changed

+13421
-1251
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

202 files changed

+13421
-1251
lines changed

CMakeLists.txt

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@ include(generic) # simplify cmake module
2828
# TODO(Shibo Tao): remove find_package(CUDA) completely.
2929
find_package(CUDA QUIET)
3030
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
31-
31+
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN" OFF)
32+
if (WITH_GPU AND WITH_XPU)
33+
message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
34+
endif()
3235
# cmake 3.12, 3.13, 3.14 will append gcc link options to nvcc, and nvcc doesn't recognize them.
3336
if(WITH_GPU AND (${CMAKE_VERSION} VERSION_GREATER_EQUAL 3.12) AND (${CMAKE_VERSION} VERSION_LESS 3.15))
3437
message(FATAL_ERROR "cmake ${CMAKE_VERSION} is not supported when WITH_GPU=ON because of bug https://cmake.org/pipermail/cmake/2018-September/068195.html. "

cmake/configure.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@ if(WITH_BOX_PS)
6363
add_definitions(-DPADDLE_WITH_BOX_PS)
6464
endif()
6565

66+
if(WITH_XPU)
67+
message(STATUS "Compile with XPU!")
68+
add_definitions(-DPADDLE_WITH_XPU)
69+
endif()
70+
6671
if(WITH_GPU)
6772
add_definitions(-DPADDLE_WITH_CUDA)
6873
add_definitions(-DEIGEN_USE_GPU)

cmake/external/cub.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ include(ExternalProject)
1717
set(CUB_PREFIX_DIR ${THIRD_PARTY_PATH}/cub)
1818
set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub/src/extern_cub)
1919
set(CUB_REPOSITORY https://github.com/NVlabs/cub.git)
20-
set(CUB_TAG 1.9.8)
20+
set(CUB_TAG 1.8.0)
2121

2222
cache_third_party(extern_cub
2323
REPOSITORY ${CUB_REPOSITORY}

cmake/external/xpu.cmake

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Fetches the prebuilt Baidu Kunlun (XPU) SDK archive through ExternalProject
# and exposes it to the rest of the build as the `xpulib` dependency target.
# The whole file is a no-op unless WITH_XPU is enabled.
if(NOT WITH_XPU)
  return()
endif()

include(ExternalProject)

set(XPU_PROJECT "extern_xpu")
# Plain cache entry (no FORCE) so that a user-supplied -DXPU_URL=<mirror>
# is respected instead of being overwritten on every configure run.
set(XPU_URL "https://kunlun1.su.bcebos.com/xpu.tar.gz" CACHE STRING
    "URL of the XPU SDK archive (xpu.tar.gz)")

# Download/source layout of the external project.
set(XPU_SOURCE_DIR "${THIRD_PARTY_PATH}/xpu")
set(XPU_DOWNLOAD_DIR "${XPU_SOURCE_DIR}/src/${XPU_PROJECT}")

# Install layout; every derived path hangs off XPU_INSTALL_DIR.
set(XPU_INSTALL_DIR "${THIRD_PARTY_PATH}/install/xpu")
set(XPU_API_INC_DIR "${XPU_INSTALL_DIR}/api/include")
set(XPU_RUNTIME_INC_DIR "${XPU_INSTALL_DIR}/runtime/include")
set(XPU_LIB_DIR "${XPU_INSTALL_DIR}/lib")

# Shared libraries shipped inside the archive.
set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_SIM_LIB_NAME "libxpusim.so")
set(XPU_API_LIB "${XPU_LIB_DIR}/${XPU_API_LIB_NAME}")
set(XPU_RT_LIB "${XPU_LIB_DIR}/${XPU_RT_LIB_NAME}")
set(XPU_SIM_LIB "${XPU_LIB_DIR}/${XPU_SIM_LIB_NAME}")

# list(APPEND) avoids the empty leading element that
# set(VAR "${VAR}" new) produces when CMAKE_INSTALL_RPATH is still unset.
list(APPEND CMAKE_INSTALL_RPATH "${XPU_LIB_DIR}")

include_directories(${XPU_API_INC_DIR})
include_directories(${XPU_RUNTIME_INC_DIR})

# The archive carries no build system of its own, so write a tiny install-only
# project into the external project's source directory. XPU_INSTALL_DIR is
# expanded here, at configure time, so the generated install() rule uses an
# absolute destination.
# cmake_minimum_required() must precede project() so policies are set first.
file(WRITE ${XPU_DOWNLOAD_DIR}/CMakeLists.txt
  "cmake_minimum_required(VERSION 3.0)\n"
  "PROJECT(XPU)\n"
  "install(DIRECTORY xpu/api xpu/runtime xpu/lib \n"
  "        DESTINATION \"${XPU_INSTALL_DIR}\")\n")

# NOTE(review): --no-check-certificate disables TLS verification for the SDK
# download; kept as-is because the host's certificate setup is not visible
# here -- confirm whether it can be dropped.
ExternalProject_Add(
  ${XPU_PROJECT}
  ${EXTERNAL_PROJECT_LOG_ARGS}
  PREFIX                ${XPU_SOURCE_DIR}
  DOWNLOAD_DIR          ${XPU_DOWNLOAD_DIR}
  DOWNLOAD_COMMAND      wget --no-check-certificate ${XPU_URL} -c -q -O xpu.tar.gz
                        && tar xvf xpu.tar.gz
  DOWNLOAD_NO_PROGRESS  1
  UPDATE_COMMAND        ""
  # XPU_INSTALL_ROOT was never defined in this file, which passed an empty
  # install prefix to the sub-build; XPU_INSTALL_DIR is the intended location.
  CMAKE_ARGS            -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_DIR}
  CMAKE_CACHE_ARGS      -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_DIR}
)

# Imported target pointing at the installed XPU API shared library.
add_library(shared_xpuapi SHARED IMPORTED GLOBAL)
set_property(TARGET shared_xpuapi PROPERTY IMPORTED_LOCATION "${XPU_API_LIB}")

# generate a static dummy target to track xpulib dependencies
# for cc_library(xxx SRCS xxx.c DEPS xpulib)
generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake")

# Plain (keyword-less) signature is kept on purpose: the signature used for
# `xpulib` elsewhere in the project is not visible here, and CMake forbids
# mixing keyword and plain signatures on one target.
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_SIM_LIB})
# The libraries only exist after the external project has been installed.
add_dependencies(xpulib ${XPU_PROJECT})

cmake/operators.cmake

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ function(op_library TARGET)
88
set(hip_cu_srcs)
99
set(miopen_hip_cc_srcs)
1010
set(cu_cc_srcs)
11+
set(xpu_cc_srcs)
1112
set(cudnn_cu_cc_srcs)
1213
set(cudnn_cu_srcs)
1314
set(CUDNN_FILE)
@@ -60,6 +61,12 @@ function(op_library TARGET)
6061
list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
6162
endif()
6263
endif()
64+
if(WITH_XPU)
65+
string(REPLACE "_op" "_xpu_op" XPU_FILE "${TARGET}")
66+
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/xpu/${XPU_FILE}.cc)
67+
list(APPEND xpu_cc_srcs xpu/${XPU_FILE}.cc)
68+
endif()
69+
endif()
6370
else()
6471
foreach(src ${op_library_SRCS})
6572
if (${src} MATCHES ".*\\.hip.cu$")
@@ -76,6 +83,8 @@ function(op_library TARGET)
7683
list(APPEND mkldnn_cc_srcs ${src})
7784
elseif(${src} MATCHES ".*\\.cu.cc$")
7885
list(APPEND cu_cc_srcs ${src})
86+
elseif(WITH_XPU AND ${src} MATCHES ".*_xpu_op.cc$")
87+
list(APPEND xpu_cc_srcs ${src})
7988
elseif(${src} MATCHES ".*\\.cc$")
8089
list(APPEND cc_srcs ${src})
8190
else()
@@ -109,7 +118,7 @@ function(op_library TARGET)
109118
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cu_srcs} ${miopen_hip_cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
110119
${op_common_deps})
111120
else()
112-
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} DEPS ${op_library_DEPS}
121+
cc_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} DEPS ${op_library_DEPS}
113122
${op_common_deps})
114123
endif()
115124

@@ -150,10 +159,11 @@ function(op_library TARGET)
150159
list(LENGTH cu_srcs cu_srcs_len)
151160
list(LENGTH cu_cc_srcs cu_cc_srcs_len)
152161
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
162+
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
153163
list(LENGTH hip_cu_srcs hip_cu_srcs_len)
154164
list(LENGTH miopen_hip_cc_srcs miopen_hip_cc_srcs_len)
155165
if (${pybind_flag} EQUAL 0 AND ${mkldnn_cc_srcs_len} EQUAL 0 AND ${cu_srcs_len} EQUAL 0 AND ${cu_cc_srcs_len} EQUAL 0 AND
156-
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0)
166+
${hip_cu_srcs_len} EQUAL 0 AND ${miopen_hip_cc_srcs_len} EQUAL 0 AND ${xpu_cc_srcs_len} EQUAL 0)
157167
file(APPEND ${pybind_file} "USE_CPU_ONLY_OP(${TARGET});\n")
158168
set(pybind_flag 1)
159169
endif()
@@ -179,6 +189,9 @@ function(op_library TARGET)
179189
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, MIOPEN);\n")
180190
endif()
181191

192+
if (WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
193+
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, XPU);\n")
194+
endif()
182195
# pybind USE_OP_DEVICE_KERNEL for MKLDNN
183196
if (WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
184197
# Append first implemented MKLDNN activation operator
@@ -228,6 +241,7 @@ function(register_operators)
228241

229242
file(GLOB OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_op.cc")
230243
string(REPLACE "_mkldnn" "" OPS "${OPS}")
244+
string(REPLACE "_xpu" "" OPS "${OPS}")
231245
string(REPLACE ".cc" "" OPS "${OPS}")
232246
list(REMOVE_DUPLICATES OPS)
233247
list(LENGTH register_operators_DEPS register_operators_DEPS_len)

cmake/third_party.cmake

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,11 @@ if(WITH_GPU)
250250
file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage
251251
endif(WITH_GPU)
252252

253+
if(WITH_XPU)
254+
include(external/xpu) # download, build, install xpu
255+
list(APPEND third_party_deps extern_xpu)
256+
endif(WITH_XPU)
257+
253258
if(WITH_PSLIB)
254259
include(external/pslib) # download, build, install pslib
255260
list(APPEND third_party_deps extern_pslib)

paddle/fluid/framework/dlpack_tensor.cc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,11 @@ struct DLContextVisitor : public boost::static_visitor<::DLContext> {
7070
return ctx;
7171
}
7272

73+
inline ::DLContext operator()(const platform::XPUPlace &place) const {
74+
PADDLE_THROW(
75+
platform::errors::Unimplemented("platform::XPUPlace is not supported"));
76+
}
77+
7378
inline ::DLContext operator()(const platform::CUDAPlace &place) const {
7479
#ifdef PADDLE_WITH_CUDA
7580
::DLContext ctx;

paddle/fluid/framework/executor.cc

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -444,22 +444,31 @@ void Executor::RunPartialPreparedContext(ExecutorPrepareContext* ctx,
444444
int64_t max_memory_size = GetEagerDeletionThreshold();
445445
std::unique_ptr<GarbageCollector> gc;
446446
if (!ctx->force_disable_gc_ && max_memory_size >= 0) {
447-
#ifdef PADDLE_WITH_CUDA
448447
if (platform::is_gpu_place(place_)) {
448+
#ifdef PADDLE_WITH_CUDA
449449
if (IsFastEagerDeletionModeEnabled()) {
450450
gc.reset(new UnsafeFastGPUGarbageCollector(
451451
BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size));
452452
} else {
453453
gc.reset(new DefaultStreamGarbageCollector(
454454
BOOST_GET_CONST(platform::CUDAPlace, place_), max_memory_size));
455455
}
456-
} else if (platform::is_cpu_place(place_)) {
456+
#else
457+
PADDLE_THROW(
458+
platform::errors::Unimplemented("No GPU gc found in CPU/XPU paddle"));
457459
#endif
460+
} else if (platform::is_cpu_place(place_)) {
458461
gc.reset(new CPUGarbageCollector(
459462
BOOST_GET_CONST(platform::CPUPlace, place_), max_memory_size));
460-
#ifdef PADDLE_WITH_CUDA
461-
}
463+
} else if (platform::is_xpu_place(place_)) {
464+
#ifdef PADDLE_WITH_XPU
465+
gc.reset(new XPUGarbageCollector(
466+
BOOST_GET_CONST(platform::XPUPlace, place_), max_memory_size));
467+
#else
468+
PADDLE_THROW(
469+
platform::errors::Unimplemented("No XPU gc found in CPU/GPU paddle"));
462470
#endif
471+
}
463472
}
464473

465474
for (int64_t i = start_op_index; i < end_op_index; ++i) {

paddle/fluid/framework/garbage_collector.cc

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@ void CPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
5050
callback();
5151
}
5252

53+
#ifdef PADDLE_WITH_XPU
54+
XPUGarbageCollector::XPUGarbageCollector(const platform::XPUPlace &place,
55+
size_t max_memory_size)
56+
: GarbageCollector(place, max_memory_size) {}
57+
void XPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
58+
callback();
59+
}
60+
#endif
61+
5362
#ifdef PADDLE_WITH_CUDA
5463
UnsafeFastGPUGarbageCollector::UnsafeFastGPUGarbageCollector(
5564
const platform::CUDAPlace &place, size_t max_memory_size)

paddle/fluid/framework/garbage_collector.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,16 @@ class CPUGarbageCollector : public GarbageCollector {
5959
void ClearCallback(const std::function<void()> &callback) override;
6060
};
6161

62+
#ifdef PADDLE_WITH_XPU
63+
class XPUGarbageCollector : public GarbageCollector {
64+
public:
65+
XPUGarbageCollector(const platform::XPUPlace &place, size_t max_memory_size);
66+
67+
protected:
68+
void ClearCallback(const std::function<void()> &callback) override;
69+
};
70+
#endif
71+
6272
#ifdef PADDLE_WITH_CUDA
6373
class UnsafeFastGPUGarbageCollector : public GarbageCollector {
6474
public:

0 commit comments

Comments
 (0)