Commit 1973837

Merge branch 'PaddlePaddle:develop' into move_viterbi
2 parents e47d657 + e0866dc

File tree: 368 files changed, +19071 / −6998 lines


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -6,12 +6,14 @@ paddle/fluid/eager/api/generated/*
 paddle/fluid/op_use_default_grad_maker_DEV.spec
 paddle/fluid/op_use_default_grad_maker_PR.spec
 paddle/phi/api/backward/backward_api.h
+paddle/phi/api/backward/sparse_bw_api.h
 paddle/phi/api/include/api.h
 paddle/phi/api/include/sparse_api.h
 paddle/phi/api/lib/api.cc
 paddle/phi/api/lib/dygraph_api.*
 paddle/phi/api/lib/backward_api.cc
 paddle/phi/api/lib/sparse_api.cc
+paddle/phi/api/lib/sparse_bw_api.cc
 paddle/phi/extension.h
 paddle/phi/include/*
 paddle/phi/infermeta/generated.*

cmake/external/llvm.cmake

Lines changed: 2 additions & 2 deletions
@@ -100,8 +100,8 @@ endfunction()
 function(mlir_add_rewriter td_base)
   set(LLVM_TARGET_DEFINITIONS ${td_base}.td)
   mlir_tablegen(${td_base}.cpp.inc -gen-rewriters "-I${CMAKE_SOURCE_DIR}/infrt/dialect/pass")
-  add_public_tablegen_target(${td_base}_IncGen)
-  add_custom_target(${td_base}_inc DEPENDS ${td_base}_IncGen)
+  add_public_tablegen_target(MLIR${td_base}IncGen)
+  add_dependencies(mlir-headers MLIR${td_base}IncGen)
 endfunction()
 
 # Execute the mlir script with infrt-exec program.
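
A hedged sketch of how the renamed target is consumed (the trt_op_converter and my_dialect_lib names are illustrative, not from this commit): after this change, a rewriter's tablegen output hangs off the shared mlir-headers aggregate instead of a per-rewriter ${td_base}_inc custom target.

# Hypothetical caller: for a pattern file trt_op_converter.td, the helper
# now creates the tablegen target MLIRtrt_op_converterIncGen and attaches
# it to mlir-headers.
mlir_add_rewriter(trt_op_converter)

# Consumers no longer depend on trt_op_converter_inc; depending on
# mlir-headers is enough to ensure trt_op_converter.cpp.inc is generated.
add_dependencies(my_dialect_lib mlir-headers)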

cmake/operators.cmake

Lines changed: 6 additions & 6 deletions (the first hunk is a whitespace-only cleanup)

@@ -293,11 +293,11 @@ function(op_library TARGET)
   # Define operators that don't need pybind here.
   foreach(manual_pybind_op "compare_all_op" "compare_op" "logical_op" "bitwise_op" "nccl_op"
 "tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op")
-
-    if ("${TARGET}" STREQUAL "${manual_pybind_op}")
-      set(pybind_flag 1)
-    endif()
-  endforeach()
+
+    if ("${TARGET}" STREQUAL "${manual_pybind_op}")
+      set(pybind_flag 1)
+    endif()
+  endforeach()
 
   # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
   # Note that it's enough to just adding one operator to pybind in a *_op.cc file.

@@ -478,7 +478,7 @@ function(op_library TARGET)
   if (${pybind_flag} EQUAL 0)
     # NOTE(*): activation use macro to regist the kernels, set use_op manually.
     if(${TARGET} STREQUAL "activation")
-      file(APPEND ${pybind_file} "USE_OP(relu);\n")
+      file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n")
     elseif(${TARGET} STREQUAL "fake_dequantize")
      file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
     elseif(${TARGET} STREQUAL "fake_quantize")
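
The relu change reflects its kernels moving out of fluid: per the note above referencing paddle/fluid/framework/op_registry.h, USE_OP pulls in an operator plus its fluid kernel registrations, while USE_OP_ITSELF pulls in the operator alone. A hedged sketch of the resulting generated pybind source (the excerpt is illustrative):

// Illustrative excerpt of the generated pybind file after this change.
USE_OP_ITSELF(relu);              // operator only; relu kernels are now
                                  // registered through phi, not fluid
USE_OP(fake_dequantize_max_abs);  // operator + fluid kernels, unchanged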

cmake/phi.cmake

Lines changed: 59 additions & 63 deletions
@@ -134,8 +134,8 @@ function(kernel_library TARGET)
   if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
     list(APPEND gpu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/kps/${TARGET}.cu)
   endif()
-  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}_gpudnn.cu)
-    list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}_gpudnn.cu)
+  if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu)
+    list(APPEND gpudnn_srcs ${CMAKE_CURRENT_SOURCE_DIR}/gpudnn/${TARGET}.cu)
   endif()
 endif()
 if (WITH_XPU)

@@ -197,92 +197,88 @@
 
   # kernel source file level
   # level 1: base device kernel
-  # - cpu_srcs / gpu_srcs / xpu_srcs / kps_srcs
+  # - cpu_srcs / gpu_srcs / xpu_srcs / gpudnn_srcs / kps_srcs
   # level 2: device-independent kernel
   # - common_srcs
   # level 3: Kernel implemented by reusing device-independent kernel
   # - selected_rows_srcs
+  set(base_device_kernels)
+  set(device_independent_kernel)
+  set(high_level_kernels)
 
-  # Build Target according different src organization
-  if((${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR
-      ${xpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0) AND
-      (${common_srcs_len} GREATER 0 OR ${selected_rows_srcs_len} GREATER 0))
-    # If the common_srcs/selected_rows_srcs depends on specific device srcs, build target using this rule.
+  # 1. Base device kernel compile
+  if (${cpu_srcs_len} GREATER 0)
+    cc_library(${TARGET}_cpu SRCS ${cpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+    list(APPEND base_device_kernels ${TARGET}_cpu)
+  endif()
+  if (${gpu_srcs_len} GREATER 0)
     if (WITH_GPU)
-      if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
-        nv_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-        nv_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
-      endif()
+      nv_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
     elseif (WITH_ROCM)
-      if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
-        hip_library(${TARGET}_part SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-        hip_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
-      endif()
-    elseif (WITH_XPU_KP)
-      if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
-        xpu_library(${TARGET}_part SRCS ${cpu_srcs} ${xpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-        xpu_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
-      endif()
-    else()
-      if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
-        cc_library(${TARGET}_part SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-        cc_library(${TARGET} SRCS ${common_srcs} ${selected_rows_srcs} DEPS ${TARGET}_part)
-      endif()
+      hip_library(${TARGET}_gpu SRCS ${gpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
     endif()
-    # If there are only specific device srcs, build target using this rule.
-  elseif (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
+    list(APPEND base_device_kernels ${TARGET}_gpu)
+  endif()
+  if (${xpu_srcs_len} GREATER 0)
+    cc_library(${TARGET}_xpu SRCS ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+    list(APPEND base_device_kernels ${TARGET}_xpu)
+  endif()
+  if (${gpudnn_srcs_len} GREATER 0)
     if (WITH_GPU)
-      if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
-        nv_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      endif()
+      nv_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
     elseif (WITH_ROCM)
-      if (${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${gpudnn_srcs_len} GREATER 0)
-        hip_library(${TARGET} SRCS ${cpu_srcs} ${gpu_srcs} ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      endif()
-    elseif (WITH_XPU_KP)
-      if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0 OR ${kps_srcs_len} GREATER 0)
-        xpu_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      endif()
-    else()
-      if (${cpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0)
-        cc_library(${TARGET} SRCS ${cpu_srcs} ${xpu_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      endif()
+      hip_library(${TARGET}_gpudnn SRCS ${gpudnn_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
     endif()
-    # If the selected_rows_srcs depends on common_srcs, build target using this rule.
-  elseif (${common_srcs_len} GREATER 0 AND ${selected_rows_srcs_len} GREATER 0)
+    list(APPEND base_device_kernels ${TARGET}_gpudnn)
+  endif()
+  if (${kps_srcs_len} GREATER 0)
+    # only when WITH_XPU_KP, the kps_srcs_len can be > 0
+    xpu_library(${TARGET}_kps SRCS ${kps_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+    list(APPEND base_device_kernels ${TARGET}_kps)
+  endif()
+
+  # 2. Device-independent kernel compile
+  if (${common_srcs_len} GREATER 0)
     if (WITH_GPU)
-      nv_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      nv_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
+      nv_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
     elseif (WITH_ROCM)
-      hip_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      hip_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
+      hip_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
     elseif (WITH_XPU_KP)
-      xpu_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      xpu_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
+      xpu_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
     else()
-      cc_library(${TARGET}_part SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
-      cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${TARGET}_part)
+      cc_library(${TARGET}_common SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels})
     endif()
-    # If there are only common_srcs or selected_rows_srcs, build target using below rules.
-  elseif (${common_srcs_len} GREATER 0)
+    list(APPEND device_independent_kernel ${TARGET}_common)
+  endif()
+
+  # 3. Reusing kernel compile
+  if (${selected_rows_srcs_len} GREATER 0)
     if (WITH_GPU)
-      nv_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      nv_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
     elseif (WITH_ROCM)
-      hip_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      hip_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
     elseif (WITH_XPU_KP)
-      xpu_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      xpu_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
     else()
-      cc_library(${TARGET} SRCS ${common_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      cc_library(${TARGET}_sr SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel})
     endif()
-  elseif (${selected_rows_srcs_len} GREATER 0)
+    list(APPEND high_level_kernels ${TARGET}_sr)
+  endif()
+
+  # 4. Unify target compile
+  list(LENGTH base_device_kernels base_device_kernels_len)
+  list(LENGTH device_independent_kernel device_independent_kernel_len)
+  list(LENGTH high_level_kernels high_level_kernels_len)
+  if (${base_device_kernels_len} GREATER 0 OR ${device_independent_kernel_len} GREATER 0 OR
+      ${high_level_kernels_len} GREATER 0)
     if (WITH_GPU)
-      nv_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      nv_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
     elseif (WITH_ROCM)
-      hip_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      hip_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
     elseif (WITH_XPU_KP)
-      xpu_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      xpu_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
     else()
-      cc_library(${TARGET} SRCS ${selected_rows_srcs} DEPS ${kernel_library_DEPS} ${kernel_deps})
+      cc_library(${TARGET} DEPS ${kernel_library_DEPS} ${kernel_deps} ${base_device_kernels} ${device_independent_kernel} ${high_level_kernels})
     endif()
   else()
     set(target_build_flag 0)
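
Net effect: each source level becomes its own intermediate library, and the final ${TARGET} is an umbrella carrying only DEPS, no SRCS. A hedged sketch, assuming a hypothetical kernel with cpu/, gpu/, and selected_rows/ sources (names and DEPS are illustrative, not from this commit):

# Hypothetical call site, phi/kernels style.
kernel_library(concat_kernel DEPS dense_tensor kernel_context)

# With cpu/concat_kernel.cc, gpu/concat_kernel.cu and
# selected_rows/concat_kernel.cc present, the new rules build:
#   concat_kernel_cpu and concat_kernel_gpu   (level 1, base device)
#   concat_kernel_sr                          (level 3, DEPS on level 1)
#   concat_kernel                             (umbrella, DEPS only)
# instead of the old concat_kernel_part + concat_kernel pair.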

paddle/fluid/distributed/collective/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
@@ -7,3 +7,6 @@ cc_library(eager_reducer SRCS reducer.cc DEPS eager_api processgroup)
 if(WITH_NCCL)
   cc_library(processgroup_nccl SRCS ProcessGroupNCCL.cc DEPS place cuda_stream enforce collective_helper device_context phi phi_api eager_api)
 endif()
+if(WITH_ASCEND_CL)
+  cc_library(processgroup_hccl SRCS ProcessGroupHCCL.cc DEPS place npu_stream enforce collective_helper device_context phi phi_api eager_api)
+endif()
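
A hedged sketch of a matching consumer (the test target is hypothetical); guarding both sides on WITH_ASCEND_CL keeps non-Ascend builds from ever referencing the HCCL-backed process group:

if(WITH_ASCEND_CL)
  cc_test(process_group_hccl_test SRCS ProcessGroupHCCLTest.cc
          DEPS processgroup_hccl)
endif()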
Lines changed: 174 additions & 0 deletions (new file)

@@ -0,0 +1,174 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <error.h>
#include <memory>
#include <mutex>
#include <string>

#include "boost/variant.hpp"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/collective_helper.h"
#include "paddle/fluid/platform/device/npu/enforce_npu.h"
#include "paddle/fluid/platform/device/npu/npu_info.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace distributed {

class NPUEventManager {
 public:
  NPUEventManager() = default;

  ~NPUEventManager() {
    if (is_created_) {
      platform::NPUDeviceGuard guard(device_index_);
      platform::NPUEventDestroy(event_);
    }
  }

  NPUEventManager(const NPUEventManager&) = delete;
  NPUEventManager& operator=(const NPUEventManager&) = delete;

  NPUEventManager(NPUEventManager&& other) {
    std::swap(is_created_, other.is_created_);
    std::swap(device_index_, other.device_index_);
    std::swap(event_, other.event_);
  }

  NPUEventManager& operator=(NPUEventManager&& other) {
    std::swap(is_created_, other.is_created_);
    std::swap(device_index_, other.device_index_);
    std::swap(event_, other.event_);
    return *this;
  }

  bool IsCreated() const { return is_created_; }
  int8_t DeviceId() const { return device_index_; }
  aclrtEvent GetRawNPUEvent() const { return event_; }

  void Record(const paddle::platform::NPUDeviceContext& ctx) {
    auto device_index = ctx.GetPlace().device;
    if (!is_created_) {
      CreateEvent(device_index);
    }
    PADDLE_ENFORCE_EQ(device_index, device_index_,
                      platform::errors::PreconditionNotMet(
                          "NPUDeviceContext's device %d does not match "
                          "Event's device %d",
                          device_index, device_index_));

    platform::NPUDeviceGuard guard(device_index_);
    platform::NPUEventRecord(event_, ctx.stream());
  }

  bool Query() const {
    aclrtEventStatus status = ACL_EVENT_STATUS_COMPLETE;
    platform::NPUEventQuery(event_, &status);
    if (status == ACL_EVENT_STATUS_COMPLETE) {
      return true;
    }
    return false;
  }

  void Block(const paddle::platform::NPUDeviceContext& ctx) const {
    if (is_created_) {
      auto device_index = ctx.GetPlace().device;
      PADDLE_ENFORCE_EQ(device_index, device_index_,
                        platform::errors::PreconditionNotMet(
                            "NPUDeviceContext's device %d does not match "
                            "Event's device %d",
                            device_index, device_index_));
      platform::NPUDeviceGuard guard(device_index_);
      platform::NPUStreamWaitEvent(ctx.stream(), event_);
    }
  }

 private:
  bool is_created_{false};
  aclrtEvent event_{};
  int8_t device_index_{0};

 private:
  void CreateEvent(int device_index) {
    device_index_ = device_index;
    platform::NPUDeviceGuard guard(device_index);
    platform::NPUEventCreate(&event_);
    is_created_ = true;
  }
};

class HCCLCommManager {
 public:
  explicit HCCLCommManager(HcclComm hcclComm) : hccl_comm_(hcclComm) {}

  HCCLCommManager() : HCCLCommManager(nullptr) {}

  ~HCCLCommManager() noexcept {
    std::unique_lock<std::mutex> lock(mutex_);
    if (hccl_comm_) {
      platform::dynload::HcclCommDestroy(hccl_comm_);
    }
  }

  static std::shared_ptr<HCCLCommManager> Create(int num_ranks, int rank,
                                                 HcclRootInfo* comm_id,
                                                 HcclComm hccl_comm) {
    auto hccl_manager = std::make_shared<HCCLCommManager>();
    auto ret = platform::dynload::HcclCommInitRootInfo(num_ranks, comm_id,
                                                       rank, &hccl_comm);
    using __NPU_STATUS_TYPE__ = decltype(ret);
    constexpr auto __success_type__ =
        platform::details::NPUStatusType<__NPU_STATUS_TYPE__>::kSuccess;
    if (UNLIKELY(ret != __success_type__)) {
      VLOG(0) << "Error: create hccl_id error.";
      exit(-1);
    }

    hccl_manager->hccl_id_ = comm_id;
    hccl_manager->rank_ = rank;
    hccl_manager->hccl_comm_ = hccl_comm;
    return hccl_manager;
  }

  HcclRootInfo* GetHcclId() const {
    std::unique_lock<std::mutex> lock(mutex_);
    return hccl_id_;
  }

  HcclComm GetHcclComm() const {
    std::unique_lock<std::mutex> lock(mutex_);
    return hccl_comm_;
  }

  HCCLCommManager(const HCCLCommManager&) = delete;
  HCCLCommManager& operator=(const HCCLCommManager&) = delete;
  HCCLCommManager& operator=(HCCLCommManager&& other) = delete;

  HCCLCommManager(HCCLCommManager&& other) {
    std::unique_lock<std::mutex> lock(other.mutex_);
    std::swap(hccl_comm_, other.hccl_comm_);
  }

 protected:
  HcclComm hccl_comm_;
  HcclRootInfo* hccl_id_;
  int rank_;
  mutable std::mutex mutex_;
};

}  // namespace distributed
}  // namespace paddle
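
As a rough usage sketch (not part of the commit) of the two helpers above, assuming an Ascend build and two NPUDeviceContexts on the same device; every name and value below is illustrative:

// Hedged sketch: order work across two NPU streams, then set up one
// rank's HCCL communicator. root_info is assumed to come from rank 0.
void SyncThenInit(const paddle::platform::NPUDeviceContext& ctx_a,
                  const paddle::platform::NPUDeviceContext& ctx_b,
                  HcclRootInfo* root_info) {
  paddle::distributed::NPUEventManager event;
  event.Record(ctx_a);   // lazily creates the event on ctx_a's device
  if (!event.Query()) {  // work recorded on ctx_a not finished yet
    event.Block(ctx_b);  // ctx_b's stream waits for the event
  }

  HcclComm raw_comm = nullptr;  // initialized inside Create()
  auto comm = paddle::distributed::HCCLCommManager::Create(
      /*num_ranks=*/2, /*rank=*/0, root_info, raw_comm);
  (void)comm->GetHcclComm();
}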
