Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion paddle/fluid/platform/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ endif()
cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)

# separate init from device_context to avoid cycle dependencies
cc_library(init SRCS init.cc DEPS device_context custom_kernel)
cc_library(init SRCS init.cc DEPS device_context custom_kernel context_pool)

# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
Expand Down
36 changes: 36 additions & 0 deletions paddle/fluid/platform/init.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <csignal>
#include <fstream>
#include <set>
#include <string>

#include "paddle/fluid/platform/cpu_helper.h"
Expand Down Expand Up @@ -55,6 +56,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif

#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/core/custom_kernel.h"

DECLARE_int32(paddle_num_threads);
Expand Down Expand Up @@ -83,6 +86,38 @@ namespace framework {
std::once_flag gflags_init_flag;
std::once_flag glog_init_flag;
std::once_flag npu_init_flag;
std::once_flag phi_dev_ctx_pool_init_flag;

// platform::DeviceContextPool has no const Get method, so use pointer
// Mirrors the device contexts owned by fluid's DeviceContextPool into the
// phi (paddle::experimental) DeviceContextPool, so the high-level C++ API can
// look them up without depending on fluid. The pool holds non-owning
// pointers; fluid keeps managing the contexts' lifetime. Runs at most once
// per process (guarded by phi_dev_ctx_pool_init_flag).
static void InitPhiDeviceContextPool(const std::vector<platform::Place> &places,
                                     platform::DeviceContextPool *pool) {
  std::call_once(phi_dev_ctx_pool_init_flag, [&]() {
    // De-duplicate: the same place may appear multiple times in `places`.
    std::set<platform::Place> place_set(places.begin(), places.end());
    auto &context_pool = paddle::experimental::DeviceContextPool::Instance();
    for (const auto &p : place_set) {
      // only get CPU and GPU DeviceContext now, add other DeviceContext type
      // later if needed
      if (platform::is_cpu_place(p)) {
        context_pool.Insert(
            p,
            static_cast<const typename framework::ConvertToPhiContext<
                platform::CPUDeviceContext>::TYPE *>(pool->Get(p)));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      } else if (platform::is_gpu_place(p)) {
        context_pool.Insert(
            p,
            static_cast<const typename framework::ConvertToPhiContext<
                platform::CUDADeviceContext>::TYPE *>(pool->Get(p)));
#endif
      } else {
        // skip other places now, do nothing
      }
    }
  });
}

bool InitGflags(std::vector<std::string> args) {
bool successed = false;
Expand Down Expand Up @@ -274,6 +309,7 @@ void InitDevices(const std::vector<int> devices) {
}
#endif
platform::DeviceContextPool::Init(places);
InitPhiDeviceContextPool(places, &platform::DeviceContextPool::Instance());

#ifndef PADDLE_WITH_MKLDNN
platform::SetNumThreads(FLAGS_paddle_num_threads);
Expand Down
83 changes: 83 additions & 0 deletions paddle/phi/api/include/context_pool.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include "paddle/phi/common/place.h"
#include "paddle/phi/core/macros.h"
#include "paddle/utils/flat_hash_map.h"

namespace phi {
class DeviceContext;
class CPUContext;
class GPUContext;
} // namespace phi

namespace paddle {
namespace experimental {

// Maps an AllocationType tag to the phi DeviceContext subclass that serves
// it. Only CPU and GPU are specialized for now; using the primary template
// with any other AllocationType is a compile-time error.
template <AllocationType T>
struct DefaultDeviceContextType;

template <>
struct DefaultDeviceContextType<AllocationType::CPU> {
using TYPE = phi::CPUContext;
};

template <>
struct DefaultDeviceContextType<AllocationType::GPU> {
using TYPE = phi::GPUContext;
};

/**
* The DeviceContextPool here is just a mirror of the DeviceContextPool in
* fluid, and does not manage the life cycle of the DeviceContext.
* It is mainly used for external custom operator calls and high-performance
* C++ APIs.
*
* Since DeviceContextPool in fluid is a global singleton, it always exists
* in program running, so DeviceContextPool here can always access the correct
* DeviceContext pointer.
*
* In order not to depend on the fluid's DeviceContextPool,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The implementation has been adjusted in the meantime, so this comment needs to be updated as well; will polish it in the next PR.

* the DeviceContextPool here needs to be initialized in the fluid, and cannot
* be initialized by itself.
*/
class DeviceContextPool {
public:
// Process-wide singleton accessor.
static DeviceContextPool& Instance();

// Returns the context registered for `place`. The pool does not own the
// pointer; lifetime is managed elsewhere (see file comment above).
const phi::DeviceContext* Get(const Place& place) const;

// Mutable access to the same context returned by Get().
phi::DeviceContext* GetMutable(const Place& place);

// Typed lookup: casts the stored base pointer to the default context type
// for AllocationType T. NOTE: reinterpret_cast (not static_cast) is used
// because phi::CPUContext/GPUContext are only forward-declared in this
// header; the caller must request the same type that was inserted.
template <AllocationType T>
const typename DefaultDeviceContextType<T>::TYPE* Get(
const Place& place) const {
return reinterpret_cast<const typename DefaultDeviceContextType<T>::TYPE*>(
Get(place));
}

// Registers a non-owning context pointer for `place`.
void Insert(const Place& place, const phi::DeviceContext* dev_ctx);

private:
DeviceContextPool() = default;
// place -> borrowed context pointer; entries are never removed.
paddle::flat_hash_map<Place, const phi::DeviceContext*, Place::Hash>
context_map_;

DISABLE_COPY_AND_ASSIGN(DeviceContextPool);
};

} // namespace experimental
} // namespace paddle
3 changes: 2 additions & 1 deletion paddle/phi/api/lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ add_custom_command(

cc_library(op_meta_info SRCS op_meta_info.cc DEPS phi_tensor_raw)
cc_library(wrapped_infermeta SRCS ${wrapped_infermeta_source_file} DEPS phi)
cc_library(context_pool SRCS context_pool.cc DEPS cpu_context gpu_context phi_enforce place)

cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS phi_tensor_raw phi_context kernel_factory)
cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS phi_tensor_raw phi_context kernel_factory context_pool)
cc_library(api_gen_utils SRCS api_gen_utils.cc DEPS phi_tensor_raw selected_rows sparse_csr_tensor sparse_coo_tensor)
cc_library(phi_data_transform SRCS data_transform.cc DEPS phi_tensor_raw transfer_layout_kernel cast_kernel data_device_transform)
cc_library(api_custom_impl SRCS api_custom_impl.cc DEPS phi_tensor_raw phi kernel_dispatch api_gen_utils phi_data_transform)
Expand Down
53 changes: 53 additions & 0 deletions paddle/phi/api/lib/context_pool.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/phi/api/include/context_pool.h"

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/enforce.h"

namespace paddle {
namespace experimental {

// Meyers singleton: constructed on first use, thread-safe since C++11, and
// alive for the remainder of the process.
DeviceContextPool& DeviceContextPool::Instance() {
  static DeviceContextPool pool;
  return pool;
}

// Looks up the context registered for `place`.
// Raises NotFound if no context was inserted for that place.
// (Fix: error message grammar — "does not exists" -> "does not exist".)
const phi::DeviceContext* DeviceContextPool::Get(const Place& place) const {
  auto it = context_map_.find(place);
  PADDLE_ENFORCE_NE(
      it,
      context_map_.end(),
      phi::errors::NotFound("The DeviceContext of %s does not exist.", place));
  return it->second;
}

// Mutable variant of Get(). The const_cast is safe for entries inserted from
// fluid's pool, whose Get() hands out mutable pointers (see init.cc); the
// stored objects are not genuinely const.
phi::DeviceContext* DeviceContextPool::GetMutable(const Place& place) {
return const_cast<phi::DeviceContext*>(Get(place));
}

// Registers a non-owning context pointer for `place`.
// Raises AlreadyExists if the place was registered before.
void DeviceContextPool::Insert(const Place& place,
                               const phi::DeviceContext* dev_ctx) {
  // emplace does the existence check and the insertion in a single lookup,
  // instead of the original find() followed by operator[].
  auto result = context_map_.emplace(place, dev_ctx);
  PADDLE_ENFORCE_EQ(result.second,
                    true,
                    phi::errors::AlreadyExists(
                        "The DeviceContext of %s already exists.", place));
}

} // namespace experimental
} // namespace paddle
5 changes: 3 additions & 2 deletions paddle/phi/api/lib/kernel_dispatch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ limitations under the License. */

#include "paddle/phi/api/lib/kernel_dispatch.h"

#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/core/compat/convert_utils.h"

namespace paddle {
Expand Down Expand Up @@ -52,8 +53,8 @@ std::size_t CountLeadingZeros(uint64_t val) {
} // namespace detail

// Resolves `backend` to a Place and fetches the matching context from the
// phi DeviceContextPool (the non-owning mirror of fluid's pool).
// Fix: the diff residue left both the old (platform pool) and new
// (experimental pool) bodies in place, redeclaring `pool` and making the
// second return unreachable; only the phi-pool version is kept.
phi::DeviceContext* GetDeviceContextByBackend(phi::Backend backend) {
  auto& pool = paddle::experimental::DeviceContextPool::Instance();
  return pool.GetMutable(phi::TransToPhiPlace(backend));
}

DataType ParseDataType(DataType dtype) { return dtype; }
Expand Down
16 changes: 16 additions & 0 deletions paddle/phi/common/place.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,20 @@ std::string GetGlobalDeviceType(size_t device_type_id) {
return global_registered_device_type[device_type_id];
}

// Width (in bits) of each field's slot in the packed 32-bit hash.
constexpr static int kAllocationTypeBitLength = 8;
constexpr static int kDeviceTypeIDBitLength = 8;
constexpr static int kDeviceIDBitLength = 8;

// Packs the three identifying fields of a Place into one 32-bit value.
// Equal Places hash equally by construction; distinct Places can collide
// only if a field overflows its 8-bit slot (each value is truncated to
// uint8_t before being OR-ed in).
uint32_t Place::Hash::operator()(const Place &place) const {
uint32_t hash_value = 0;
// |----31-24------|-----23-16------|-----15-08----|---7-0----|
// | For extension | AllocationType | DeviceTypeID | DeviceID |
hash_value |= (static_cast<uint8_t>(place.alloc_type_)
<< (kDeviceIDBitLength + kDeviceTypeIDBitLength));
hash_value |=
(static_cast<uint8_t>(place.device_type_id_) << kDeviceIDBitLength);
hash_value |= static_cast<uint8_t>(place.device);
return hash_value;
}

} // namespace phi
43 changes: 21 additions & 22 deletions paddle/phi/common/place.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,31 +73,23 @@ class Place {

std::string DebugString() const;

struct Hash {
// Note: Now the number of bits we need does not exceed 32 bits, so there is
// no need to use 64 bits. If needed in the future, it can be expanded,
// but now we don’t over-design.
uint32_t operator()(const Place& place) const;
};

uint32_t HashValue() const { return Hash()(*this); }

// Fixes in this span:
//  - operator== returned `HashValue() <= rhs.HashValue()`, which is not an
//    equality test (asymmetric: a==b could differ from b==a). Must be `==`.
//  - The old field-by-field comparison chain was left above the hash-based
//    return, making the new return unreachable; it is removed.
//  - operator!= was defined twice (redefinition); one definition is kept.
inline bool operator==(const Place& rhs) const {
  // HashValue() packs alloc_type_, device_type_id_ and device into one
  // integer, so comparing hashes compares all identifying fields at once.
  // NOTE(review): assumes each field fits its 8-bit hash slot — confirm.
  return HashValue() == rhs.HashValue();
}
inline bool operator!=(const Place& rhs) const {
  return HashValue() != rhs.HashValue();
}
// Fix: the old lexicographic comparison chain was left above the hash-based
// return, making that return unreachable; only the hash comparison is kept.
inline bool operator<(const Place& rhs) const {
  // The hash layout orders fields from most- to least-significant byte as
  // AllocationType, DeviceTypeID, DeviceID, so integer comparison of the
  // packed hashes reproduces the old field-by-field lexicographic order.
  return HashValue() < rhs.HashValue();
}

public:
Expand Down Expand Up @@ -206,3 +198,10 @@ class CustomPlace : public Place {
std::ostream& operator<<(std::ostream&, const Place&);

} // namespace phi

namespace paddle {
namespace experimental {
using AllocationType = phi::AllocationType;
using Place = phi::Place;
} // namespace experimental
} // namespace paddle