PaddlePaddle
diff --git a/‎paddle/platform/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions b/‎paddle/platform/CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎paddle/platform/device_context.h‎
Lines changed: 159 additions & 0 deletions b/‎paddle/platform/device_context.h‎
Lines changed: 159 additions & 0 deletions
diff --git a/‎paddle/platform/device_context_test.cc‎
Lines changed: 33 additions & 0 deletions b/‎paddle/platform/device_context_test.cc‎
Lines changed: 33 additions & 0 deletions
@@ -4,3 +4,5 @@ nv_test(cuda_test SRCS cuda_test.cu)
 
 cc_library(place SRCS place.cc)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
+
+nv_test(device_context_test SRCS device_context_test.cc DEPS dynamic_loader place eigen3 glog gflags)
@@ -0,0 +1,159 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/framework/enforce.h"
+#ifndef PADDLE_ONLY_CPU
+#include "paddle/platform/cuda.h"
+#include "paddle/platform/dynload/cublas.h"
+#include "paddle/platform/dynload/cudnn.h"
+#include "paddle/platform/dynload/curand.h"
+#define EIGEN_USE_GPU
+#endif
+#include "paddle/platform/place.h"
+#include "unsupported/Eigen/CXX11/Tensor"
+
+namespace paddle {
+namespace platform {
+
+class DeviceContext {
+ public:
+ virtual ~DeviceContext() {}
+};
+
+class CPUDeviceContext : public DeviceContext {};
+
+#ifndef PADDLE_ONLY_CPU
+class GPUPlaceGuard {
+ public:
+ explicit GPUPlaceGuard(GPUPlace new_place) : previous_(GetCurrentDeviceId()) {
+ if (previous_ != new_place) {
+ paddle::platform::SetDeviceId(new_place.device);
+ }
+ }
+
+ ~GPUPlaceGuard() { paddle::platform::SetDeviceId(previous_.device); }
+
+ private:
+ GPUPlace previous_;
+};
+
+class CUDADeviceContext : public DeviceContext {
+ public:
+ explicit CUDADeviceContext(const GPUPlace gpu_place) : gpu_place_(gpu_place) {
+ GPUPlaceGuard guard(gpu_place_);
+ paddle::platform::throw_on_error(cudaStreamCreate(&stream_),
+ "cudaStreamCreate failed");
+ eigen_stream_ = new Eigen::CudaStreamDevice(&stream_);
+ eigen_device_ = new Eigen::GpuDevice(eigen_stream_);
+ }
+
+ void Wait() {
+ paddle::platform::throw_on_error(cudaStreamSynchronize(stream_),
+ "cudaStreamSynchronize failed");
+ }
+
+ cudaStream_t stream() { return stream_; }
+
+ Eigen::GpuDevice eigen_device() { return *eigen_device_; }
+
+ cublasHandle_t cublas_handle() {
+ if (!blas_handle_) {
+ GPUPlaceGuard guard(gpu_place_);
+ PADDLE_ENFORCE(paddle::platform::dynload::cublasCreate(&blas_handle_) ==
+ CUBLAS_STATUS_SUCCESS,
+ "cublasCreate failed");
+ PADDLE_ENFORCE(paddle::platform::dynload::cublasSetStream(
+ blas_handle_, stream_) == CUBLAS_STATUS_SUCCESS,
+ "cublasSetStream failed");
+ }
+ return blas_handle_;
+ }
+
+ cudnnHandle_t cudnn_handle() {
+ if (!dnn_handle_) {
+ GPUPlaceGuard guard(gpu_place_);
+ PADDLE_ENFORCE(paddle::platform::dynload::cudnnCreate(&dnn_handle_) ==
+ CUDNN_STATUS_SUCCESS,
+ "cudnnCreate failed");
+ PADDLE_ENFORCE(paddle::platform::dynload::cudnnSetStream(
+ dnn_handle_, stream_) == CUDNN_STATUS_SUCCESS,
+ "cudnnSetStream failed");
+ }
+ return dnn_handle_;
+ }
+
+ curandGenerator_t curand_generator() {
+ if (!rand_generator_) {
+ GPUPlaceGuard guard(gpu_place_);
+ PADDLE_ENFORCE(paddle::platform::dynload::curandCreateGenerator(
+ &rand_generator_, CURAND_RNG_PSEUDO_DEFAULT) ==
+ CURAND_STATUS_SUCCESS,
+ "curandCreateGenerator failed");
+ PADDLE_ENFORCE(
+ paddle::platform::dynload::curandSetPseudoRandomGeneratorSeed(
+ rand_generator_, random_seed_) == CURAND_STATUS_SUCCESS,
+ "curandSetPseudoRandomGeneratorSeed failed");
+ PADDLE_ENFORCE(paddle::platform::dynload::curandSetStream(
+ rand_generator_, stream_) == CURAND_STATUS_SUCCESS,
+ "curandSetStream failed");
+ }
+ return rand_generator_;
+ }
+
+ ~CUDADeviceContext() {
+ Wait();
+ if (blas_handle_) {
+ PADDLE_ENFORCE(paddle::platform::dynload::cublasDestroy(blas_handle_) ==
+ CUBLAS_STATUS_SUCCESS,
+ "cublasDestroy failed");
+ }
+
+ if (dnn_handle_) {
+ PADDLE_ENFORCE(paddle::platform::dynload::cudnnDestroy(dnn_handle_) ==
+ CUDNN_STATUS_SUCCESS,
+ "cudnnDestroy failed");
+ }
+
+ if (rand_generator_) {
+ PADDLE_ENFORCE(paddle::platform::dynload::curandDestroyGenerator(
+ rand_generator_) == CURAND_STATUS_SUCCESS,
+ "curandDestroyGenerator failed");
+ }
+
+ delete eigen_stream_;
+ delete eigen_device_;
+
+ paddle::platform::throw_on_error(cudaStreamDestroy(stream_),
+ "cudaStreamDestroy failed");
+ }
+
+ private:
+ GPUPlace gpu_place_;
+ cudaStream_t stream_;
+
+ Eigen::CudaStreamDevice* eigen_stream_;
+ Eigen::GpuDevice* eigen_device_;
+
+ cublasHandle_t blas_handle_{nullptr};
+
+ cudnnHandle_t dnn_handle_{nullptr};
+
+ int random_seed_;
+ curandGenerator_t rand_generator_{nullptr};
+};
+#endif
+} // namespace platform
+} // namespace paddle
@@ -0,0 +1,33 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/platform/device_context.h"
+#include "gtest/gtest.h"
+
+TEST(CUDADeviceContext, Init) {
+ int count = paddle::platform::GetDeviceCount();
+ for (int i = 0; i < count; i++) {
+ paddle::platform::CUDADeviceContext* device_context =
+ new paddle::platform::CUDADeviceContext(i);
+ Eigen::GpuDevice gpu_device = device_context->eigen_device();
+ ASSERT_NE(nullptr, gpu_device.stream());
+ cudnnHandle_t cudnn_handle = device_context->cudnn_handle();
+ ASSERT_NE(nullptr, cudnn_handle);
+ cublasHandle_t cublas_handle = device_context->cublas_handle();
+ ASSERT_NE(nullptr, cublas_handle);
+ curandGenerator_t curand_handle = device_context->curand_generator();
+ ASSERT_NE(nullptr, curand_handle);
+ delete device_context;
+ }
+}