Skip to content

Commit 3542027

Browse files
authored
Merge pull request #3791 from qingqing01/cudnn_wrapper
cuDNN Wrapper.
2 parents d7b2058 + 8c048aa commit 3542027

File tree

6 files changed

+354
-1
lines changed

6 files changed

+354
-1
lines changed

paddle/platform/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ ENDIF()
2222
cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
2323
system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
2424
nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
25+
26+
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)

paddle/platform/cudnn_helper.h

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once

#include <vector>

#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/macros.h"
20+
21+
namespace paddle {
22+
namespace platform {
23+
24+
// Memory layout of a tensor as understood by cuDNN.
enum class DataLayout {
  kNHWC,
  kNCHW,
  // NOTE(review): kNCHW_VECT_C currently has no mapping in
  // GetCudnnTensorFormat — passing it there throws.
  kNCHW_VECT_C,
};
29+
30+
// Pooling reduction used by ScopedPoolingDescriptor. kAverage maps to
// CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING (padding excluded from the
// mean); kMaximum maps to CUDNN_POOLING_MAX.
enum class PoolingMode {
  kMaximum,
  kAverage,
};
34+
35+
// Maps a C++ scalar type to the matching cudnnDataType_t enumerator.
// Only float and double are specialized; instantiating any other type is a
// compile-time error because the primary template is left undefined.
template <typename T>
class CudnnDataType;

template <>
class CudnnDataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};

template <>
class CudnnDataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};
49+
50+
// Translates a DataLayout into the corresponding cuDNN tensor-format
// enumerator. Layouts without a supported mapping (e.g. kNCHW_VECT_C)
// raise an error via PADDLE_THROW.
inline cudnnTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
  if (order == DataLayout::kNHWC) {
    return CUDNN_TENSOR_NHWC;
  }
  if (order == DataLayout::kNCHW) {
    return CUDNN_TENSOR_NCHW;
  }
  PADDLE_THROW("Unknown cudnn equivalent for order");
  return CUDNN_TENSOR_NCHW;  // Unreachable; keeps -Wreturn-type quiet.
}
61+
62+
class ScopedTensorDescriptor {
63+
public:
64+
ScopedTensorDescriptor() {
65+
PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&desc_));
66+
}
67+
~ScopedTensorDescriptor() {
68+
PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(desc_));
69+
}
70+
71+
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
72+
const cudnnDataType_t type,
73+
const std::vector<int>& dims) {
74+
// the format is not used now, but it maybe useful feature
75+
std::vector<int> strides(dims.size());
76+
strides[dims.size() - 1] = 1;
77+
for (int i = dims.size() - 2; i >= 0; i--) {
78+
strides[i] = dims[i + 1] * strides[i + 1];
79+
}
80+
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
81+
desc_, type, dims.size(), dims.data(), strides.data()));
82+
return desc_;
83+
}
84+
85+
template <typename T>
86+
inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
87+
const std::vector<int>& dims) {
88+
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
89+
dims);
90+
}
91+
92+
private:
93+
cudnnTensorDescriptor_t desc_;
94+
DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
95+
};
96+
97+
class ScopedFilterDescriptor {
98+
public:
99+
ScopedFilterDescriptor() {
100+
PADDLE_ENFORCE(dynload::cudnnCreateFilterDescriptor(&desc_));
101+
}
102+
~ScopedFilterDescriptor() {
103+
PADDLE_ENFORCE(dynload::cudnnDestroyFilterDescriptor(desc_));
104+
}
105+
106+
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
107+
const cudnnDataType_t type,
108+
const std::vector<int>& kernel) {
109+
// filter layout: output input spatial_dim_y spatial_dim_x
110+
PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
111+
desc_, type, format, kernel.size(), kernel.data()));
112+
return desc_;
113+
}
114+
115+
template <typename T>
116+
inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
117+
const std::vector<int>& kernel) {
118+
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
119+
kernel);
120+
}
121+
122+
private:
123+
cudnnFilterDescriptor_t desc_;
124+
DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
125+
};
126+
127+
class ScopedConvolutionDescriptor {
128+
public:
129+
ScopedConvolutionDescriptor() {
130+
PADDLE_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&desc_));
131+
}
132+
~ScopedConvolutionDescriptor() {
133+
PADDLE_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(desc_));
134+
}
135+
136+
inline cudnnConvolutionDescriptor_t descriptor(
137+
cudnnDataType_t type, const std::vector<int>& pads,
138+
const std::vector<int>& strides, const std::vector<int>& dilations) {
139+
PADDLE_ENFORCE_EQ(pads.size(), strides.size());
140+
PADDLE_ENFORCE_EQ(pads.size(), dilations.size());
141+
142+
#if CUDNN_VERSION < 6000
143+
// cudnn v5 does not support dilation conv, the argument is called upscale
144+
// instead of dilations and it is must be one.
145+
for (size_t i = 0; i < dilations.size(); ++i) {
146+
PADDLE_ENFORCE_EQ(
147+
dilations[i], 1,
148+
"Dilations conv is not supported in this cuDNN version");
149+
}
150+
#endif
151+
152+
PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
153+
desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
154+
CUDNN_CROSS_CORRELATION, type));
155+
return desc_;
156+
}
157+
158+
template <typename T>
159+
inline cudnnConvolutionDescriptor_t descriptor(
160+
const std::vector<int>& pads, const std::vector<int>& strides,
161+
const std::vector<int>& dilations) {
162+
return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
163+
}
164+
165+
private:
166+
cudnnConvolutionDescriptor_t desc_;
167+
DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
168+
};
169+
170+
class ScopedPoolingDescriptor {
171+
public:
172+
ScopedPoolingDescriptor() {
173+
PADDLE_ENFORCE(dynload::cudnnCreatePoolingDescriptor(&desc_));
174+
}
175+
~ScopedPoolingDescriptor() {
176+
PADDLE_ENFORCE(dynload::cudnnDestroyPoolingDescriptor(desc_));
177+
}
178+
179+
inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
180+
const std::vector<int>& kernel,
181+
const std::vector<int>& pads,
182+
const std::vector<int>& strides) {
183+
PADDLE_ENFORCE_EQ(kernel.size(), pads.size());
184+
PADDLE_ENFORCE_EQ(kernel.size(), strides.size());
185+
PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor(
186+
desc_, (mode == PoolingMode::kMaximum
187+
? CUDNN_POOLING_MAX
188+
: CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING),
189+
CUDNN_PROPAGATE_NAN, // Always propagate nans.
190+
kernel.size(), kernel.data(), pads.data(), strides.data()));
191+
return desc_;
192+
}
193+
194+
private:
195+
cudnnPoolingDescriptor_t desc_;
196+
DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
197+
};
198+
199+
} // namespace platform
200+
} // namespace paddle
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/platform/cudnn_helper.h"
16+
#include <gtest/gtest.h>
17+
18+
// Verifies a 4-D NCHW tensor descriptor reports the dims that were set and
// fully-packed (row-major) strides.
TEST(CudnnHelper, ScopedTensorDescriptor) {
  using paddle::platform::ScopedTensorDescriptor;
  using paddle::platform::DataLayout;

  ScopedTensorDescriptor tensor_desc;
  std::vector<int> shape = {2, 4, 6, 6};
  auto desc = tensor_desc.descriptor<float>(DataLayout::kNCHW, shape);

  cudnnDataType_t type;
  int rank;
  std::vector<int> queried_dims(4);
  std::vector<int> queried_strides(4);
  paddle::platform::dynload::cudnnGetTensorNdDescriptor(
      desc, 4, &type, &rank, queried_dims.data(), queried_strides.data());

  EXPECT_EQ(rank, 4);
  for (size_t i = 0; i < shape.size(); ++i) {
    EXPECT_EQ(queried_dims[i], shape[i]);
  }
  // Packed strides for {2, 4, 6, 6} are {144, 36, 6, 1}.
  int expected_stride = 1;
  for (int i = 3; i >= 0; --i) {
    EXPECT_EQ(queried_strides[i], expected_stride);
    expected_stride *= shape[i];
  }
}
42+
43+
// Verifies a 3-d filter descriptor round-trips its format and kernel shape.
TEST(CudnnHelper, ScopedFilterDescriptor) {
  using paddle::platform::ScopedFilterDescriptor;
  using paddle::platform::DataLayout;

  ScopedFilterDescriptor filter_desc;
  std::vector<int> src_shape = {2, 3, 3};
  auto desc = filter_desc.descriptor<float>(DataLayout::kNCHW, src_shape);

  cudnnDataType_t type;
  int rank;
  cudnnTensorFormat_t format;
  std::vector<int> queried_kernel(3);
  paddle::platform::dynload::cudnnGetFilterNdDescriptor(
      desc, 3, &type, &format, &rank, queried_kernel.data());

  EXPECT_EQ(paddle::platform::GetCudnnTensorFormat(DataLayout::kNCHW), format);
  EXPECT_EQ(rank, 3);
  for (size_t i = 0; i < src_shape.size(); ++i) {
    EXPECT_EQ(queried_kernel[i], src_shape[i]);
  }
}
64+
65+
// Round-trips convolution pads/strides/dilations through cuDNN and verifies
// the configured mode is cross-correlation.
TEST(CudnnHelper, ScopedConvolutionDescriptor) {
  using paddle::platform::ScopedConvolutionDescriptor;

  ScopedConvolutionDescriptor conv_desc;
  std::vector<int> src_pads = {2, 2, 2};
  std::vector<int> src_strides = {1, 1, 1};
  std::vector<int> src_dilations = {1, 1, 1};
  auto desc = conv_desc.descriptor<float>(src_pads, src_strides, src_dilations);

  cudnnDataType_t type;
  cudnnConvolutionMode_t mode;
  int rank;
  std::vector<int> queried_pads(3);
  std::vector<int> queried_strides(3);
  std::vector<int> queried_dilations(3);
  paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(
      desc, 3, &rank, queried_pads.data(), queried_strides.data(),
      queried_dilations.data(), &mode, &type);

  EXPECT_EQ(rank, 3);
  for (size_t i = 0; i < src_pads.size(); ++i) {
    EXPECT_EQ(queried_pads[i], src_pads[i]);
    EXPECT_EQ(queried_strides[i], src_strides[i]);
    EXPECT_EQ(queried_dilations[i], src_dilations[i]);
  }
  EXPECT_EQ(mode, CUDNN_CROSS_CORRELATION);
}
92+
93+
// Round-trips max-pooling kernel/pads/strides through cuDNN and verifies the
// configured mode is CUDNN_POOLING_MAX.
TEST(CudnnHelper, ScopedPoolingDescriptor) {
  using paddle::platform::ScopedPoolingDescriptor;
  using paddle::platform::PoolingMode;

  ScopedPoolingDescriptor pool_desc;
  std::vector<int> src_kernel = {2, 2, 5};
  std::vector<int> src_pads = {1, 1, 2};
  std::vector<int> src_strides = {2, 2, 3};
  auto desc = pool_desc.descriptor(PoolingMode::kMaximum, src_kernel, src_pads,
                                   src_strides);

  cudnnPoolingMode_t mode;
  cudnnNanPropagation_t nan_t = CUDNN_PROPAGATE_NAN;
  int rank;
  std::vector<int> queried_kernel(3);
  std::vector<int> queried_pads(3);
  std::vector<int> queried_strides(3);
  paddle::platform::dynload::cudnnGetPoolingNdDescriptor(
      desc, 3, &mode, &nan_t, &rank, queried_kernel.data(),
      queried_pads.data(), queried_strides.data());

  EXPECT_EQ(rank, 3);
  for (size_t i = 0; i < src_pads.size(); ++i) {
    EXPECT_EQ(queried_kernel[i], src_kernel[i]);
    EXPECT_EQ(queried_pads[i], src_pads[i]);
    EXPECT_EQ(queried_strides[i], src_strides[i]);
  }
  EXPECT_EQ(mode, CUDNN_POOLING_MAX);
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
2-
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc)
2+
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader)

paddle/platform/dynload/cudnn.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,27 @@ extern void* cudnn_dso_handle;
6262
#define CUDNN_DNN_ROUTINE_EACH(__macro) \
6363
__macro(cudnnSetTensor4dDescriptor); \
6464
__macro(cudnnSetTensor4dDescriptorEx); \
65+
__macro(cudnnSetTensorNdDescriptor); \
66+
__macro(cudnnGetTensorNdDescriptor); \
6567
__macro(cudnnGetConvolutionNdForwardOutputDim); \
6668
__macro(cudnnGetConvolutionForwardAlgorithm); \
6769
__macro(cudnnCreateTensorDescriptor); \
6870
__macro(cudnnDestroyTensorDescriptor); \
6971
__macro(cudnnCreateFilterDescriptor); \
7072
__macro(cudnnSetFilter4dDescriptor); \
73+
__macro(cudnnSetFilterNdDescriptor); \
74+
__macro(cudnnGetFilterNdDescriptor); \
7175
__macro(cudnnSetPooling2dDescriptor); \
76+
__macro(cudnnSetPoolingNdDescriptor); \
77+
__macro(cudnnGetPoolingNdDescriptor); \
7278
__macro(cudnnDestroyFilterDescriptor); \
7379
__macro(cudnnCreateConvolutionDescriptor); \
7480
__macro(cudnnCreatePoolingDescriptor); \
7581
__macro(cudnnDestroyPoolingDescriptor); \
7682
__macro(cudnnSetConvolution2dDescriptor); \
7783
__macro(cudnnDestroyConvolutionDescriptor); \
84+
__macro(cudnnSetConvolutionNdDescriptor); \
85+
__macro(cudnnGetConvolutionNdDescriptor); \
7886
__macro(cudnnCreate); \
7987
__macro(cudnnDestroy); \
8088
__macro(cudnnSetStream); \

paddle/platform/macros.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
// Disable the copy and assignment operator for a class.
// Usage: place inside the class body. Note that it switches the access level
// to private, and the last line deliberately omits a trailing semicolon so
// call sites supply their own (DISABLE_COPY_AND_ASSIGN(Foo);).
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
 private:                                  \
  classname(const classname&) = delete;    \
  classname& operator=(const classname&) = delete
#endif

0 commit comments

Comments
 (0)