Skip to content

Commit 3542027

Browse files
authored
Merge pull request #3791 from qingqing01/cudnn_wrapper
cuDNN Wrapper.
2 parents d7b2058 + 8c048aa commit 3542027

File tree

6 files changed

+354
-1
lines changed

6 files changed

+354
-1
lines changed

paddle/platform/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ ENDIF()
2222
cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator
2323
system_allocator memory_block meta_data meta_cache place eigen3 ${GPU_CTX_DEPS})
2424
nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info)
25+
26+
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)

paddle/platform/cudnn_helper.h

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once

#include <vector>

#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/enforce.h"
#include "paddle/platform/macros.h"
20+
21+
namespace paddle {
22+
namespace platform {
23+
24+
// Memory layout of a tensor as understood by cuDNN.
enum class DataLayout {
  kNHWC,
  kNCHW,
  // NOTE(review): kNCHW_VECT_C currently has no mapping in
  // GetCudnnTensorFormat — passing it there throws.
  kNCHW_VECT_C,
};
29+
30+
// Pooling reduction used by ScopedPoolingDescriptor. kAverage maps to
// CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING (padding excluded from the
// mean); kMaximum maps to CUDNN_POOLING_MAX.
enum class PoolingMode {
  kMaximum,
  kAverage,
};
34+
35+
// Maps a C++ scalar type to the matching cudnnDataType_t enumerator.
// Only float and double are specialized; instantiating any other type is a
// compile-time error because the primary template is left undefined.
template <typename T>
class CudnnDataType;

template <>
class CudnnDataType<float> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};

template <>
class CudnnDataType<double> {
 public:
  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};
49+
50+
// Translates a DataLayout into the corresponding cuDNN tensor-format
// enumerator. Layouts without a supported mapping (e.g. kNCHW_VECT_C)
// raise an error via PADDLE_THROW.
inline cudnnTensorFormat_t GetCudnnTensorFormat(const DataLayout& order) {
  if (order == DataLayout::kNHWC) {
    return CUDNN_TENSOR_NHWC;
  }
  if (order == DataLayout::kNCHW) {
    return CUDNN_TENSOR_NCHW;
  }
  PADDLE_THROW("Unknown cudnn equivalent for order");
  return CUDNN_TENSOR_NCHW;  // Unreachable; keeps -Wreturn-type quiet.
}
61+
62+
class ScopedTensorDescriptor {
63+
public:
64+
ScopedTensorDescriptor() {
65+
PADDLE_ENFORCE(dynload::cudnnCreateTensorDescriptor(&desc_));
66+
}
67+
~ScopedTensorDescriptor() {
68+
PADDLE_ENFORCE(dynload::cudnnDestroyTensorDescriptor(desc_));
69+
}
70+
71+
inline cudnnTensorDescriptor_t descriptor(const cudnnTensorFormat_t format,
72+
const cudnnDataType_t type,
73+
const std::vector<int>& dims) {
74+
// the format is not used now, but it maybe useful feature
75+
std::vector<int> strides(dims.size());
76+
strides[dims.size() - 1] = 1;
77+
for (int i = dims.size() - 2; i >= 0; i--) {
78+
strides[i] = dims[i + 1] * strides[i + 1];
79+
}
80+
PADDLE_ENFORCE(dynload::cudnnSetTensorNdDescriptor(
81+
desc_, type, dims.size(), dims.data(), strides.data()));
82+
return desc_;
83+
}
84+
85+
template <typename T>
86+
inline cudnnTensorDescriptor_t descriptor(const DataLayout& order,
87+
const std::vector<int>& dims) {
88+
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
89+
dims);
90+
}
91+
92+
private:
93+
cudnnTensorDescriptor_t desc_;
94+
DISABLE_COPY_AND_ASSIGN(ScopedTensorDescriptor);
95+
};
96+
97+
class ScopedFilterDescriptor {
98+
public:
99+
ScopedFilterDescriptor() {
100+
PADDLE_ENFORCE(dynload::cudnnCreateFilterDescriptor(&desc_));
101+
}
102+
~ScopedFilterDescriptor() {
103+
PADDLE_ENFORCE(dynload::cudnnDestroyFilterDescriptor(desc_));
104+
}
105+
106+
inline cudnnFilterDescriptor_t descriptor(const cudnnTensorFormat_t format,
107+
const cudnnDataType_t type,
108+
const std::vector<int>& kernel) {
109+
// filter layout: output input spatial_dim_y spatial_dim_x
110+
PADDLE_ENFORCE(dynload::cudnnSetFilterNdDescriptor(
111+
desc_, type, format, kernel.size(), kernel.data()));
112+
return desc_;
113+
}
114+
115+
template <typename T>
116+
inline cudnnFilterDescriptor_t descriptor(const DataLayout& order,
117+
const std::vector<int>& kernel) {
118+
return descriptor(GetCudnnTensorFormat(order), CudnnDataType<T>::type,
119+
kernel);
120+
}
121+
122+
private:
123+
cudnnFilterDescriptor_t desc_;
124+
DISABLE_COPY_AND_ASSIGN(ScopedFilterDescriptor);
125+
};
126+
127+
class ScopedConvolutionDescriptor {
128+
public:
129+
ScopedConvolutionDescriptor() {
130+
PADDLE_ENFORCE(dynload::cudnnCreateConvolutionDescriptor(&desc_));
131+
}
132+
~ScopedConvolutionDescriptor() {
133+
PADDLE_ENFORCE(dynload::cudnnDestroyConvolutionDescriptor(desc_));
134+
}
135+
136+
inline cudnnConvolutionDescriptor_t descriptor(
137+
cudnnDataType_t type, const std::vector<int>& pads,
138+
const std::vector<int>& strides, const std::vector<int>& dilations) {
139+
PADDLE_ENFORCE_EQ(pads.size(), strides.size());
140+
PADDLE_ENFORCE_EQ(pads.size(), dilations.size());
141+
142+
#if CUDNN_VERSION < 6000
143+
// cudnn v5 does not support dilation conv, the argument is called upscale
144+
// instead of dilations and it is must be one.
145+
for (size_t i = 0; i < dilations.size(); ++i) {
146+
PADDLE_ENFORCE_EQ(
147+
dilations[i], 1,
148+
"Dilations conv is not supported in this cuDNN version");
149+
}
150+
#endif
151+
152+
PADDLE_ENFORCE(dynload::cudnnSetConvolutionNdDescriptor(
153+
desc_, pads.size(), pads.data(), strides.data(), dilations.data(),
154+
CUDNN_CROSS_CORRELATION, type));
155+
return desc_;
156+
}
157+
158+
template <typename T>
159+
inline cudnnConvolutionDescriptor_t descriptor(
160+
const std::vector<int>& pads, const std::vector<int>& strides,
161+
const std::vector<int>& dilations) {
162+
return descriptor(CudnnDataType<T>::type, pads, strides, dilations);
163+
}
164+
165+
private:
166+
cudnnConvolutionDescriptor_t desc_;
167+
DISABLE_COPY_AND_ASSIGN(ScopedConvolutionDescriptor);
168+
};
169+
170+
class ScopedPoolingDescriptor {
171+
public:
172+
ScopedPoolingDescriptor() {
173+
PADDLE_ENFORCE(dynload::cudnnCreatePoolingDescriptor(&desc_));
174+
}
175+
~ScopedPoolingDescriptor() {
176+
PADDLE_ENFORCE(dynload::cudnnDestroyPoolingDescriptor(desc_));
177+
}
178+
179+
inline cudnnPoolingDescriptor_t descriptor(const PoolingMode& mode,
180+
const std::vector<int>& kernel,
181+
const std::vector<int>& pads,
182+
const std::vector<int>& strides) {
183+
PADDLE_ENFORCE_EQ(kernel.size(), pads.size());
184+
PADDLE_ENFORCE_EQ(kernel.size(), strides.size());
185+
PADDLE_ENFORCE(dynload::cudnnSetPoolingNdDescriptor(
186+
desc_, (mode == PoolingMode::kMaximum
187+
? CUDNN_POOLING_MAX
188+
: CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING),
189+
CUDNN_PROPAGATE_NAN, // Always propagate nans.
190+
kernel.size(), kernel.data(), pads.data(), strides.data()));
191+
return desc_;
192+
}
193+
194+
private:
195+
cudnnPoolingDescriptor_t desc_;
196+
DISABLE_COPY_AND_ASSIGN(ScopedPoolingDescriptor);
197+
};
198+
199+
} // namespace platform
200+
} // namespace paddle
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/platform/cudnn_helper.h"
16+
#include <gtest/gtest.h>
17+
18+
// Verifies a 4-D NCHW tensor descriptor reports the dims that were set and
// fully-packed (row-major) strides.
TEST(CudnnHelper, ScopedTensorDescriptor) {
  using paddle::platform::ScopedTensorDescriptor;
  using paddle::platform::DataLayout;

  ScopedTensorDescriptor tensor_desc;
  std::vector<int> shape = {2, 4, 6, 6};
  auto desc = tensor_desc.descriptor<float>(DataLayout::kNCHW, shape);

  cudnnDataType_t type;
  int rank;
  std::vector<int> queried_dims(4);
  std::vector<int> queried_strides(4);
  paddle::platform::dynload::cudnnGetTensorNdDescriptor(
      desc, 4, &type, &rank, queried_dims.data(), queried_strides.data());

  EXPECT_EQ(rank, 4);
  for (size_t i = 0; i < shape.size(); ++i) {
    EXPECT_EQ(queried_dims[i], shape[i]);
  }
  // Packed strides for {2, 4, 6, 6} are {144, 36, 6, 1}.
  int expected_stride = 1;
  for (int i = 3; i >= 0; --i) {
    EXPECT_EQ(queried_strides[i], expected_stride);
    expected_stride *= shape[i];
  }
}
42+
43+
// Verifies a 3-d filter descriptor round-trips its format and kernel shape.
TEST(CudnnHelper, ScopedFilterDescriptor) {
  using paddle::platform::ScopedFilterDescriptor;
  using paddle::platform::DataLayout;

  ScopedFilterDescriptor filter_desc;
  std::vector<int> src_shape = {2, 3, 3};
  auto desc = filter_desc.descriptor<float>(DataLayout::kNCHW, src_shape);

  cudnnDataType_t type;
  int rank;
  cudnnTensorFormat_t format;
  std::vector<int> queried_kernel(3);
  paddle::platform::dynload::cudnnGetFilterNdDescriptor(
      desc, 3, &type, &format, &rank, queried_kernel.data());

  EXPECT_EQ(paddle::platform::GetCudnnTensorFormat(DataLayout::kNCHW), format);
  EXPECT_EQ(rank, 3);
  for (size_t i = 0; i < src_shape.size(); ++i) {
    EXPECT_EQ(queried_kernel[i], src_shape[i]);
  }
}
64+
65+
// Round-trips convolution pads/strides/dilations through cuDNN and verifies
// the configured mode is cross-correlation.
TEST(CudnnHelper, ScopedConvolutionDescriptor) {
  using paddle::platform::ScopedConvolutionDescriptor;

  ScopedConvolutionDescriptor conv_desc;
  std::vector<int> src_pads = {2, 2, 2};
  std::vector<int> src_strides = {1, 1, 1};
  std::vector<int> src_dilations = {1, 1, 1};
  auto desc = conv_desc.descriptor<float>(src_pads, src_strides, src_dilations);

  cudnnDataType_t type;
  cudnnConvolutionMode_t mode;
  int rank;
  std::vector<int> queried_pads(3);
  std::vector<int> queried_strides(3);
  std::vector<int> queried_dilations(3);
  paddle::platform::dynload::cudnnGetConvolutionNdDescriptor(
      desc, 3, &rank, queried_pads.data(), queried_strides.data(),
      queried_dilations.data(), &mode, &type);

  EXPECT_EQ(rank, 3);
  for (size_t i = 0; i < src_pads.size(); ++i) {
    EXPECT_EQ(queried_pads[i], src_pads[i]);
    EXPECT_EQ(queried_strides[i], src_strides[i]);
    EXPECT_EQ(queried_dilations[i], src_dilations[i]);
  }
  EXPECT_EQ(mode, CUDNN_CROSS_CORRELATION);
}
92+
93+
// Round-trips max-pooling kernel/pads/strides through cuDNN and verifies the
// configured mode is CUDNN_POOLING_MAX.
TEST(CudnnHelper, ScopedPoolingDescriptor) {
  using paddle::platform::ScopedPoolingDescriptor;
  using paddle::platform::PoolingMode;

  ScopedPoolingDescriptor pool_desc;
  std::vector<int> src_kernel = {2, 2, 5};
  std::vector<int> src_pads = {1, 1, 2};
  std::vector<int> src_strides = {2, 2, 3};
  auto desc = pool_desc.descriptor(PoolingMode::kMaximum, src_kernel, src_pads,
                                   src_strides);

  cudnnPoolingMode_t mode;
  cudnnNanPropagation_t nan_t = CUDNN_PROPAGATE_NAN;
  int rank;
  std::vector<int> queried_kernel(3);
  std::vector<int> queried_pads(3);
  std::vector<int> queried_strides(3);
  paddle::platform::dynload::cudnnGetPoolingNdDescriptor(
      desc, 3, &mode, &nan_t, &rank, queried_kernel.data(),
      queried_pads.data(), queried_strides.data());

  EXPECT_EQ(rank, 3);
  for (size_t i = 0; i < src_pads.size(); ++i) {
    EXPECT_EQ(queried_kernel[i], src_kernel[i]);
    EXPECT_EQ(queried_pads[i], src_pads[i]);
    EXPECT_EQ(queried_strides[i], src_strides[i]);
  }
  EXPECT_EQ(mode, CUDNN_POOLING_MAX);
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
2-
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc)
2+
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader)

paddle/platform/dynload/cudnn.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,27 @@ extern void* cudnn_dso_handle;
6262
#define CUDNN_DNN_ROUTINE_EACH(__macro) \
6363
__macro(cudnnSetTensor4dDescriptor); \
6464
__macro(cudnnSetTensor4dDescriptorEx); \
65+
__macro(cudnnSetTensorNdDescriptor); \
66+
__macro(cudnnGetTensorNdDescriptor); \
6567
__macro(cudnnGetConvolutionNdForwardOutputDim); \
6668
__macro(cudnnGetConvolutionForwardAlgorithm); \
6769
__macro(cudnnCreateTensorDescriptor); \
6870
__macro(cudnnDestroyTensorDescriptor); \
6971
__macro(cudnnCreateFilterDescriptor); \
7072
__macro(cudnnSetFilter4dDescriptor); \
73+
__macro(cudnnSetFilterNdDescriptor); \
74+
__macro(cudnnGetFilterNdDescriptor); \
7175
__macro(cudnnSetPooling2dDescriptor); \
76+
__macro(cudnnSetPoolingNdDescriptor); \
77+
__macro(cudnnGetPoolingNdDescriptor); \
7278
__macro(cudnnDestroyFilterDescriptor); \
7379
__macro(cudnnCreateConvolutionDescriptor); \
7480
__macro(cudnnCreatePoolingDescriptor); \
7581
__macro(cudnnDestroyPoolingDescriptor); \
7682
__macro(cudnnSetConvolution2dDescriptor); \
7783
__macro(cudnnDestroyConvolutionDescriptor); \
84+
__macro(cudnnSetConvolutionNdDescriptor); \
85+
__macro(cudnnGetConvolutionNdDescriptor); \
7886
__macro(cudnnCreate); \
7987
__macro(cudnnDestroy); \
8088
__macro(cudnnSetStream); \

paddle/platform/macros.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
// Disable the copy and assignment operator for a class.
// Usage: place inside the class body. Note that it switches the access level
// to private, and the last line deliberately omits a trailing semicolon so
// call sites supply their own (DISABLE_COPY_AND_ASSIGN(Foo);).
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
 private:                                  \
  classname(const classname&) = delete;    \
  classname& operator=(const classname&) = delete
#endif

0 commit comments

Comments
 (0)