Skip to content

Commit c7f91a9

Browse files
authored
Merge pull request #3817 from xinghai-sun/dropout
Add dropout operator.
2 parents f86c1cc + ffeeef8 commit c7f91a9

File tree

4 files changed

+344
-0
lines changed

4 files changed

+344
-0
lines changed

paddle/operators/dropout_op.cc

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/operators/dropout_op.h"
16+
17+
namespace paddle {
18+
namespace operators {
19+
20+
using framework::Tensor;
21+
using framework::LoDTensor;
22+
23+
class DropoutOp : public framework::OperatorWithKernel {
24+
public:
25+
using framework::OperatorWithKernel::OperatorWithKernel;
26+
27+
protected:
28+
void InferShape(const framework::InferShapeContext &ctx) const override {
29+
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
30+
PADDLE_ENFORCE_GE(ctx.Attr<float>("dropout_prob"), 0);
31+
PADDLE_ENFORCE_LE(ctx.Attr<float>("dropout_prob"), 1);
32+
// TODO(xinghai-sun): remove this check after swtiching to bool
33+
PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
34+
ctx.Attr<int>("is_training") == 1);
35+
36+
auto dims = ctx.Input<Tensor>("X")->dims();
37+
ctx.Output<LoDTensor>("Out")->Resize(dims);
38+
if (ctx.Attr<int>("is_training") == 1) {
39+
ctx.Output<LoDTensor>("Mask")->Resize(dims);
40+
}
41+
}
42+
};
43+
44+
template <typename AttrType>
45+
class DropoutOpMaker : public framework::OpProtoAndCheckerMaker {
46+
public:
47+
DropoutOpMaker(framework::OpProto *proto,
48+
framework::OpAttrChecker *op_checker)
49+
: OpProtoAndCheckerMaker(proto, op_checker) {
50+
AddAttr<AttrType>("dropout_prob", "Probability of setting units to zero.")
51+
.SetDefault(.5f);
52+
// TODO(xinghai-sun): use bool for is_training after bool is supported.
53+
AddAttr<int>("is_training", "Whether in training phase.").SetDefault(1);
54+
AddAttr<int>("seed", "Dropout random seed.").SetDefault(0);
55+
AddInput("X", "The input of dropout op.");
56+
AddOutput("Out", "The output of dropout op.");
57+
AddOutput("Mask", "The random sampled dropout mask.").AsIntermediate();
58+
59+
AddComment(R"DOC(
60+
Dropout Operator.
61+
62+
"Dropout" refers to randomly dropping out units in a nerual network. It is a
63+
regularization technique for reducing overfitting by preventing neuron
64+
co-adaption during training. The dropout operator randomly set (according to
65+
the given dropout probability) the outputs of some units to zero, while others
66+
being set to their inputs.
67+
)DOC");
68+
}
69+
};
70+
71+
template <typename AttrType>
72+
class DropoutOpGrad : public framework::OperatorWithKernel {
73+
public:
74+
using framework::OperatorWithKernel::OperatorWithKernel;
75+
76+
protected:
77+
void InferShape(const framework::InferShapeContext &ctx) const override {
78+
PADDLE_ENFORCE_EQ(ctx.Attr<int>("is_training"), 1,
79+
"GradOp is only callable when is_training is true");
80+
81+
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null.");
82+
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Mask"), "Mask must not be null.");
83+
PADDLE_ENFORCE_NOT_NULL(ctx.InputVar(framework::GradVarName("Out")),
84+
"Input(Out@GRAD) must not be null.");
85+
86+
PADDLE_ENFORCE_GE(ctx.Attr<AttrType>("dropout_prob"), 0);
87+
PADDLE_ENFORCE_LE(ctx.Attr<AttrType>("dropout_prob"), 1);
88+
// TODO(xinghai-sun): remove this check after swtiching to bool
89+
PADDLE_ENFORCE(ctx.Attr<int>("is_training") == 0 ||
90+
ctx.Attr<int>("is_training") == 1);
91+
auto x_dims = ctx.Input<Tensor>("X")->dims();
92+
auto out_dims = ctx.Input<Tensor>(framework::GradVarName("Out"))->dims();
93+
PADDLE_ENFORCE_EQ(x_dims, out_dims,
94+
"Dimensions of Input(X) and Out@Grad must be the same.");
95+
auto mask_dims = ctx.Input<Tensor>("Mask")->dims();
96+
PADDLE_ENFORCE_EQ(x_dims, mask_dims,
97+
"Dimensions of Input(X) and Mask must be the same.");
98+
99+
auto *x_grad = ctx.Output<LoDTensor>(framework::GradVarName("X"));
100+
x_grad->Resize(x_dims);
101+
}
102+
};
103+
104+
} // namespace operators
105+
} // namespace paddle
106+
107+
namespace ops = paddle::operators;
108+
REGISTER_OP(dropout, ops::DropoutOp, ops::DropoutOpMaker<float>, dropout_grad,
109+
ops::DropoutOpGrad<float>);
110+
REGISTER_OP_CPU_KERNEL(
111+
dropout, ops::CPUDropoutKernel<paddle::platform::CPUPlace, float, float>);
112+
REGISTER_OP_CPU_KERNEL(
113+
dropout_grad, ops::DropoutGradKernel<paddle::platform::CPUPlace, float>);

paddle/operators/dropout_op.cu

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#define EIGEN_USE_GPU
16+
#include <thrust/device_ptr.h>
17+
#include <thrust/iterator/counting_iterator.h>
18+
#include <thrust/random.h>
19+
#include <thrust/transform.h>
20+
#include "paddle/operators/dropout_op.h"
21+
22+
namespace paddle {
23+
namespace operators {
24+
25+
template <typename T, typename AttrType>
26+
struct MaskGenerator {
27+
AttrType dropout_prob;
28+
int seed;
29+
30+
__host__ __device__ MaskGenerator(AttrType dropout_prob, int seed)
31+
: dropout_prob(dropout_prob), seed(seed) {}
32+
33+
__host__ __device__ T operator()(const unsigned int n) const {
34+
thrust::minstd_rand rng;
35+
rng.seed(seed);
36+
thrust::uniform_real_distribution<AttrType> dist(0, 1);
37+
rng.discard(n);
38+
if (dist(rng) < dropout_prob) {
39+
return static_cast<T>(0);
40+
} else {
41+
return static_cast<T>(1);
42+
}
43+
}
44+
};
45+
46+
// It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT.
47+
// Use std::random and thrust::random(thrust is a std library in CUDA) to
48+
// implement uniform random.
49+
template <typename Place, typename T, typename AttrType>
50+
class GPUDropoutKernel : public framework::OpKernel {
51+
public:
52+
void Compute(const framework::ExecutionContext& context) const override {
53+
auto* x = context.Input<Tensor>("X");
54+
auto* y = context.Output<Tensor>("Out");
55+
y->mutable_data<T>(context.GetPlace());
56+
AttrType dropout_prob = context.Attr<AttrType>("dropout_prob");
57+
58+
auto X = EigenMatrix<T>::Reshape(*x, 1);
59+
auto Y = EigenMatrix<T>::Reshape(*y, 1);
60+
61+
auto place = context.GetEigenDevice<Place>();
62+
if (context.Attr<int>("is_training") == 1) {
63+
auto* mask = context.Output<Tensor>("Mask");
64+
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
65+
int size = framework::product(mask->dims());
66+
int seed = context.Attr<int>("seed");
67+
thrust::counting_iterator<unsigned int> index_sequence_begin(0);
68+
thrust::transform(index_sequence_begin, index_sequence_begin + size,
69+
thrust::device_ptr<T>(mask_data),
70+
MaskGenerator<T, AttrType>(dropout_prob, seed));
71+
auto M = EigenMatrix<T>::Reshape(*mask, 1);
72+
Y.device(place) = X * M;
73+
} else {
74+
Y.device(place) = X * dropout_prob;
75+
}
76+
}
77+
};
78+
79+
} // namespace operators
80+
} // namespace paddle
81+
82+
namespace ops = paddle::operators;
83+
REGISTER_OP_GPU_KERNEL(
84+
dropout, ops::GPUDropoutKernel<paddle::platform::GPUPlace, float, float>);
85+
REGISTER_OP_GPU_KERNEL(
86+
dropout_grad, ops::DropoutGradKernel<paddle::platform::GPUPlace, float>);

paddle/operators/dropout_op.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
#include <random>
17+
#include "paddle/framework/eigen.h"
18+
#include "paddle/framework/op_registry.h"
19+
20+
namespace paddle {
21+
namespace operators {
22+
23+
using Tensor = framework::Tensor;
24+
template <typename T, int MajorType = Eigen::RowMajor,
25+
typename IndexType = Eigen::DenseIndex>
26+
using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
27+
28+
template <typename Place, typename T, typename AttrType>
29+
class CPUDropoutKernel : public framework::OpKernel {
30+
public:
31+
void Compute(const framework::ExecutionContext& context) const override {
32+
auto* x = context.Input<Tensor>("X");
33+
auto* y = context.Output<Tensor>("Out");
34+
const auto* x_data = x->data<T>();
35+
auto* y_data = y->mutable_data<T>(context.GetPlace());
36+
AttrType dropout_prob = context.Attr<AttrType>("dropout_prob");
37+
38+
if (context.Attr<int>("is_training") == 1) {
39+
auto* mask = context.Output<Tensor>("Mask");
40+
auto* mask_data = mask->mutable_data<T>(context.GetPlace());
41+
int seed = context.Attr<int>("seed");
42+
std::minstd_rand engine;
43+
engine.seed(seed);
44+
std::uniform_real_distribution<AttrType> dist(0, 1);
45+
size_t size = framework::product(mask->dims());
46+
for (size_t i = 0; i < size; ++i) {
47+
if (dist(engine) < dropout_prob) {
48+
mask_data[i] = 0;
49+
y_data[i] = 0;
50+
} else {
51+
mask_data[i] = 1;
52+
y_data[i] = x_data[i];
53+
}
54+
}
55+
} else {
56+
auto X = EigenMatrix<T>::Reshape(*x, 1);
57+
auto Y = EigenMatrix<T>::Reshape(*y, 1);
58+
auto place = context.GetEigenDevice<Place>();
59+
Y.device(place) = X * dropout_prob;
60+
}
61+
}
62+
};
63+
64+
template <typename Place, typename T>
65+
class DropoutGradKernel : public framework::OpKernel {
66+
public:
67+
void Compute(const framework::ExecutionContext& context) const override {
68+
PADDLE_ENFORCE_EQ(context.Attr<int>("is_training"), 1,
69+
"GradOp is only callable when is_training is true");
70+
71+
auto* grad_x = context.Output<Tensor>(framework::GradVarName("X"));
72+
auto* grad_y = context.Input<Tensor>(framework::GradVarName("Out"));
73+
auto* mask = context.Input<Tensor>("Mask");
74+
grad_x->mutable_data<T>(context.GetPlace());
75+
76+
auto M = EigenMatrix<T>::Reshape(*mask, 1);
77+
auto dX = EigenMatrix<T>::Reshape(*grad_x, 1);
78+
auto dY = EigenMatrix<T>::Reshape(*grad_y, 1);
79+
80+
auto place = context.GetEigenDevice<Place>();
81+
dX.device(place) = dY * M;
82+
}
83+
};
84+
85+
} // namespace operators
86+
} // namespace paddle
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import unittest
2+
import numpy as np
3+
from op_test import OpTest
4+
5+
6+
class TestDropoutOp(OpTest):
    """Training-mode dropout with dropout_prob 0.0 on a (32, 64) input.

    The expected output is the input itself and the expected mask is all ones.
    """

    def setUp(self):
        shape = (32, 64)
        x = np.random.random(shape).astype("float32")

        self.op_type = "dropout"
        self.inputs = {'X': x}
        self.attrs = {'dropout_prob': 0.0, 'is_training': 1}
        self.outputs = {'Out': x, 'Mask': np.ones(shape)}

    def test_check_output(self):
        self.check_output()

    def test_check_grad_normal(self):
        self.check_grad(['X'], 'Out', max_relative_error=0.05)
18+
19+
20+
class TestDropoutOp2(TestDropoutOp):
    """Training-mode dropout with dropout_prob 1.0 on a (32, 64) input.

    The expected output and the expected mask are both all zeros.
    """

    def setUp(self):
        shape = (32, 64)

        self.op_type = "dropout"
        self.inputs = {'X': np.random.random(shape).astype("float32")}
        self.attrs = {'dropout_prob': 1.0, 'is_training': 1}
        self.outputs = {'Out': np.zeros(shape), 'Mask': np.zeros(shape)}
26+
27+
28+
class TestDropoutOp3(TestDropoutOp):
    """Training-mode dropout with dropout_prob 0.0 on a rank-3 (32, 64, 2) input.

    The expected output is the input itself and the expected mask is all ones.
    """

    def setUp(self):
        shape = (32, 64, 2)
        x = np.random.random(shape).astype("float32")

        self.op_type = "dropout"
        self.inputs = {'X': x}
        self.attrs = {'dropout_prob': 0.0, 'is_training': 1}
        self.outputs = {'Out': x, 'Mask': np.ones(shape)}
34+
35+
36+
class TestDropoutOp4(OpTest):
    """Inference-mode dropout (is_training = 0) with dropout_prob 0.35.

    In training a unit is kept, unscaled, with probability 1 - dropout_prob,
    so the expected inference output is the input scaled by that keep
    probability: X * (1 - dropout_prob).
    """

    def setUp(self):
        self.op_type = "dropout"
        self.inputs = {'X': np.random.random((32, 64)).astype("float32")}
        self.attrs = {'dropout_prob': 0.35, 'is_training': 0}
        # Scale by the keep probability, not by the drop probability.
        keep_prob = 1.0 - self.attrs['dropout_prob']
        self.outputs = {'Out': self.inputs['X'] * keep_prob}

    def test_check_output(self):
        self.check_output()
45+
46+
47+
class TestDropoutOp5(OpTest):
    """Inference-mode dropout (is_training = 0) with dropout_prob 0.75 on a
    rank-3 (32, 64, 3) input.

    In training a unit is kept, unscaled, with probability 1 - dropout_prob,
    so the expected inference output is the input scaled by that keep
    probability: X * (1 - dropout_prob).
    """

    def setUp(self):
        self.op_type = "dropout"
        self.inputs = {'X': np.random.random((32, 64, 3)).astype("float32")}
        self.attrs = {'dropout_prob': 0.75, 'is_training': 0}
        # Scale by the keep probability, not by the drop probability.
        keep_prob = 1.0 - self.attrs['dropout_prob']
        self.outputs = {'Out': self.inputs['X'] * keep_prob}

    def test_check_output(self):
        self.check_output()
56+
57+
58+
# Run every test case in this module when executed as a script.
if '__main__' == __name__:
    unittest.main()

0 commit comments

Comments
 (0)