@@ -21,9 +21,9 @@ namespace math {
 
 template <typename T>
 __global__ void KernelMaxOut(const int nthreads, const T* input_data,
-                             const int channels,
-                             const int input_height, const int input_width,
-                             int groups, T* output_data) {
+                             const int channels, const int input_height,
+                             const int input_width, int groups,
+                             T* output_data) {
   const int size = input_height * input_width * channels / groups;
   const int feat_len = input_height * input_width;
   int index = blockIdx.x * blockDim.x + threadIdx.x;
@@ -34,7 +34,7 @@ __global__ void KernelMaxOut(const int nthreads, const T* input_data,
     int channel_idx = batch_offset / feat_len;
     int feat_idx = batch_offset % feat_len;
     int data_idx =
-      (batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
+        (batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
     T ele = static_cast<T>(-FLT_MAX);
     for (int g = 0; g < groups; ++g) {
       T x = input_data[data_idx + g * feat_len];
@@ -44,34 +44,35 @@ __global__ void KernelMaxOut(const int nthreads, const T* input_data,
   }
 }
 template <typename T>
-__global__ void KernelMaxoutGrad(
-    const int nthreads, const T* input_data, const T* output_data,
-    const T* output_grad, T* input_grad, const int channels,
-    const int input_height, const int input_width, int groups) {
-    const int size = input_height * input_width * channels / groups;
-    const int feat_len = input_height * input_width;
-    int index = blockIdx.x * blockDim.x + threadIdx.x;
-    int offset = blockDim.x * gridDim.x;
-    for (int i = index; i < nthreads; i += offset) {
-      int batch_idx = i / size;
-      int batch_offset = i % size;
-      int channel_idx = batch_offset / feat_len;
-      int feat_idx = batch_offset % feat_len;
-      int data_idx =
+__global__ void KernelMaxoutGrad(const int nthreads, const T* input_data,
+                                 const T* output_data, const T* output_grad,
+                                 T* input_grad, const int channels,
+                                 const int input_height, const int input_width,
+                                 int groups) {
+  const int size = input_height * input_width * channels / groups;
+  const int feat_len = input_height * input_width;
+  int index = blockIdx.x * blockDim.x + threadIdx.x;
+  int offset = blockDim.x * gridDim.x;
+  for (int i = index; i < nthreads; i += offset) {
+    int batch_idx = i / size;
+    int batch_offset = i % size;
+    int channel_idx = batch_offset / feat_len;
+    int feat_idx = batch_offset % feat_len;
+    int data_idx =
         (batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
-      int max_index = -1;
-      bool continue_match = true;
-      for (int g = 0; g < groups && continue_match; ++g) {
-        if (input_data[data_idx + g * feat_len] == output_data[i]) {
-          max_index = data_idx + g * feat_len;
-          continue_match = false;
-          break;
-        }
-      }
-      if (max_index != -1) {
-        input_grad[max_index] += output_grad[index];
+    int max_index = -1;
+    bool continue_match = true;
+    for (int g = 0; g < groups && continue_match; ++g) {
+      if (input_data[data_idx + g * feat_len] == output_data[i]) {
+        max_index = data_idx + g * feat_len;
+        continue_match = false;
+        break;
       }
     }
+    if (max_index != -1) {
+      input_grad[max_index] += output_grad[index];
+    }
+  }
 }
 /*
  * All tensors are in NCHW format.
@@ -80,7 +81,7 @@ template <typename T>
 class MaxOutFunctor<platform::GPUPlace, T> {
  public:
   void operator()(const platform::DeviceContext& context,
-                  const framework::Tensor& input, framework::Tensor * output,
+                  const framework::Tensor& input, framework::Tensor* output,
                   int groups) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
@@ -92,7 +93,7 @@ class MaxOutFunctor<platform::GPUPlace, T> {
 
     const T* input_data = input.data<T>();
     T* output_data = output->mutable_data<T>(context.GetPlace());
-    int nthreads = output->numel();
+    int nthreads = output->numel();
     int blocks = (nthreads + 1024 - 1) / 1024;
     dim3 threads(1024, 1);
     dim3 grid(blocks, 1);
@@ -101,8 +102,7 @@ class MaxOutFunctor<platform::GPUPlace, T> {
         T><<<grid, threads, 0,
              reinterpret_cast<const platform::CUDADeviceContext&>(context)
                  .stream()>>>(nthreads, input_data, input_channels,
-                              input_height, input_width, groups,
-                              output_data);
+                              input_height, input_width, groups, output_data);
   }
 };
 /*
@@ -112,11 +112,9 @@ template <typename T>
 class MaxOutGradFunctor<platform::GPUPlace, T> {
  public:
   void operator()(const platform::DeviceContext& context,
-                  const framework::Tensor& input,
-                  framework::Tensor * input_grad,
+                  const framework::Tensor& input, framework::Tensor* input_grad,
                   const framework::Tensor& output,
-                  const framework::Tensor& output_grad,
-                  int groups) {
+                  const framework::Tensor& output_grad, int groups) {
     const int batch_size = input.dims()[0];
     const int input_channels = input.dims()[1];
     const int input_height = input.dims()[2];
@@ -129,17 +127,17 @@ class MaxOutGradFunctor<platform::GPUPlace, T> {
     const T* output_data = output.data<T>();
     const T* output_grad_data = output_grad.data<T>();
     T* input_grad_data = input_grad->mutable_data<T>(context.GetPlace());
-    int nthreads = output.numel();
+    int nthreads = output.numel();
     int blocks = (nthreads + 1024 - 1) / 1024;
     dim3 threads(1024, 1);
     dim3 grid(blocks, 1);
 
     KernelMaxoutGrad<
         T><<<grid, threads, 0,
              reinterpret_cast<const platform::CUDADeviceContext&>(context)
-                 .stream()>>>(
-        nthreads, input_data, output_data, output_grad_data, input_grad_data,
-        input_channels, input_height, input_width, groups);
+                 .stream()>>>(nthreads, input_data, output_data,
+                              output_grad_data, input_grad_data, input_channels,
+                              input_height, input_width, groups);
   }
 };
 
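Note on the index arithmetic in the kernels above: each output element i enumerates the N x (C/groups) x H x W output positions; data_idx points at the first of the `groups` input channels feeding that position, the forward kernel takes the elementwise maximum over those channels, and the backward kernel routes the gradient back to whichever channel produced that maximum. The standalone C++ sketch below is not part of this commit; MaxOutCPU and the toy shapes in main are illustrative names only, but the data_idx formula is copied from KernelMaxOut so the forward result can be checked on the CPU.

// Standalone CPU sketch of the maxout forward pass (illustrative, not from the commit).
#include <algorithm>
#include <cfloat>
#include <cstdio>
#include <vector>

// input:  NCHW layout; `channels` must be divisible by `groups`.
// output: N x (channels / groups) x H x W, filled with the per-group maxima.
void MaxOutCPU(const std::vector<float>& input, std::vector<float>* output,
               int batch_size, int channels, int height, int width,
               int groups) {
  const int feat_len = height * width;
  const int size = feat_len * channels / groups;  // per-batch output size
  const int nthreads = batch_size * size;         // total output elements
  output->assign(nthreads, 0.f);
  for (int i = 0; i < nthreads; ++i) {
    int batch_idx = i / size;
    int batch_offset = i % size;
    int channel_idx = batch_offset / feat_len;  // output channel
    int feat_idx = batch_offset % feat_len;     // spatial offset
    // First of the `groups` input channels feeding this output element;
    // same formula as data_idx in KernelMaxOut.
    int data_idx =
        (batch_idx * size + channel_idx * feat_len) * groups + feat_idx;
    float ele = -FLT_MAX;
    for (int g = 0; g < groups; ++g) {
      ele = std::max(ele, input[data_idx + g * feat_len]);
    }
    (*output)[i] = ele;
  }
}

int main() {
  // Tiny example: N=1, C=4, H=W=2, groups=2 -> output has 2 channels.
  const int n = 1, c = 4, h = 2, w = 2, groups = 2;
  std::vector<float> input(n * c * h * w);
  for (size_t i = 0; i < input.size(); ++i) input[i] = static_cast<float>(i);
  std::vector<float> output;
  MaxOutCPU(input, &output, n, c, h, w, groups);
  for (float v : output) std::printf("%g ", v);  // expected: 4 5 6 7 12 13 14 15
  std::printf("\n");
  return 0;
}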