@@ -66,8 +66,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
6666 public:
6767 void operator ()(const platform::DeviceContext& context,
6868 const framework::Tensor& im, framework::Tensor& col,
69- int stride_height, int stride_width, int padding_height ,
70- int padding_width ) {
69+ int stride_height, int stride_width, int padding_up ,
70+ int padding_down, int padding_left, int padding_right ) {
7171 PADDLE_ENFORCE (im.dims ().size () == 3 );
7272 PADDLE_ENFORCE (col.dims ().size () == 5 );
7373
@@ -79,6 +79,15 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
7979 int output_height = col.dims ()[3 ];
8080 int output_width = col.dims ()[4 ];
8181
82+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
83+ stride_height +
84+ 1 ==
85+ output_height);
86+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
87+ stride_width +
88+ 1 ==
89+ output_width);
90+
8291 int num_outputs = input_channels * output_height * output_width;
8392 int blocks = (num_outputs + 1024 - 1 ) / 1024 ;
8493 int block_x = 512 ;
@@ -89,8 +98,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
8998 reinterpret_cast <const platform::CUDADeviceContext&>(context)
9099 .stream()>>> (
91100 im.data <T>(), num_outputs, input_height, input_width, filter_height,
92- filter_width, stride_height, stride_width, padding_height ,
93- padding_width, output_height, output_width, col.data <T>());
101+ filter_width, stride_height, stride_width, padding_up, padding_left ,
102+ output_height, output_width, col.data <T>());
94103 }
95104};
96105
@@ -152,7 +161,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
152161 public:
153162 void operator ()(const platform::DeviceContext& context, framework::Tensor& im,
154163 const framework::Tensor& col, int stride_height,
155- int stride_width, int padding_height, int padding_width) {
164+ int stride_width, int padding_up, int padding_down,
165+ int padding_left, int padding_right) {
156166 PADDLE_ENFORCE (im.dims ().size () == 3 );
157167 PADDLE_ENFORCE (col.dims ().size () == 5 );
158168
@@ -164,8 +174,18 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
164174 int output_height = col.dims ()[3 ];
165175 int output_width = col.dims ()[4 ];
166176
167- size_t num_kernels = input_channels * (input_height + 2 * padding_height) *
168- (input_width + 2 * padding_width);
177+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
178+ stride_height +
179+ 1 ==
180+ output_height);
181+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
182+ stride_width +
183+ 1 ==
184+ output_width);
185+
186+ size_t num_kernels = input_channels *
187+ (input_height + padding_up + padding_down) *
188+ (input_width + padding_left + padding_right);
169189
170190 size_t blocks = (num_kernels + 1024 - 1 ) / 1024 ;
171191 size_t block_x = 512 ;
@@ -178,10 +198,10 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
178198 col2im<T><<<grid, threads, 0 ,
179199 reinterpret_cast <const platform::CUDADeviceContext&>(context)
180200 .stream()>>> (
181- num_kernels, col.data <T>(), input_height + 2 * padding_height ,
182- input_width + 2 * padding_width , input_channels, filter_height ,
183- filter_width, stride_height, stride_width, padding_height ,
184- padding_width , output_height, output_width, im.data <T>());
201+ num_kernels, col.data <T>(), input_height + padding_up + padding_down ,
202+ input_width + padding_left + padding_right , input_channels,  // padded width must match the num_kernels sizing (left + right)
203+ filter_height, filter_width, stride_height, stride_width, padding_up ,
204+ padding_left , output_height, output_width, im.data <T>());
185205 }
186206};
187207
@@ -238,8 +258,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
238258 public:
239259 void operator ()(const platform::DeviceContext& context,
240260 const framework::Tensor& im, framework::Tensor& col,
241- int stride_height, int stride_width, int padding_height ,
242- int padding_width ) {
261+ int stride_height, int stride_width, int padding_up ,
262+ int padding_down, int padding_left, int padding_right ) {
243263 PADDLE_ENFORCE (im.dims ().size () == 3 );
244264 PADDLE_ENFORCE (col.dims ().size () == 5 );
245265 int input_channels = im.dims ()[0 ];
@@ -250,6 +270,15 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
250270 int output_height = col.dims ()[0 ];
251271 int output_width = col.dims ()[1 ];
252272
273+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
274+ stride_height +
275+ 1 ==
276+ output_height);
277+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
278+ stride_width +
279+ 1 ==
280+ output_width);
281+
253282 int block_dim_x = 0 ;
254283 int block_dim_y = 0 ;
255284 if (filter_height <= 4 && filter_width <= 4 ) {
@@ -274,8 +303,8 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
274303 reinterpret_cast <const platform::CUDADeviceContext&>(context)
275304 .stream()>>> (
276305 im.data <T>(), col.data <T>(), input_channels, input_height, input_width,
277- filter_height, filter_width, stride_height, stride_width,
278- padding_height, padding_width , output_height, output_width);
306+ filter_height, filter_width, stride_height, stride_width, padding_up,
307+ padding_left , output_height, output_width);
279308 }
280309};
281310
@@ -322,7 +351,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
322351 public:
323352 void operator ()(const platform::DeviceContext& context, framework::Tensor& im,
324353 const framework::Tensor& col, int stride_height,
325- int stride_width, int padding_height, int padding_width) {
354+ int stride_width, int padding_up, int padding_down,
355+ int padding_left, int padding_right) {
326356 PADDLE_ENFORCE (im.dims ().size () == 3 );
327357 PADDLE_ENFORCE (col.dims ().size () == 5 );
328358 int input_channels = im.dims ()[0 ];
@@ -333,6 +363,15 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
333363 int output_height = col.dims ()[0 ];
334364 int output_width = col.dims ()[1 ];
335365
366+ PADDLE_ENFORCE ((input_height + padding_up + padding_down - filter_height) /
367+ stride_height +
368+ 1 ==
369+ output_height);
370+ PADDLE_ENFORCE ((input_width + padding_left + padding_right - filter_width) /
371+ stride_width +
372+ 1 ==
373+ output_width);
374+
336375 int block_dim_x = 0 ;
337376 int block_dim_y = 0 ;
338377 if (filter_height <= 4 && filter_width <= 4 ) {
@@ -357,8 +396,8 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
357396 reinterpret_cast <const platform::CUDADeviceContext&>(context)
358397 .stream()>>> (
359398 im.data <T>(), col.data <T>(), input_channels, input_height, input_width,
360- filter_height, filter_width, stride_height, stride_width,
361- padding_height, padding_width , output_height, output_width);
399+ filter_height, filter_width, stride_height, stride_width, padding_up,
400+ padding_left , output_height, output_width);
362401 }
363402};
364403
0 commit comments