
Commit 1ec577a

update stack op
1 parent 740fd7d commit 1ec577a

File tree

4 files changed: +95 -4 lines changed

  paddle/phi/kernels/cpu/stack_grad_kernel.cc
  paddle/phi/kernels/cpu/stack_kernel.cc
  paddle/phi/kernels/funcs/stack_and_unstack.h
  test/legacy_test/test_stack_op.py

paddle/phi/kernels/cpu/stack_grad_kernel.cc

Lines changed: 10 additions & 0 deletions
@@ -37,6 +37,16 @@ void StackGradKernel(const Context& dev_ctx,
     }
   }
   auto dy_data = out.data<T>();
+
+  // zero sized tensor case
+  if (out.numel() == 0) {
+    for (int i = 0; i < n; i++) {
+      auto x_grad_dim = x_grad[i]->dims();
+      x_grad[i]->Resize(x_grad_dim);
+    }
+    return;
+  }
+
   int pre = 1;
   for (int i = 0; i < axis; ++i) pre *= static_cast<int>(out.dims()[i]);
   int total_num = static_cast<int>(out.numel());
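
Note: a minimal dygraph sketch of the backward path this early return serves, mirroring the test added in test_stack_op.py below (assumes a Paddle build that includes this commit):

    import paddle

    x1 = paddle.ones([1, 0])
    x2 = paddle.ones([1, 0])
    x1.stop_gradient = False
    x2.stop_gradient = False
    out = paddle.stack([x1, x2])  # shape [2, 1, 0], so out.numel() == 0
    out.backward()                # StackGradKernel takes the zero-sized early return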

paddle/phi/kernels/cpu/stack_kernel.cc

Lines changed: 12 additions & 4 deletions
@@ -28,10 +28,18 @@ void StackKernel(const Context& dev_ctx,
 
   auto x_dims = x[0]->dims();
   for (int i = 0; i < x_dims.size(); i++) {
-    PADDLE_ENFORCE_GT(x_dims[i],
-                      0,
-                      phi::errors::InvalidArgument(
-                          "The dims of Input(X) should be greater than 0"));
+    PADDLE_ENFORCE_GE(
+        x_dims[i],
+        0,
+        phi::errors::InvalidArgument(
+            "The dims of Input(X) should be greater than or equal to 0"));
+  }
+  // zero sized tensor case
+  if (x[0]->numel() == 0) {
+    dev_ctx.template Alloc<T>(out);
+    auto out_dims = out->dims();
+    out->Resize(out_dims);
+    return;
   }
 
   int n = static_cast<int>(x.size());
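
Note: the user-visible effect of relaxing PADDLE_ENFORCE_GT to PADDLE_ENFORCE_GE plus the new early return, as a minimal sketch (assumes a Paddle build that includes this commit):

    import paddle

    x = paddle.ones([1, 0])     # a 0-sized dim previously failed the > 0 check
    out = paddle.stack([x, x])  # output is now allocated and returned early
    print(out.shape)            # [2, 1, 0]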

paddle/phi/kernels/funcs/stack_and_unstack.h

Lines changed: 16 additions & 0 deletions
@@ -77,6 +77,13 @@ void StackRawKernel(const Context& ctx,
   if (axis < 0) axis += (x[0]->dims().size() + 1);
   int num = static_cast<int>(x.size());
 
+  // zero sized tensor case
+  if (x[0]->numel() == 0) {
+    ctx.template Alloc<T>(out);
+    auto out_dims = out->dims();
+    out->Resize(out_dims);
+    return;
+  }
   // Split x dim from axis to matrix of shape [x_row, x_col], and the output
   // tensor's shape is [x_row, out_col].
   int64_t x_row = 1, x_row_bak = 1;

@@ -251,6 +258,15 @@ void UnStackRawKernel(const Context& ctx,
   // Input tensor is splited to split_dim tensors along split_dim dimension.
   int64_t split_dim = x_dims[axis];
 
+  // zero sized tensor case
+  if (x.numel() == 0) {
+    for (int i = 0; i < split_dim; i++) {
+      ctx.template Alloc<T>((*outs)[i]);
+      auto x_grad_dim = (*outs)[i]->dims();
+      (*outs)[i]->Resize(x_grad_dim);
+    }
+    return;
+  }
   // Treat outs[i] as [out_row, out_col], and x as [out_row, split_dim,
   // out_col].
   int64_t out_row = 1;
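
Note: these raw kernels sit behind the GPU stack/unstack (and stack_grad) kernels, so the UnStackRawKernel branch should also cover a call like the following rough sketch (shapes are illustrative; assumes a Paddle build that includes this commit):

    import paddle

    x = paddle.ones([2, 0, 3])        # x.numel() == 0
    a, b = paddle.unstack(x, axis=0)  # each output allocated with shape [0, 3]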

test/legacy_test/test_stack_op.py

Lines changed: 57 additions & 0 deletions
@@ -452,5 +452,62 @@ def test_stack_triple_grad(self):
         paddle.base.core.set_prim_eager_enabled(False)
 
 
+class TestStackAPI_ZeroSizedTensor(unittest.TestCase):
+    def test_dygraph(self):
+        places = [base.CPUPlace()]
+        if base.is_compiled_with_cuda():
+            places.append(base.CUDAPlace(0))
+
+        for place in places:
+            with base.dygraph.guard():
+                paddle.disable_static(place)
+
+                x1 = paddle.ones([1, 0])
+                x2 = paddle.ones([1, 0])
+                x1.stop_gradient = False
+                x2.stop_gradient = False
+                out = paddle.stack([x1, x2])
+                out.retain_grads()
+                out.backward()
+
+                np.testing.assert_equal(out.shape, [2, 1, 0])
+                np.testing.assert_equal(x1.grad, None)
+                np.testing.assert_equal(x2.grad, None)
+                np.testing.assert_equal(out, np.ones([2, 1, 0]))
+
+        paddle.enable_static()
+
+    @test_with_pir_api
+    def test_static(self):
+        places = [paddle.CPUPlace()]
+        if base.is_compiled_with_cuda():
+            places.append(paddle.CUDAPlace(0))
+        paddle.enable_static()
+        for place in places:
+            with paddle.static.program_guard(
+                paddle.static.Program(), paddle.static.Program()
+            ):
+                data1 = paddle.static.data(
+                    'data1', shape=[0, 2], dtype='float64'
+                )
+                data2 = paddle.static.data(
+                    'data2', shape=[0, 2], dtype='float64'
+                )
+                data3 = paddle.static.data(
+                    'data3', shape=[0, 2], dtype='float64'
+                )
+                result_stack = paddle.stack([data1, data2, data3], axis=0)
+                exe = base.Executor(place)
+                input1 = np.ones([0, 2]).astype('float64')
+                input2 = np.ones([0, 2]).astype('float64')
+                input3 = np.ones([0, 2]).astype('float64')
+                (result,) = exe.run(
+                    feed={"data1": input1, "data2": input2, "data3": input3},
+                    fetch_list=[result_stack],
+                )
+                expected_result = np.stack([input1, input2, input3], axis=0)
+                np.testing.assert_equal(expected_result, result)
+
+
 if __name__ == '__main__':
     unittest.main()
