
Commit ea940df

Fix
1 parent 8341678 commit ea940df

14 files changed: +190 -35 lines changed

paddle/phi/infermeta/backward.cc

Lines changed: 11 additions & 2 deletions
@@ -921,6 +921,7 @@ void GumbelSoftmaxGradInferMeta(const MetaTensor& out,
 
 void InstanceNormGradInferMeta(const MetaTensor& x,
                                const MetaTensor& scale,
+                               const MetaTensor& bias,
                                const MetaTensor& saved_mean,
                                const MetaTensor& saved_variance,
                                const MetaTensor& y_grad,
@@ -939,10 +940,18 @@ void InstanceNormGradInferMeta(const MetaTensor& x,
   x_grad->set_dtype(x.dtype());
   x_grad->set_layout(x.layout());
   if (scale_grad) {
-    scale_grad->set_dims({C});
+    if (C == 0) {
+      scale_grad->set_dims({scale.dims()[0]});
+    } else {
+      scale_grad->set_dims({C});
+    }
   }
   if (bias_grad) {
-    bias_grad->set_dims({C});
+    if (C == 0) {
+      bias_grad->set_dims({bias.dims()[0]});
+    } else {
+      bias_grad->set_dims({C});
+    }
   }
 }
 void InstanceNormDoubleGradInferMeta(const MetaTensor& x,

paddle/phi/infermeta/backward.h

Lines changed: 1 addition & 0 deletions
@@ -345,6 +345,7 @@ void GumbelSoftmaxGradInferMeta(const MetaTensor& out,
 
 void InstanceNormGradInferMeta(const MetaTensor& x,
                                const MetaTensor& scale,
+                               const MetaTensor& bias,
                                const MetaTensor& saved_mean,
                                const MetaTensor& saved_variance,
                                const MetaTensor& y_grad,

paddle/phi/infermeta/spmd_rules/instance_norm.cc

Lines changed: 1 addition & 0 deletions
@@ -131,6 +131,7 @@ SpmdInfo InstanceNormInferSpmd(const DistMetaTensor& x,
 
 SpmdInfo InstanceNormGradInferSpmd(const DistMetaTensor& x,
                                    const DistMetaTensor& scale,
+                                   const DistMetaTensor& bias UNUSED,
                                    const DistMetaTensor& saved_mean,
                                    const DistMetaTensor& saved_variance,
                                    const DistMetaTensor& y_grad,

paddle/phi/infermeta/spmd_rules/instance_norm.h

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ SpmdInfo InstanceNormInferSpmd(const DistMetaTensor& x,
 
 SpmdInfo InstanceNormGradInferSpmd(const DistMetaTensor& x,
                                    const DistMetaTensor& scale,
+                                   const DistMetaTensor& bias,
                                    const DistMetaTensor& saved_mean,
                                    const DistMetaTensor& saved_variance,
                                    const DistMetaTensor& y_grad,

paddle/phi/infermeta/ternary.cc

Lines changed: 19 additions & 21 deletions
@@ -826,13 +826,6 @@ void InstanceNormInferMeta(const MetaTensor& x,
       common::errors::InvalidArgument(
           "The y in InstanceNormInferMeta can't be nullptr."));
   const auto x_dims = x.dims();
-  PADDLE_ENFORCE_NE(common::product(x_dims),
-                    0,
-                    common::errors::PreconditionNotMet(
-                        "The Input variable X has not "
-                        "been initialized. You may need to confirm "
-                        "if you put exe.run(startup_program) "
-                        "after optimizer.minimize function."));
   PADDLE_ENFORCE_GE(
       x_dims.size(),
       2,
@@ -867,13 +860,16 @@ void InstanceNormInferMeta(const MetaTensor& x,
             scale_dim.size()));
     bool check = config.is_runtime || contain_unknown_dim(scale_dim);
     if (check) {
-      PADDLE_ENFORCE_EQ(scale_dim[0],
-                        C,
-                        common::errors::InvalidArgument(
-                            "ShapeError: the shape of scale must equal to [%d]"
-                            "But received: the shape of scale is [%d]",
-                            C,
-                            scale_dim[0]));
+      if (C != 0) {
+        PADDLE_ENFORCE_EQ(
+            scale_dim[0],
+            C,
+            common::errors::InvalidArgument(
+                "ShapeError: the shape of scale must equal to [%d]"
+                "But received: the shape of scale is [%d]",
+                C,
+                scale_dim[0]));
+      }
     }
   }
   if (bias) {
@@ -889,13 +885,15 @@ void InstanceNormInferMeta(const MetaTensor& x,
             bias_dim.size()));
     bool check = config.is_runtime || !contain_unknown_dim(bias_dim);
     if (check) {
-      PADDLE_ENFORCE_EQ(bias_dim[0],
-                        C,
-                        common::errors::InvalidArgument(
-                            "ShapeError: the shape of bias must equal to [%d]"
-                            "But received: the shape of bias is [%d]",
-                            C,
-                            bias_dim[0]));
+      if (C != 0) {
+        PADDLE_ENFORCE_EQ(bias_dim[0],
+                          C,
+                          common::errors::InvalidArgument(
+                              "ShapeError: the shape of bias must equal to [%d]"
+                              "But received: the shape of bias is [%d]",
+                              C,
+                              bias_dim[0]));
+      }
     }
   }
   y->set_dims(x_dims);
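Two things change here. First, dropping the PADDLE_ENFORCE_NE on common::product(x_dims) makes a zero-element input a legal input instead of being diagnosed as an uninitialized variable. Second, the [C] shape checks for scale and bias are skipped when C == 0, since a zero channel count gives nothing meaningful to compare against. A simplified sketch of the guarded check (names hypothetical, not the Paddle source):

#include <cstdint>
#include <stdexcept>
#include <string>

// Enforce param_len == C only when C is nonzero; a zero-size input such as
// x.dims() == {2, 0, 4, 4} yields C == 0 and skips the check entirely.
void CheckParamLen(int64_t C, int64_t param_len, const std::string& name) {
  if (C != 0 && param_len != C) {
    throw std::invalid_argument("the shape of " + name + " must equal [" +
                                std::to_string(C) + "]");
  }
}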

paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc

Lines changed: 13 additions & 3 deletions
@@ -51,6 +51,19 @@ void InstanceNormGradKernel(const Context& dev_ctx,
                             DenseTensor* d_x,
                             DenseTensor* d_scale,
                             DenseTensor* d_bias) {
+  phi::funcs::SetConstant<CPUContext, T> set_constant;
+  dev_ctx.template Alloc<T>(d_x);
+  if (x.numel() == 0) {
+    if (d_scale) {
+      dev_ctx.template Alloc<T>(d_scale);
+      set_constant(dev_ctx, d_scale, static_cast<T>(0));
+    }
+    if (d_bias) {
+      dev_ctx.template Alloc<T>(d_bias);
+      set_constant(dev_ctx, d_bias, static_cast<T>(0));
+    }
+    return;
+  }
   const auto* scale_ptr = scale.get_ptr();
 
   const auto& x_dims = x.dims();
@@ -60,7 +73,6 @@ void InstanceNormGradKernel(const Context& dev_ctx,
   const int NxC = N * C;
   const int sample_size = static_cast<int>(x.numel() / N / C);
 
-  dev_ctx.template Alloc<T>(d_x);
   auto* place = dev_ctx.eigen_device();
 
   Eigen::DSizes<int, 2> rshape(NxC, sample_size);
@@ -83,8 +95,6 @@ void InstanceNormGradKernel(const Context& dev_ctx,
   NxC_shape.set(0, NxC);
 #endif
 
-  phi::funcs::SetConstant<CPUContext, T> set_constant;
-
   DenseTensor scale_data;
   if (!scale_ptr) {
     scale_data.Resize({C});
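The ordering matters: the early return sits before the shape math because sample_size is computed as x.numel() / N / C, which divides by zero once N or C is 0. A small sketch of the hazard the fast path avoids (hypothetical helper, for illustration):

#include <cstdint>

// Without the early return, a zero-size input reaches this computation:
// x.dims() == {2, 0, 4, 4} gives N = 2, C = 0, and numel / N / C traps.
int64_t SampleSize(int64_t numel, int64_t N, int64_t C) {
  if (N == 0 || C == 0) return 0;  // the guard the kernel's early return provides
  return numel / N / C;
}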

paddle/phi/kernels/cpu/instance_norm_kernel.cc

Lines changed: 16 additions & 1 deletion
@@ -38,6 +38,22 @@ void InstanceNormKernel(const Context& dev_ctx,
                         DenseTensor* y,
                         DenseTensor* saved_mean,
                         DenseTensor* saved_variance) {
+  phi::funcs::SetConstant<CPUContext, T> set_constant;
+  if (x.numel() == 0) {
+    dev_ctx.template Alloc<T>(y);
+    set_constant(dev_ctx, y, static_cast<T>(0));
+
+    if (saved_mean) {
+      dev_ctx.template Alloc<T>(saved_mean);
+      set_constant(dev_ctx, saved_mean, static_cast<T>(0));
+    }
+    if (saved_variance) {
+      dev_ctx.template Alloc<T>(saved_variance);
+      set_constant(dev_ctx, saved_variance, static_cast<T>(0));
+    }
+    return;
+  }
+
   const auto& x_dims = x.dims();
   T epsilon = static_cast<T>(epsilon_f);
   const int N = static_cast<int>(x_dims[0]);
@@ -63,7 +79,6 @@ void InstanceNormKernel(const Context& dev_ctx,
   Eigen::IndexList<Eigen::type2index<1>> rdims;
 #endif
 
-  phi::funcs::SetConstant<CPUContext, T> set_constant;
   DenseTensor saved_mean_tmp, saved_variance_tmp;
   if (saved_mean) {
     dev_ctx.template Alloc<T>(saved_mean);
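This path also settles the op's semantics on empty input: the statistics of an empty sample are undefined, and the kernel pins saved_mean and saved_variance to 0 rather than leaving their memory uninitialized. Note that the stats buffers can be non-empty even when x is. A worked example (a standalone illustration, assuming the usual NCHW layout with per-instance stats of length N*C):

#include <cassert>
#include <cstdint>
#include <vector>

int64_t Numel(const std::vector<int64_t>& dims) {
  int64_t n = 1;
  for (int64_t d : dims) n *= d;
  return n;
}

int main() {
  const std::vector<int64_t> x_dims = {2, 3, 0, 4};  // empty input: H == 0
  assert(Numel(x_dims) == 0);                        // triggers the fast path
  const int64_t NxC = x_dims[0] * x_dims[1];         // per-instance stats length
  assert(NxC == 6);  // saved_mean/saved_variance hold 6 real values, zero-filled
  return 0;
}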

paddle/phi/kernels/gpu/instance_norm_grad_kernel.cu

Lines changed: 14 additions & 2 deletions
@@ -326,11 +326,25 @@ void InstanceNormGradKernel(const Context &dev_ctx,
   x_tmp.ShareDataWith(x).Resize({1, NxC, H, W, D});
   d_y_tmp.ShareDataWith(d_y).Resize({1, NxC, H, W, D});
 
+  phi::funcs::SetConstant<GPUContext, AccT> set_constant;
+
   dev_ctx.template Alloc<T>(d_x);
+  if (x.numel() == 0) {
+    if (d_scale) {
+      dev_ctx.template Alloc<AccT>(d_scale);
+      set_constant(dev_ctx, d_scale, static_cast<AccT>(0));
+    }
+    if (d_bias) {
+      dev_ctx.template Alloc<AccT>(d_bias);
+      set_constant(dev_ctx, d_bias, static_cast<AccT>(0));
+    }
+    return;
+  }
   if (d_scale && d_bias) {
     dev_ctx.template Alloc<AccT>(d_scale);
     dev_ctx.template Alloc<AccT>(d_bias);
   }
+
   if (scale_ptr) {
     PADDLE_ENFORCE_EQ(
         scale_ptr->dims().size(),
@@ -354,8 +368,6 @@ void InstanceNormGradKernel(const Context &dev_ctx,
             scale_ptr->dims()));
   }
 
-  phi::funcs::SetConstant<GPUContext, AccT> set_constant;
-
   const int n = x.numel();
   const int block = 512;
   int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
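Same fast path as the CPU kernel, with one type detail worth noting: d_x stays in the compute type T, while the per-channel d_scale and d_bias are allocated and zero-filled in the accumulation type AccT (typically float when T is half), so SetConstant is instantiated for AccT. A sketch of that type split (a hypothetical struct, illustration only):

// The grad outputs do not share one dtype: activation grads match x, while
// parameter grads are kept widened for accumulation precision.
template <typename T, typename AccT>
struct InstanceNormGrads {
  T* d_x;         // same dtype as x and y_grad (e.g. half)
  AccT* d_scale;  // accumulation dtype (e.g. float), one value per channel
  AccT* d_bias;
};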

paddle/phi/kernels/gpu/instance_norm_kernel.cu

Lines changed: 14 additions & 1 deletion
@@ -60,6 +60,20 @@ void InstanceNormKernel(const Context &dev_ctx,
   DenseTensor x_tmp;
   x_tmp.ShareDataWith(x).Resize({1, NxC, H, W, D});
   dev_ctx.template Alloc<T>(y);
+  phi::funcs::SetConstant<GPUContext, BatchNormParamType<T>> functor;
+  phi::funcs::SetConstant<GPUContext, T> functor_y;
+  if (x.numel() == 0) {
+    functor_y(dev_ctx, y, static_cast<T>(0));
+    if (saved_mean) {
+      dev_ctx.template Alloc<BatchNormParamType<T>>(saved_mean);
+      functor(dev_ctx, saved_mean, static_cast<BatchNormParamType<T>>(0));
+    }
+    if (saved_variance) {
+      dev_ctx.template Alloc<BatchNormParamType<T>>(saved_variance);
+      functor(dev_ctx, saved_variance, static_cast<BatchNormParamType<T>>(0));
+    }
+    return;
+  }
 
 #ifdef PADDLE_WITH_HIP
   miopenTensorDescriptor_t data_desc_;
@@ -144,7 +158,6 @@ void InstanceNormKernel(const Context &dev_ctx,
   auto handle = dev_ctx.cudnn_handle();
 
   DenseTensor saved_mean_tmp, saved_variance_tmp;
-  phi::funcs::SetConstant<GPUContext, BatchNormParamType<T>> functor;
 
   if (saved_mean) {
     dev_ctx.template Alloc<BatchNormParamType<T>>(saved_mean);

paddle/phi/kernels/xpu/instance_norm_grad_kernel.cc

Lines changed: 18 additions & 0 deletions
@@ -15,6 +15,7 @@
 #include "paddle/phi/kernels/instance_norm_grad_kernel.h"
 #include "paddle/phi/backends/xpu/enforce_xpu.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/norm_utils.h"
 
 namespace phi {
@@ -44,6 +45,23 @@ void InstanceNormGradKernel(const Context& dev_ctx,
                         x_dims.size()));
 
   dev_ctx.template Alloc<T>(d_x);
+  if (x.numel() == 0) {
+    if (d_scale) {
+      phi::Full<float, Context>(
+          dev_ctx,
+          phi::IntArray(common::vectorize(d_scale->dims())),
+          0.f,
+          d_scale);
+    }
+    if (d_bias) {
+      phi::Full<float, Context>(
+          dev_ctx,
+          phi::IntArray(common::vectorize(d_bias->dims())),
+          0.f,
+          d_bias);
+    }
+    return;
+  }
   T* d_scale_data = nullptr;
   T* d_bias_data = nullptr;
   if (d_scale && d_bias) {
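The XPU path reaches the same result through the phi::Full kernel (hence the new full_kernel.h include) rather than funcs::SetConstant, filling each grad to the dims that InferMeta already assigned; this is where the scale/bias fallback in backward.cc above pays off, since d_scale->dims() is well-defined even when C == 0. A minimal sketch of the pattern (hypothetical helper, not in the commit):

template <typename Context>
void ZeroFillIfPresent(const Context& dev_ctx, phi::DenseTensor* t) {
  if (t == nullptr) return;  // optional grad outputs may be absent
  phi::Full<float, Context>(
      dev_ctx, phi::IntArray(common::vectorize(t->dims())), 0.f, t);
}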
