PaddlePaddle
diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc
Lines changed: 29 additions & 34 deletions
diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
Lines changed: 15 additions & 10 deletions
diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
Lines changed: 21 additions & 1 deletion
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
Lines changed: 4 additions & 3 deletions
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
Lines changed: 1 addition & 1 deletion
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
Lines changed: 9 additions & 0 deletions
diff --git a/paddle/fluid/framework/infershape_utils.cc b/paddle/fluid/framework/infershape_utils.cc
Lines changed: 5 additions & 1 deletion
diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h
Lines changed: 2 additions & 0 deletions
diff --git a/paddle/fluid/framework/phi_utils.cc b/paddle/fluid/framework/phi_utils.cc
Lines changed: 9 additions & 0 deletions
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
Lines changed: 29 additions & 0 deletions
@@ -52,49 +52,44 @@ void RegisterReduceHookForTensor(const paddle::experimental::Tensor& tensor,
  }
 }
 
-static void RetainGradForRegularNode(
-  const paddle::experimental::Tensor& tensor) {
- AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
- if (meta->RetainGrads()) {
+void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
+ if (IsLeafTensor(tensor)) {
+  // Leaf tensor's grad will always be retained
+  // Refer to implementation of AccumulationNode for more details
  return;
  } else {
- meta->SetRetainGrads(true);
- }
+ AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
+ if (meta->RetainGrads()) {
+ return;
+ } else {
+ meta->SetRetainGrads(true);
+ }
 
- std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
- meta->WeakGrad();
+  std::weak_ptr<paddle::experimental::Tensor> weak_grad_tensor =
+  meta->WeakGrad();
 
- // Define Hook
- auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
- if (!weak_grad_tensor.expired()) {
- auto grad_tensor = weak_grad_tensor.lock();
- if (t.defined()) {
- VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
- // Simply Copy impl() to grad_tensor
- grad_tensor->set_impl(t.impl());
- return *grad_tensor.get();
+ // Define Hook
+ auto hook = [weak_grad_tensor](const paddle::experimental::Tensor& t) {
+ if (!weak_grad_tensor.expired()) {
+ auto grad_tensor = weak_grad_tensor.lock();
+ if (t.defined()) {
+ VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
+ // Simply Copy impl() to grad_tensor
+ grad_tensor->set_impl(t.impl());
+ return *grad_tensor.get();
+ } else {
+ VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
+ return paddle::experimental::Tensor();
+ }
  } else {
  VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
  return paddle::experimental::Tensor();
  }
- } else {
- VLOG(7) << "Retain NULL paddle::experimental::Tensor in Grad Hook";
- return paddle::experimental::Tensor();
- }
- };
+ };
 
- // Append to GradientHooks
- RegisterGradientHookForTensor(tensor,
- std::make_shared<egr::CppTensorHook>(hook));
-}
-
-void RetainGradForTensor(const paddle::experimental::Tensor& tensor) {
- if (IsLeafTensor(tensor)) {
- // Leaf tensor's grad will always be retained
- // Refer to implementation of AccumulationNode for more details
- return;
- } else {
- RetainGradForRegularNode(tensor);
+ // Append to GradientHooks
+ RegisterGradientHookForTensor(tensor,
+ std::make_shared<egr::CppTensorHook>(hook));
  }
 }
 
 
@@ -1156,11 +1156,13 @@ static std::string GenerateGradNodeCreationContent(
  grad_node_creation_str += paddle::string::Sprintf(
  SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
 
- const char* SET_HISTORY_TEMPLATE =
- " egr::EagerUtils::SetHistory(&%s, grad_node);\n";
- grad_node_creation_str +=
- paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
-
+ // Intermediate Tensor does not require SetHistory
+ if (!output.intermediate()) {
+ const char* SET_HISTORY_TEMPLATE =
+ " egr::EagerUtils::SetHistory(&%s, grad_node);\n";
+ grad_node_creation_str +=
+ paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+ }
  const char* SET_GRAD_IN_META_TEMPLATE =
  " grad_node->SetGradInMeta(&%s, %d);\n";
  grad_node_creation_str += paddle::string::Sprintf(
@@ -1173,17 +1175,20 @@ static std::string GenerateGradNodeCreationContent(
  grad_node_creation_str += paddle::string::Sprintf(
  SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
 
- const char* SET_HISTORY_TEMPLATE =
- " egr::EagerUtils::SetHistory(%s, grad_node);\n";
- grad_node_creation_str +=
- paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
-
+ // Intermediate Tensor does not require SetHistory
+ if (!output.intermediate()) {
+ const char* SET_HISTORY_TEMPLATE =
+ " egr::EagerUtils::SetHistory(%s, grad_node);\n";
+ grad_node_creation_str +=
+ paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
+ }
  const char* SET_GRAD_IN_META_TEMPLATE =
  " grad_node->SetGradInMeta(%s, %d);\n";
  grad_node_creation_str += paddle::string::Sprintf(
  SET_GRAD_IN_META_TEMPLATE, output_autograd_name, output_position);
  }
 
+ // Intermediate Tensor does not require CheckAndRetainGrad
  if (!output.intermediate()) {
  VLOG(6) << "Generated Call RetainGradForTensor";
  const char* RETAIN_GRAD_TEMPLATE =
 
@@ -24,6 +24,17 @@
 core_ops_args_type_info = {}
 
 
+yaml_types_mapping = {
+ 'int' : 'int', 'int32_t' : 'int32_t', 'int64_t' : 'int64_t', 'size_t' : 'size_t', \
+ 'float' : 'float', 'double' : 'double', 'bool' : 'bool', \
+ 'Backend' : 'Backend', 'DataLayout' : 'DataLayout', 'DataType' : 'DataType', \
+ 'int64_t[]' : 'std::vector<int64_t>', 'int[]' : 'std::vector<int>',
+ 'Tensor' : 'Tensor',
+ 'Tensor[]' : 'std::vector<Tensor>',
+ 'Tensor[Tensor[]]' : 'std::vector<std::vector<Tensor>>'
+}
+
+
 def ParseArguments():
  parser = argparse.ArgumentParser(
  description='Eager Code Generator Args Parser')
@@ -59,7 +70,9 @@ def IsPlainTensorType(string):
 
 
 def IsVectorTensorType(string):
- vector_tensor_types = ['list(Tensor)']
+ vector_tensor_types = [
+ 'std::vector<std::vector<Tensor>>', 'std::vector<Tensor>'
+ ]
  if string in vector_tensor_types:
  return True
  return False
@@ -180,6 +193,9 @@ def ParseYamlArgs(string):
  arg_name = m.group(3).split("=")[0].strip()
  default_value = m.group(3).split("=")[1].strip() if len(
  m.group(3).split("=")) > 1 else None
+
+ assert arg_type in yaml_types_mapping.keys()
+ arg_type = yaml_types_mapping[arg_type]
  if "Tensor" in arg_type:
  assert default_value is None
  inputs_list.append([arg_name, arg_type, i])
@@ -219,6 +235,10 @@ def ParseYamlReturnsWithName(string):
  m = re.search(pattern, ret)
  ret_type = m.group(1)
  ret_name = m.group(2)
+
+ assert ret_type in yaml_types_mapping.keys()
+ ret_type = yaml_types_mapping[ret_type]
+
  assert "Tensor" in ret_type
  returns_list.append([ret_name, ret_type, i])
 
 
@@ -221,10 +221,11 @@ void RunBackward(const std::vector<paddle::experimental::Tensor>& tensors,
  << " 's name is: " << grad_output_tensor.name();
 
  auto* next_node = next_node_shared.get();
-
  if (!node_input_buffers_dict.count(next_node)) {
- node_input_buffers_dict[next_node] =
- std::make_unique<GradTensorHolder>(next_node->InputMeta());
+ const auto& input_meta = next_node->InputMeta();
+ auto grad_tensor_holder =
+ std::make_unique<GradTensorHolder>(input_meta);
+ node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
  }
  VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
  << ", rank: " << edge_rank.second;
 
@@ -244,7 +244,7 @@ GradNodeBase::ApplyGradientHooks(
  if (!out.defined() || !out.initialized()) {
  out = (*hook)(tensors[slot_id][rank]);
  } else {
- // If more than one hook is registered, the input to the next hook func
+ // If more than one hook is registered, the input to the next hook func
  // should be the output of the previous hook
  out = (*hook)(out);
  }
 
@@ -122,12 +122,21 @@ paddle::experimental::Tensor* EagerUtils::mutable_grad(
 void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
  const std::shared_ptr<GradNodeBase>& grad_node) {
  for (const auto& autograd_meta : *autograd_metas) {
+ if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
+ VLOG(6) << "Warning: Reseting GradNodeAccumulation for leaf tensor is "
+ "detected";
+ }
  autograd_meta->SetGradNode(grad_node);
  }
 }
 
 void EagerUtils::SetHistory(AutogradMeta* autograd_meta,
  const std::shared_ptr<GradNodeBase>& grad_node) {
+ if (dynamic_cast<GradNodeAccumulation*>(autograd_meta->GradNode())) {
+ VLOG(6)
+ << "Warning: Reseting GradNodeAccumulation for leaf tensor is detected";
+ }
+
  autograd_meta->SetGradNode(grad_node);
 }
 
 
@@ -88,6 +88,8 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext {
  return var_types[0] == proto::VarType::SELECTED_ROWS;
  }
 
+ bool IsForInferShape() const override { return true; }
+
  private:
  const InferShapeContext& ctx_;
 };
@@ -127,7 +129,9 @@ class CompatMetaTensor : public phi::MetaTensor {
  }
  } else {
  auto* var = BOOST_GET_CONST(VarDesc*, var_);
- return phi::make_ddim(var->GetShape());
+
+ return var->GetShape().empty() ? phi::make_ddim({0UL})
+ : phi::make_ddim(var->GetShape());
  }
  }
 
 
@@ -489,6 +489,8 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext {
  return ctx_.OutputVar(name)->IsType<phi::SelectedRows>();
  }
 
+ bool IsForInferShape() const override { return false; }
+
  private:
  const ExecutionContext& ctx_;
 };
 
@@ -125,6 +125,15 @@ phi::KernelKey FallBackToCpu(const OpKernelType& expected_kernel_key,
  return phi::KernelKey(phi::Backend::CPU, kernel_key.layout(),
  kernel_key.dtype());
  }
+#endif
+#ifdef PADDLE_WITH_IPU
+ if (platform::is_ipu_place(expected_kernel_key.place_)) {
+ VLOG(3) << "pten missing IPU kernel: " << op.Type()
+ << ", expected_kernel_key:" << expected_kernel_key
+ << ", fallbacking to CPU one!";
+ return phi::KernelKey(phi::Backend::CPU, kernel_key.layout(),
+ kernel_key.dtype());
+ }
 #endif
  return phi::KernelKey();
 }
 
@@ -453,6 +453,23 @@ if(WITH_MKLDNN)
  download_int8_data_without_verify(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz" )
  inference_analysis_api_int8_test_run_custom_warmup_batch_size(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10)
 
+ # mobilenetv3_large_x1_0 int8
+ set(INT8_MOBILENETV3_LARGE_MODEL_DIR "${INT8_DATA_DIR}/mobilenetv3_large")
+ set(INT8_MOBILENETV3_FILE_NAME "MobileNetV3_large_x1_0_infer.tar")
+ if (NOT EXISTS ${INT8_MOBILENETV3_LARGE_MODEL_DIR}/${INT8_MOBILENETV3_FILE_NAME})
+ inference_download_and_uncompress_without_verify(${INT8_MOBILENETV3_LARGE_MODEL_DIR} "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/" ${INT8_MOBILENETV3_FILE_NAME})
+ endif()
+ inference_analysis_test_run(test_analyzer_int8_mobilenetv3_large 
+ COMMAND ${INT8_IMG_CLASS_TEST_APP} 
+ ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
+ --infer_data=${IMAGENET_DATA_PATH} 
+ --warmup_batch_size=50
+ --batch_size=1
+ --enable_int8=true 
+ --cpu_num_threads=${CPU_NUM_THREADS_ON_CI} 
+ --iterations=100
+ --with_accuracy_layer=false)
+
  ### BFLOAT16 tests
 
  # build test binary to be used in subsequent tests
@@ -472,6 +489,17 @@ if(WITH_MKLDNN)
  # mobilenetv2 bfloat16
  inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_mobilenetv2 ${BF16_IMG_CLASS_TEST_APP} ${INT8_MOBILENETV2_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
+ # mobilenetv3_large 
+ inference_analysis_test_run(test_analyzer_bfloat16_mobilenetv3_large
+ COMMAND ${BF16_IMG_CLASS_TEST_APP}
+ ARGS --infer_model=${INT8_MOBILENETV3_LARGE_MODEL_DIR}/MobileNetV3_large_x1_0_infer
+ --infer_data=${IMAGENET_DATA_PATH} 
+ --batch_size=1
+ --enable_bf16=true
+ --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
+ --iterations=100
+ --with_accuracy_layer=false)
+
  ### Object detection models
  set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
  set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")
@@ -739,6 +767,7 @@ if(WITH_MKLDNN)
  set_tests_properties(test_analyzer_quant_performance_benchmark PROPERTIES TIMEOUT 120)
  set_tests_properties(test_analyzer_int8_mobilenetv2 PROPERTIES TIMEOUT 120)
  set_tests_properties(test_analyzer_int8_mobilenetv1 PROPERTIES TIMEOUT 120)
+ set_tests_properties(test_analyzer_int8_mobilenetv3_large PROPERTIES TIMEOUT 120)
 endif()
 
 set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
Original file line number	Diff line number	Diff line change
`@@ -244,7 +244,7 @@ GradNodeBase::ApplyGradientHooks(`
`244`	`244`	`if (!out.defined() || !out.initialized()) {`
`245`	`245`	`out = (*hook)(tensors[slot_id][rank]);`
`246`	`246`	`} else {`
`247`		`- // If more than one hook is registered, the input to the next hook func`
	`247`	`+ // If more than one hook is registered, the input to the next hook func`
`248`	`248`	`// should be the output of the previous hook`
`249`	`249`	`out = (*hook)(out);`
`250`	`250`	`}`
Original file line number	Diff line number	Diff line change
`@@ -88,6 +88,8 @@ class InferShapeArgumentMappingContext : public phi::ArgumentMappingContext {`
`88`	`88`	`return var_types[0] == proto::VarType::SELECTED_ROWS;`
`89`	`89`	`}`
`90`	`90`
	`91`	`+ bool IsForInferShape() const override { return true; }`
	`92`	`+`
`91`	`93`	`private:`
`92`	`94`	`const InferShapeContext& ctx_;`
`93`	`95`	`};`
`@@ -127,7 +129,9 @@ class CompatMetaTensor : public phi::MetaTensor {`
`127`	`129`	`}`
`128`	`130`	`} else {`
`129`	`131`	`auto* var = BOOST_GET_CONST(VarDesc*, var_);`
`130`		`- return phi::make_ddim(var->GetShape());`
	`132`	`+`
	`133`	`+ return var->GetShape().empty() ? phi::make_ddim({0UL})`
	`134`	`+ : phi::make_ddim(var->GetShape());`
`131`	`135`	`}`
`132`	`136`	`}`
`133`	`137`
Original file line number	Diff line number	Diff line change
`@@ -489,6 +489,8 @@ class ExecutionArgumentMappingContext : public phi::ArgumentMappingContext {`
`489`	`489`	`return ctx_.OutputVar(name)->IsType<phi::SelectedRows>();`
`490`	`490`	`}`
`491`	`491`
	`492`	`+ bool IsForInferShape() const override { return false; }`
	`493`	`+`
`492`	`494`	`private:`
`493`	`495`	`const ExecutionContext& ctx_;`
`494`	`496`	`};`