
Commit f23cfd6

Merge branch 'develop' into eager_dygraph_codege_fix_ld_path
2 parents: 5da5369 + dd3afc9

134 files changed (+3546, -886 lines)

cmake/inference_lib.cmake

Lines changed: 3 additions & 3 deletions
@@ -218,16 +218,16 @@ include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io)
 
 # copy api headers for pten & custom op
 copy(inference_lib_dist
-  SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/ext/*
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/ext/*.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/ext/)
 copy(inference_lib_dist
-  SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/include/*
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/include/*.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/include/)
 copy(inference_lib_dist
   SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/api/all.h
   DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/pten/api/)
 copy(inference_lib_dist
-  SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/*
+  SRCS ${PADDLE_SOURCE_DIR}/paddle/pten/common/*.h
   ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/bfloat16.h
   ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/complex.h
   ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/float16.h

cmake/pten.cmake

Lines changed: 1 addition & 2 deletions
@@ -20,8 +20,7 @@ string(FIND ${TARGET_PATH} "experimental" pos)
 if (pos GREATER 1)
   file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
   foreach(header ${HEADERS})
-    string(FIND ${header} ".h" hpos)
-    if (hpos GREATER 1)
+    if (${header} MATCHES ".*.h$")
       file(READ ${header} HEADER_CONTENT)
       string(REPLACE "paddle/pten/" "paddle/include/experimental/pten/" HEADER_CONTENT "${HEADER_CONTENT}")
       string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" HEADER_CONTENT "${HEADER_CONTENT}")
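Like the inference_lib.cmake globs above, this check now selects only real header files: the old string(FIND) test fired on ".h" appearing anywhere in the path, while the MATCHES regex anchors the match at the end of the filename. A standalone C++ sketch of the same suffix filter, with hypothetical paths standing in for the globbed ${HEADERS} list (uses C++20 std::string_view::ends_with):

#include <iostream>
#include <string_view>
#include <vector>

int main() {
  // Hypothetical stand-ins for the globbed ${HEADERS} list.
  std::vector<std::string_view> headers = {
      "pten/api/ext/tensor.h", "pten/api/ext/tensor.cc",
      "pten/common/data_type.h"};
  for (std::string_view path : headers) {
    // Same intent as the CMake guard `if (${header} MATCHES ".*.h$")`:
    // accept the file only when its name ends in ".h".
    if (path.ends_with(".h")) std::cout << path << '\n';
  }
}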

cmake/third_party.cmake

Lines changed: 1 addition & 1 deletion
@@ -393,7 +393,7 @@ endif (WIN32)
 
 if (WITH_INFRT)
     include(external/llvm)
-    list(APPEND third_party_deps external_llvm)
+    list(APPEND third_party_deps ${llvm_libs})
 endif()
 
 if (WITH_IPU)

paddle/fluid/framework/custom_operator.cc

Lines changed: 7 additions & 7 deletions
@@ -110,8 +110,8 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                           const std::vector<std::string>& outputs,
                           const std::vector<std::string>& attrs) {
   VLOG(1) << "Custom Operator: Start run KernelFunc.";
-  std::vector<paddle::Tensor> custom_ins;
-  std::vector<std::vector<paddle::Tensor>> custom_vec_ins;
+  std::vector<paddle::experimental::Tensor> custom_ins;
+  std::vector<std::vector<paddle::experimental::Tensor>> custom_vec_ins;
   for (auto& in_name : inputs) {
     VLOG(1) << "Custom Operator: input name - " << in_name;
     if (detail::IsDuplicableVar(in_name)) {
@@ -120,7 +120,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
       PADDLE_ENFORCE_NE(vec_x.empty(), true,
                         platform::errors::NotFound(
                             "Input vector<tensor> (%s) is empty.", in_name));
-      std::vector<paddle::Tensor> custom_vec_in;
+      std::vector<paddle::experimental::Tensor> custom_vec_in;
       for (size_t i = 0; i < vec_x.size(); ++i) {
         auto* x = vec_x[i];
         PADDLE_ENFORCE_NOT_NULL(
@@ -132,7 +132,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                 "The %d-th tensor in input vector<tensor> (%s) "
                 "is not initialized.",
                 i, in_name));
-        paddle::Tensor custom_t;
+        paddle::experimental::Tensor custom_t;
         custom_t.set_impl(std::move(experimental::MakePtenDenseTensor(*x)));
         custom_vec_in.emplace_back(custom_t);
       }
@@ -144,7 +144,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
       PADDLE_ENFORCE_EQ(x->IsInitialized(), true,
                         platform::errors::InvalidArgument(
                             "Input tensor (%s) is not initialized.", in_name));
-      paddle::Tensor custom_in;
+      paddle::experimental::Tensor custom_in;
       custom_in.set_impl(std::move(experimental::MakePtenDenseTensor(*x)));
       custom_ins.emplace_back(custom_in);
     }
@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                 "Tensors.",
                 vec_true_outs.size(), outs.size()));
         for (size_t j = 0; j < vec_true_outs.size(); ++j) {
-          experimental::MovesStorage(
+          experimental::MovesSharedStorage(
               std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl())
                   .get(),
               vec_true_outs.at(j));
         }
       } else {
         auto* true_out = ctx.Output<Tensor>(out_name);
-        experimental::MovesStorage(
+        experimental::MovesSharedStorage(
             std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl())
                 .get(),
             true_out);
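Two things change here: custom-op tensors move from paddle::Tensor to paddle::experimental::Tensor, and outputs are handed back via MovesSharedStorage instead of MovesStorage. Judging by the names, the new helper shares the underlying buffer with the framework tensor rather than transferring it away. The sketch below illustrates that distinction with std::shared_ptr purely as an analogy; it is not Paddle's actual implementation:

#include <cassert>
#include <memory>
#include <utility>

int main() {
  auto buffer = std::make_shared<int>(42);
  // "Shared storage": both handles remain valid and alias one buffer.
  std::shared_ptr<int> shared_view = buffer;
  assert(buffer && shared_view && *shared_view == 42);
  // "Moved storage": the source handle is emptied, so any later access
  // through it would be a bug -- the hazard the shared variant avoids.
  std::shared_ptr<int> moved_view = std::move(buffer);
  assert(moved_view && !buffer);
}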

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 17 additions & 0 deletions
@@ -2412,6 +2412,23 @@ PDNode *patterns::OrphanedBfloat16::operator()() {
   return next_op;
 }
 
+PDNode *patterns::UnsupportedBfloat16::operator()() {
+  auto *prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
+  prev_op->assert_more([&](Node *node) {
+    return node->Op()->HasAttr("mkldnn_data_type") == false;
+  });
+  auto *prev_out = pattern->NewNode(prev_out_repr())->AsOutput();
+
+  auto *op = pattern->NewNode(op_repr())->assert_is_op();
+  op->assert_more([&](Node *node) {
+    return node->Op()->GetAttrIfExists<std::string>("mkldnn_data_type") ==
+           "bfloat16";
+  });
+  prev_op->LinksTo({prev_out});
+  op->LinksFrom({prev_out});
+  return op;
+}
+
 PDNode *patterns::LastBfloat16Ops::operator()() {
   auto *op = pattern->NewNode(op_repr())->assert_is_op();
   op->assert_more([&](Node *node) {

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 10 additions & 0 deletions
@@ -1416,6 +1416,16 @@ struct OrphanedBfloat16 : public PatternBase {
   PATTERN_DECL_NODE(next_op);
 };
 
+struct UnsupportedBfloat16 : public PatternBase {
+  UnsupportedBfloat16(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "unsupported_bfloat16") {}
+  PDNode* operator()();
+
+  PATTERN_DECL_NODE(prev_op);
+  PATTERN_DECL_NODE(prev_out);
+  PATTERN_DECL_NODE(op);
+};
+
 struct LastBfloat16Ops : public PatternBase {
   LastBfloat16Ops(PDPattern* pattern, const std::string& name_scope)
      : PatternBase(pattern, name_scope, "last_bfloat16_ops") {}
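The new UnsupportedBfloat16 pattern matches a producer op that carries no mkldnn_data_type attribute feeding an op tagged bfloat16; the assert_more calls attach arbitrary predicates to pattern nodes. A minimal self-contained sketch of that predicate idiom follows; ToyNode and every name in it are hypothetical stand-ins, not Paddle types:

#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-in for an IR node carrying string attributes.
struct ToyNode {
  std::string op_type;
  std::map<std::string, std::string> attrs;
};

int main() {
  using Pred = std::function<bool(const ToyNode&)>;
  // Mirrors op->assert_more(...): attribute present and equal to "bfloat16".
  Pred is_bf16_op = [](const ToyNode& n) {
    auto it = n.attrs.find("mkldnn_data_type");
    return it != n.attrs.end() && it->second == "bfloat16";
  };
  // Mirrors prev_op->assert_more(...): attribute absent entirely.
  Pred lacks_attr = [](const ToyNode& n) {
    return n.attrs.count("mkldnn_data_type") == 0;
  };
  std::vector<ToyNode> nodes = {
      {"relu", {}}, {"conv2d", {{"mkldnn_data_type", "bfloat16"}}}};
  for (const auto& n : nodes)
    std::cout << n.op_type << " bf16-op=" << is_bf16_op(n)
              << " attr-free=" << lacks_attr(n) << '\n';
}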

paddle/fluid/framework/ir/ipu/infer_shape_pass.cc

Lines changed: 1 addition & 3 deletions
@@ -13,14 +13,12 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h"
-
-#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
-
 #include "paddle/fluid/framework/ddim.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
 #include "paddle/fluid/framework/ir/pass_tester_helper.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/variable_helper.h"
+#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
 
 namespace paddle {
 namespace framework {

paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.cc

Lines changed: 15 additions & 1 deletion
@@ -147,7 +147,21 @@ Conv3DBiasFusePass::Conv3DBiasFusePass() {
       .IsType<std::vector<int>>()
       .End()
       .AddAttr("data_format")
-      .IsStringIn({"NCHW", "NHWC"})
+      .IsStringIn({"NDHWC", "NCDHW"})
+      .End();
+
+  AddOpCompat(OpCompat("elementwise_add"))
+      .AddInput("X")
+      .IsTensor()
+      .End()
+      .AddInput("Y")
+      .IsTensor()
+      .End()
+      .AddOutput("Out")
+      .IsTensor()
+      .End()
+      .AddAttr("axis")
+      .IsNumGE(1)
       .End();
 }
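The data_format fix reflects that conv3d consumes 5-D tensors, whose valid layouts are NCDHW and NDHWC rather than the 4-D NCHW/NHWC strings the pass previously accepted. For intuition, here is a standalone sketch of row-major flat indexing under NCDHW; the dims and the layout assumption are illustrative, not taken from the pass:

#include <array>
#include <cstddef>
#include <iostream>

// Row-major flat offset into an NCDHW-packed 5-D tensor.
std::size_t OffsetNCDHW(const std::array<std::size_t, 5>& dims, std::size_t n,
                        std::size_t c, std::size_t d, std::size_t h,
                        std::size_t w) {
  const auto [N, C, D, H, W] = dims;
  (void)N;  // the batch extent never enters the offset itself
  return (((n * C + c) * D + d) * H + h) * W + w;
}

int main() {
  const std::array<std::size_t, 5> dims = {2, 3, 4, 5, 6};  // N,C,D,H,W
  std::cout << OffsetNCDHW(dims, 1, 2, 3, 4, 5) << '\n';  // 719, the last slot
}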

paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.cc

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ ConvConcatReLUFusePass::ConvConcatReLUFusePass() {
       .IsType<std::vector<int>>()
       .End()
       .AddAttr("data_format")
-      .IsStringIn({"NCHW", "NHWC", "AnyLayout"})
+      .IsStringIn({"NCHW"})
       .End();
 
   AddOpCompat(OpCompat("concat"))

paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_placement_pass.cc

Lines changed: 21 additions & 0 deletions
@@ -71,10 +71,31 @@ void CPUBfloat16PlacementPass::RemoveOrphanedOperators(
   gpd(graph, handler);
 }
 
+void CPUBfloat16PlacementPass::RemoveUnsupportedOperators(
+    ir::Graph* graph, int* bfloat16_operators) const {
+  // now quantize is supported FP32 only, so try to find
+  // bfloat16 operator that input type is not FP32
+  GraphPatternDetector gpd;
+  patterns::UnsupportedBfloat16 unsupported_bfloat16_pattern{
+      gpd.mutable_pattern(), "unsupported_bfloat16"};
+  unsupported_bfloat16_pattern();
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    GET_IR_NODE_FROM_SUBGRAPH(prev_out, prev_out, unsupported_bfloat16_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(op, op, unsupported_bfloat16_pattern);
+    if ((prev_out->Var()->GetDataType() != proto::VarType::FP32)) {
+      op->Op()->SetAttr("mkldnn_data_type", std::string("float32"));
+      bfloat16_operators--;
+    }
+  };
+  gpd(graph, handler);
+}
+
 void CPUBfloat16PlacementPass::ApplyImpl(ir::Graph* graph) const {
   int bfloat16_operators = 0;
   SetMkldnnDataType(graph, &bfloat16_operators);
   RemoveOrphanedOperators(graph, &bfloat16_operators);
+  RemoveUnsupportedOperators(graph, &bfloat16_operators);
   PrettyLogDetail("--- marked %d operators to bfloat16 ",
                   bfloat16_operators);
 }
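The new RemoveUnsupportedOperators step demotes any op tagged bfloat16 whose input variable is not FP32 back to float32, since (per the in-code comment) quantization currently supports FP32 inputs only. The decision rule, condensed into a self-contained sketch; the VarType enum and Placement function are hypothetical stand-ins for proto::VarType and the pass machinery:

#include <iostream>
#include <string>

// Hypothetical stand-in for proto::VarType's data-type tags.
enum class VarType { FP32, FP64, INT8 };

// An op requesting bfloat16 keeps it only when its input is FP32.
std::string Placement(VarType input_type, const std::string& requested) {
  if (requested == "bfloat16" && input_type != VarType::FP32)
    return "float32";  // fall back, mirroring op->Op()->SetAttr(...)
  return requested;
}

int main() {
  std::cout << Placement(VarType::FP32, "bfloat16") << '\n';  // bfloat16
  std::cout << Placement(VarType::INT8, "bfloat16") << '\n';  // float32
}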
