
Commit 2bd0f3c

Quantize slice op (#37630)

* quantize slice op
* correct test
* fix code formatting

1 parent c9a3c66 commit 2bd0f3c

File tree

15 files changed: +450 -138 lines changed

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 21 additions & 1 deletion

@@ -1619,6 +1619,26 @@ PDNode *patterns::Reshape::operator()() {
   return reshape_out;
 }
 
+PDNode *patterns::Slice::operator()() {
+  auto prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
+
+  auto slice_op = pattern->NewNode(slice_op_repr())->assert_is_op("slice");
+
+  auto slice_in = pattern->NewNode(slice_in_repr())
+                      ->AsInput()
+                      ->assert_is_op_input("slice", "Input");
+  auto slice_out = pattern->NewNode(slice_out_repr())
+                       ->AsOutput()
+                       ->assert_is_op_output("slice", "Out");
+
+  auto next_op = pattern->NewNode(next_op_repr())->assert_is_op();
+
+  prev_op->LinksTo({slice_in});
+  slice_op->LinksFrom({slice_in}).LinksTo({slice_out});
+  next_op->LinksFrom({slice_out});
+  return slice_out;
+}
+
 PDNode *patterns::Matmul::operator()() {
   auto matmul_op = pattern->NewNode(matmul_op_repr())->assert_is_op("matmul");

@@ -2315,7 +2335,7 @@ PDNode *patterns::QuantizePlacement::operator()(
       std::unordered_set<std::string>({"concat", "conv2d", "elementwise_add",
                                        "fc", "matmul", "pool2d", "prior_box",
                                        "reshape2", "transpose2", "fusion_gru",
-                                       "fusion_lstm", "multi_gru"});
+                                       "fusion_lstm", "multi_gru", "slice"});
   if (!quantize_enabled_op_types.empty()) {
     supported_op_types = quantize_enabled_op_types;
   }
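In the pattern above, prev_op and next_op are unconstrained ops, so the detector matches any producer/consumer pair around a slice; the returned slice_out node is the pattern's anchor. For orientation, here is a condensed sketch of how a pass drives this pattern — abridged from the QuantizeSlice handler this commit adds in cpu_quantize_pass.cc below, not standalone code:

// Condensed from CPUQuantizePass::QuantizeSlice (added later in this commit).
GraphPatternDetector gpd;
patterns::Slice slice_pattern{gpd.mutable_pattern(), name_scope_};
slice_pattern();  // builds prev_op -> slice_in -> slice_op -> slice_out -> next_op

auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
  // Map pattern nodes back to the matched IR nodes.
  GET_IR_NODE_FROM_SUBGRAPH(slice_op, slice_op, slice_pattern);
  GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern);
  GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern);
  // ... rewrite the matched subgraph here ...
};
gpd(graph, handler);  // invokes the handler once per match found in the graph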

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 14 additions & 0 deletions

@@ -980,6 +980,20 @@ struct Reshape : public PatternBase {
   PATTERN_DECL_NODE(reshape_out);
   PATTERN_DECL_NODE(next_op);
 };
+
+// Slice op
+// Forward pass for slice.
+// slice_out is a result of the operator.
+struct Slice : public PatternBase {
+  Slice(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "slice") {}
+
+  PDNode* operator()();
+  PATTERN_DECL_NODE(prev_op);
+  PATTERN_DECL_NODE(slice_in);
+  PATTERN_DECL_NODE(slice_op);
+  PATTERN_DECL_NODE(slice_out);
+  PATTERN_DECL_NODE(next_op);
+};
 
 // Matmul op
 // Forward pass for matmul.
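Each PATTERN_DECL_NODE(name) declaration supplies the name_repr() helper that the builder in graph_pattern_detector.cc uses (slice_op_repr(), slice_in_repr(), ...) plus a lookup accessor for the registered node. Roughly — a sketch of the macro's effect, not its verbatim expansion:

// Approximate effect of PATTERN_DECL_NODE(slice_op) inside struct Slice:
//   std::string slice_op_repr() const;  // unique node key within this pattern
//   PDNode* slice_op_n() const;         // retrieves the PDNode registered under that key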

paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc

Lines changed: 52 additions & 0 deletions

@@ -676,6 +676,57 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
   PrettyLogDetail("--- quantized %d reshape ops", quantize_reshape_count);
 }
 
+void CPUQuantizePass::QuantizeSlice(Graph* graph) const {
+  GraphPatternDetector gpd;
+  auto pattern = gpd.mutable_pattern();
+  patterns::Slice slice_pattern{pattern, name_scope_};
+  slice_pattern();
+
+  int quantize_slice_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "Quantize slice op";
+    GET_IR_NODE_FROM_SUBGRAPH(slice_op, slice_op, slice_pattern);
+
+    // skip if should not be quantized
+    if (!platform::HasOpINT8DataType(slice_op->Op())) {
+      LogQuantizationDisabled(slice_op);
+      return;
+    }
+    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, slice_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, slice_pattern);
+
+    // skip if neither the prev op nor the next op is quantized
+    if (!IsOpDequantized(prev_op) && !IsOpQuantized(next_op)) {
+      return;
+    }
+    GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern);
+
+    if (!AreScalesPresentForNodes({slice_out})) {
+      LogCannotQuantizeOp(slice_op);
+      return;
+    }
+
+    bool is_input_unsigned{false};
+    auto input_scale = GetScaleValueForNode(slice_out, &is_input_unsigned);
+    QuantizeInput(g, slice_op, slice_in, "Input", input_scale,
+                  is_input_unsigned);
+
+    bool is_output_unsigned{false};
+    auto output_scale = GetScaleValueForNode(slice_out, &is_output_unsigned);
+    DequantizeOutput(g, slice_op, slice_out, "Out", output_scale,
+                     is_output_unsigned);
+
+    ++quantize_slice_count;
+  };
+
+  gpd(graph, handler);
+  AddStatis(quantize_slice_count);
+
+  PrettyLogDetail("--- quantized %d slice ops", quantize_slice_count);
+}
+
 void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
   GraphPatternDetector gpd;
   auto pattern = gpd.mutable_pattern();

@@ -1024,6 +1075,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeFusionGru(graph);
   QuantizeMultiGru(graph);
   QuantizeFusionLSTM(graph);
+  QuantizeSlice(graph);
 }
 
 }  // namespace ir
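Slice moves data without changing values, so a single scale read from slice_out serves both calls above: QuantizeInput quantizes Input and DequantizeOutput dequantizes Out with the same factor. For intuition, a minimal sketch of the symmetric INT8 mapping such quantize/dequantize pairs implement — the exact rounding and saturation behavior of the oneDNN ops is an assumption here, not taken from this commit:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric INT8 quantization sketch: scale is typically 127 / max|x|.
int8_t QuantizeValue(float x, float scale) {
  float q = std::round(x * scale);  // scale into the INT8 range
  return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, q)));
}

// Inverse mapping, as performed by the inserted dequantize op.
float DequantizeValue(int8_t q, float scale) {
  return static_cast<float>(q) / scale;
}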

paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h

Lines changed: 1 addition & 0 deletions

@@ -61,6 +61,7 @@ class CPUQuantizePass : public FusePassBase {
   void QuantizeFusionGru(Graph* graph) const;
   void QuantizeMultiGru(Graph* graph) const;
   void QuantizeFusionLSTM(Graph* graph) const;
+  void QuantizeSlice(Graph* graph) const;
 
   void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                      double scale_to_one, bool is_input_unsigned,

paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc

Lines changed: 111 additions & 0 deletions

@@ -55,6 +55,10 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});
     op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+  } else if (type == "slice") {
+    op->SetInput("Input", {inputs[0]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
   } else if (type == "dropout") {
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});

@@ -784,6 +788,113 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
                 added_nodes_count, 2.0f * 127);
 }
 
+static const std::initializer_list<std::string> variable_names_slice = {
+    "a", "b", "c", "d"};
+
+// a->Dequantize->b
+// b->Slice->c
+// c->Dropout->d
+ProgramDesc BuildProgramDescSlice() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_slice) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
+  SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
+
+  return prog;
+}
+
+// a->Transpose2->b
+// b->Slice->c
+// c->Dropout->d
+ProgramDesc BuildProgramDescSliceBetweenNonQuantizedOp() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_slice) {
+    prog.MutableBlock(0)->Var(v);
+  }
+
+  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32");
+  SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
+
+  return prog;
+}
+
+void MainTestSlice(const ProgramDesc& prog, int transpose_count,
+                   int slice_count, int quant_count, int dequant_count,
+                   int added_nodes_count, float scale) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, variable_names_slice, &original_nodes_num,
+              &current_nodes_num);
+
+  float quant_scale = 1.0f;
+  float dequant_scale = 1.0f;
+  int quantize_nodes_count = 0;
+  int dequantize_nodes_count = 0;
+  int transpose_nodes_count = 0;
+  int slice_nodes_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "transpose2") {
+        transpose_nodes_count++;
+      } else if (op->Type() == "slice") {
+        slice_nodes_count++;
+      } else if (op->Type() == "quantize") {
+        quantize_nodes_count++;
+        quant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale"));
+        EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op->Type() + "'.";
+      } else if (op->Type() == "dequantize") {
+        dequantize_nodes_count++;
+        auto op_name = op->GetAttrIfExists<std::string>("name");
+        VLOG(3) << op_name << "\n";
+        if (op_name != "Dequantize1") {
+          dequant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale"));
+          EXPECT_EQ(dequant_scale, scale)
+              << "Scale for node '" + op->Type() + "'.";
+        }
+      }
+    }
+  }
+  EXPECT_EQ(transpose_nodes_count, transpose_count);
+  EXPECT_EQ(slice_nodes_count, slice_count);
+  EXPECT_EQ(quantize_nodes_count, quant_count);
+  EXPECT_EQ(dequantize_nodes_count, dequant_count);
+  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
+}
+
+TEST(CpuQuantizePass, slice) {
+  // a->Dequantize->b
+  // b->Quant->b'->Slice->c'->Dequant->c
+  // c->Dropout->d
+  int slice_count = 1;
+  int transpose_count = 0;
+  int quant_count = 1;
+  int dequant_count = 2;
+  // 1 Quant + 1 IN + 1 DeQuant + 1 OUT
+  int added_nodes_count = 4;
+  MainTestSlice(BuildProgramDescSlice(), transpose_count, slice_count,
+                quant_count, dequant_count, added_nodes_count, 2.0f * 127);
+}
+
+TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) {
+  // a->Transpose2->b
+  // b->Slice->c
+  // c->Dropout->d
+  int slice_count = 1;
+  int transpose_count = 1;
+  int quant_count = 0;
+  int dequant_count = 0;
+  // 0 Quant + 0 IN + 0 DeQuant + 0 OUT
+  int added_nodes_count = 0;
+  MainTestSlice(BuildProgramDescSliceBetweenNonQuantizedOp(), transpose_count,
+                slice_count, quant_count, dequant_count, added_nodes_count,
+                2.0f * 127);
+}
+
 static const std::initializer_list<std::string> variable_names_matmul = {
     "a", "b", "c", "d", "e", "f"};
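The added_nodes_count arithmetic in these tests follows from how the pass rewires the graph: QuantizeInput inserts one quantize op plus its output variable, and DequantizeOutput inserts one dequantize op plus its input variable — four new nodes when the slice is quantized, zero when the pass bails out. Schematically (the primed names are illustrative, not actual variable names):

// Before the pass:  b --> Slice --> c
// After the pass:   b --> quantize --> b' --> Slice --> c' --> dequantize --> c
//                        (+1 op)    (+1 var)          (+1 var)  (+1 op)   = 4 new nodes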

paddle/fluid/inference/api/mkldnn_quantizer.cc

Lines changed: 10 additions & 0 deletions

@@ -134,6 +134,16 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
       scales_[var_name] = scales_[input_var_name];
     }
     compute_scale = false;
+  } else if (op->Type() == "slice") {
+    auto input_var_name = op->Input("Input")[0];
+    PADDLE_ENFORCE_NE(scales_.find(input_var_name), scales_.end(),
+                      platform::errors::PreconditionNotMet(
+                          "Input scales must be calculated before the "
+                          "output scales to infer if output is unsigned."));
+    if (scales_.find(input_var_name) != scales_.end()) {
+      scales_[var_name] = scales_[input_var_name];
+    }
+    compute_scale = false;
   } else if (op->Type() == "concat") {
     // output of ops with unsigned input must be unsigned
     is_unsigned = true;
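Like the pass-through branch directly above it, the slice branch propagates the input scale to the output instead of computing a fresh one: slicing only selects a sub-tensor, so the retained values stay within the input's calibrated range. Note that the PADDLE_ENFORCE_NE already fails when the input scale is missing, so the subsequent find() check can never be false; it is kept for symmetry with the neighboring branches.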

paddle/fluid/inference/api/mkldnn_quantizer_config.cc

Lines changed: 3 additions & 0 deletions

@@ -42,6 +42,9 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
   rules_["transpose2"]["X"] = ScaleAlgo::KL;
   rules_["transpose2"]["Out"] = ScaleAlgo::NONE;
 
+  rules_["slice"]["Input"] = ScaleAlgo::KL;
+  rules_["slice"]["Out"] = ScaleAlgo::NONE;
+
   rules_["fc"]["Input"] = ScaleAlgo::KL;
   rules_["fc"]["W"] = ScaleAlgo::MAX_CH_T;
   rules_["fc"]["Bias"] = ScaleAlgo::NONE;

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 16 additions & 3 deletions

@@ -94,6 +94,17 @@ function(inference_analysis_api_test target install_dir filename)
       ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt --refer_result=${install_dir}/result.txt)
 endfunction()
 
+function(inference_analysis_api_int8_test target install_dir filename)
+  inference_analysis_test(${target} SRCS ${filename}
+      EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+      ARGS --infer_model=${install_dir}/model
+           --infer_data=${install_dir}/data.txt
+           --refer_result=${install_dir}/result.txt
+           --accuracy=0.8
+           --batch_size=5
+           --enable_int8=true)
+endfunction()
+
 function(inference_multiple_models_analysis_api_test target install_dir filename)
   inference_analysis_test(${target} SRCS ${filename}
       EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}

@@ -284,13 +295,14 @@ set(PYRAMID_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/pyramid_dnn")
 download_model_and_data_without_verify(${PYRAMID_DNN_INSTALL_DIR} "PyramidDNN_model.tar.gz" "PyramidDNN_data.txt.tar.gz")
 inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR} analyzer_pyramid_dnn_tester.cc)
 
-#Ernie
+# Ernie
 set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
 download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1)
 download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" 73beea65abda2edb61c1662cd3180c62)
 inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
+inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc)
 
-#Ernie large
+# Ernie large
 set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_Large")
 download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_large_model.tar.gz" af7715245ed32cc77374625d4c80f7ef "Ernie_large_data.txt.tar.gz" edb2113eec93783cad56ed76d47ba57f)
 download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz" 1facda98eef1085dc9d435ebf3f23a73)

@@ -426,7 +438,7 @@ if(WITH_MKLDNN)
   # TODO(grygielski) Enable after MKL-DNN 1.0 merge
   set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
   download_int8_data_without_verify(${INT8_VGG16_MODEL_DIR} "VGG16_int8_model.tar.gz" )
-# inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
+  # inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
   # vgg19 int8
   # TODO(grygielski) Enable after MKL-DNN 1.0 merge

@@ -730,6 +742,7 @@ set_tests_properties(test_analyzer_mobilenet_transpose PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ner PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
+set_tests_properties(test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_googlenet PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_small_dam PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_transformer PROPERTIES TIMEOUT 120)
paddle/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc

Lines changed: 54 additions & 0 deletions (new file; path inferred from the CMakeLists.txt entry above)

@@ -0,0 +1,54 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/tests/api/analyzer_ernie_tester.h"
+
+namespace paddle {
+namespace inference {
+
+using paddle::PaddleTensor;
+
+#ifdef PADDLE_WITH_MKLDNN
+void SetInt8Config(AnalysisConfig *cfg,
+                   std::vector<paddle::PaddleTensor> data) {
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->EnableMKLDNN();
+  cfg->EnableMkldnnQuantizer();
+  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(data);
+  cfg->mkldnn_quantizer_config()->SetWarmupData(warmup_data);
+  cfg->mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_batch_size);
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
+}
+
+// Compare results of NativeConfig and AnalysisConfig
+void compare_int8(bool use_mkldnn = false) {
+  std::vector<std::vector<PaddleTensor>> inputs;
+  LoadInputData(&inputs);
+
+  AnalysisConfig cfg;
+  SetInt8Config(&cfg, inputs[0]);
+
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
+}
+
+TEST(Analyzer_ernie, compare_int8_mkldnn) {
+  compare_int8(true /* use_mkldnn */);
+}
+#endif
+
+}  // namespace inference
+}  // namespace paddle
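Once built, this binary is registered with ctest as test_analyzer_ernie_int8; the inference_analysis_api_int8_test function added in CMakeLists.txt above supplies its flags (--infer_model, --infer_data, --refer_result, --accuracy=0.8, --batch_size=5, --enable_int8=true), and its timeout is set to 120 seconds alongside the other analyzer tests.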
