
Commit 2bd0f3c

Quantize slice op (#37630)

* quantize slice op
* correct test
* fix code formatting

1 parent c9a3c66 commit 2bd0f3c

File tree

15 files changed: +450 -138 lines changed

paddle/fluid/framework/ir/graph_pattern_detector.cc

Lines changed: 21 additions & 1 deletion

@@ -1619,6 +1619,26 @@ PDNode *patterns::Reshape::operator()() {
   return reshape_out;
 }
 
+PDNode *patterns::Slice::operator()() {
+  auto prev_op = pattern->NewNode(prev_op_repr())->assert_is_op();
+
+  auto slice_op = pattern->NewNode(slice_op_repr())->assert_is_op("slice");
+
+  auto slice_in = pattern->NewNode(slice_in_repr())
+                      ->AsInput()
+                      ->assert_is_op_input("slice", "Input");
+  auto slice_out = pattern->NewNode(slice_out_repr())
+                       ->AsOutput()
+                       ->assert_is_op_output("slice", "Out");
+
+  auto next_op = pattern->NewNode(next_op_repr())->assert_is_op();
+
+  prev_op->LinksTo({slice_in});
+  slice_op->LinksFrom({slice_in}).LinksTo({slice_out});
+  next_op->LinksFrom({slice_out});
+  return slice_out;
+}
+
 PDNode *patterns::Matmul::operator()() {
   auto matmul_op = pattern->NewNode(matmul_op_repr())->assert_is_op("matmul");

@@ -2315,7 +2335,7 @@ PDNode *patterns::QuantizePlacement::operator()(
       std::unordered_set<std::string>({"concat", "conv2d", "elementwise_add",
                                        "fc", "matmul", "pool2d", "prior_box",
                                        "reshape2", "transpose2", "fusion_gru",
-                                       "fusion_lstm", "multi_gru"});
+                                       "fusion_lstm", "multi_gru", "slice"});
   if (!quantize_enabled_op_types.empty()) {
     supported_op_types = quantize_enabled_op_types;
   }
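In the pattern above, prev_op and next_op are unconstrained ops, so the detector matches any producer/consumer pair around a slice; the returned slice_out node is the pattern's anchor. For orientation, here is a condensed sketch of how a pass drives this pattern — abridged from the QuantizeSlice handler this commit adds in cpu_quantize_pass.cc below, not standalone code:

// Condensed from CPUQuantizePass::QuantizeSlice (added later in this commit).
GraphPatternDetector gpd;
patterns::Slice slice_pattern{gpd.mutable_pattern(), name_scope_};
slice_pattern();  // builds prev_op -> slice_in -> slice_op -> slice_out -> next_op

auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
  // Map pattern nodes back to the matched IR nodes.
  GET_IR_NODE_FROM_SUBGRAPH(slice_op, slice_op, slice_pattern);
  GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern);
  GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern);
  // ... rewrite the matched subgraph here ...
};
gpd(graph, handler);  // invokes the handler once per match found in the graph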

paddle/fluid/framework/ir/graph_pattern_detector.h

Lines changed: 14 additions & 0 deletions

@@ -980,6 +980,20 @@ struct Reshape : public PatternBase {
   PATTERN_DECL_NODE(reshape_out);
   PATTERN_DECL_NODE(next_op);
 };
+
+// Slice op
+// Forward pass for slice.
+// slice_out is a result of the operator.
+struct Slice : public PatternBase {
+  Slice(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "slice") {}
+
+  PDNode* operator()();
+  PATTERN_DECL_NODE(prev_op);
+  PATTERN_DECL_NODE(slice_in);
+  PATTERN_DECL_NODE(slice_op);
+  PATTERN_DECL_NODE(slice_out);
+  PATTERN_DECL_NODE(next_op);
+};
 
 // Matmul op
 // Forward pass for matmul.
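Each PATTERN_DECL_NODE(name) declaration supplies the name_repr() helper that the builder in graph_pattern_detector.cc uses (slice_op_repr(), slice_in_repr(), ...) plus a lookup accessor for the registered node. Roughly — a sketch of the macro's effect, not its verbatim expansion:

// Approximate effect of PATTERN_DECL_NODE(slice_op) inside struct Slice:
//   std::string slice_op_repr() const;  // unique node key within this pattern
//   PDNode* slice_op_n() const;         // retrieves the PDNode registered under that key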

paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc

Lines changed: 52 additions & 0 deletions

@@ -676,6 +676,57 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
   PrettyLogDetail("--- quantized %d reshape ops", quantize_reshape_count);
 }
 
+void CPUQuantizePass::QuantizeSlice(Graph* graph) const {
+  GraphPatternDetector gpd;
+  auto pattern = gpd.mutable_pattern();
+  patterns::Slice slice_pattern{pattern, name_scope_};
+  slice_pattern();
+
+  int quantize_slice_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "Quantize slice op";
+    GET_IR_NODE_FROM_SUBGRAPH(slice_op, slice_op, slice_pattern);
+
+    // skip if should not be quantized
+    if (!platform::HasOpINT8DataType(slice_op->Op())) {
+      LogQuantizationDisabled(slice_op);
+      return;
+    }
+    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, slice_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, slice_pattern);
+
+    // skip if neither the prev op nor the next op is quantized
+    if (!IsOpDequantized(prev_op) && !IsOpQuantized(next_op)) {
+      return;
+    }
+    GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern);
+
+    if (!AreScalesPresentForNodes({slice_out})) {
+      LogCannotQuantizeOp(slice_op);
+      return;
+    }
+
+    bool is_input_unsigned{false};
+    auto input_scale = GetScaleValueForNode(slice_out, &is_input_unsigned);
+    QuantizeInput(g, slice_op, slice_in, "Input", input_scale,
+                  is_input_unsigned);
+
+    bool is_output_unsigned{false};
+    auto output_scale = GetScaleValueForNode(slice_out, &is_output_unsigned);
+    DequantizeOutput(g, slice_op, slice_out, "Out", output_scale,
+                     is_output_unsigned);
+
+    ++quantize_slice_count;
+  };
+
+  gpd(graph, handler);
+  AddStatis(quantize_slice_count);
+
+  PrettyLogDetail("--- quantized %d slice ops", quantize_slice_count);
+}
+
 void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
   GraphPatternDetector gpd;
   auto pattern = gpd.mutable_pattern();

@@ -1024,6 +1075,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeFusionGru(graph);
   QuantizeMultiGru(graph);
   QuantizeFusionLSTM(graph);
+  QuantizeSlice(graph);
 }
 
 }  // namespace ir
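Slice moves data without changing values, so a single scale read from slice_out serves both calls above: QuantizeInput quantizes Input and DequantizeOutput dequantizes Out with the same factor. For intuition, a minimal sketch of the symmetric INT8 mapping such quantize/dequantize pairs implement — the exact rounding and saturation behavior of the oneDNN ops is an assumption here, not taken from this commit:

#include <algorithm>
#include <cmath>
#include <cstdint>

// Symmetric INT8 quantization sketch: scale is typically 127 / max|x|.
int8_t QuantizeValue(float x, float scale) {
  float q = std::round(x * scale);  // scale into the INT8 range
  return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, q)));
}

// Inverse mapping, as performed by the inserted dequantize op.
float DequantizeValue(int8_t q, float scale) {
  return static_cast<float>(q) / scale;
}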

paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h

Lines changed: 1 addition & 0 deletions

@@ -61,6 +61,7 @@ class CPUQuantizePass : public FusePassBase {
   void QuantizeFusionGru(Graph* graph) const;
   void QuantizeMultiGru(Graph* graph) const;
   void QuantizeFusionLSTM(Graph* graph) const;
+  void QuantizeSlice(Graph* graph) const;
 
   void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                      double scale_to_one, bool is_input_unsigned,

paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc

Lines changed: 111 additions & 0 deletions

@@ -55,6 +55,10 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});
     op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+  } else if (type == "slice") {
+    op->SetInput("Input", {inputs[0]});
+    op->SetOutput("Out", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
   } else if (type == "dropout") {
     op->SetInput("X", {inputs[0]});
     op->SetOutput("Out", {outputs[0]});

@@ -784,6 +788,113 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
                 added_nodes_count, 2.0f * 127);
 }
 
+static const std::initializer_list<std::string> variable_names_slice = {
+    "a", "b", "c", "d"};
+
+// a->Dequantize->b
+// b->Slice->c
+// c->Dropout->d
+ProgramDesc BuildProgramDescSlice() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_slice) {
+    prog.MutableBlock(0)->Var(v);
+  }
+  SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
+  SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
+
+  return prog;
+}
+
+// a->Transpose2->b
+// b->Slice->c
+// c->Dropout->d
+ProgramDesc BuildProgramDescSliceBetweenNonQuantizedOp() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_slice) {
+    prog.MutableBlock(0)->Var(v);
+  }
+
+  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32");
+  SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8");
+  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");
+
+  return prog;
+}
+
+void MainTestSlice(const ProgramDesc& prog, int transpose_count,
+                   int slice_count, int quant_count, int dequant_count,
+                   int added_nodes_count, float scale) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, variable_names_slice, &original_nodes_num,
+              &current_nodes_num);
+
+  float quant_scale = 1.0f;
+  float dequant_scale = 1.0f;
+  int quantize_nodes_count = 0;
+  int dequantize_nodes_count = 0;
+  int transpose_nodes_count = 0;
+  int slice_nodes_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "transpose2") {
+        transpose_nodes_count++;
+      } else if (op->Type() == "slice") {
+        slice_nodes_count++;
+      } else if (op->Type() == "quantize") {
+        quantize_nodes_count++;
+        quant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale"));
+        EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op->Type() + "'.";
+      } else if (op->Type() == "dequantize") {
+        dequantize_nodes_count++;
+        auto op_name = op->GetAttrIfExists<std::string>("name");
+        VLOG(3) << op_name << "\n";
+        if (op_name != "Dequantize1") {
+          dequant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale"));
+          EXPECT_EQ(dequant_scale, scale)
+              << "Scale for node '" + op->Type() + "'.";
+        }
+      }
+    }
+  }
+  EXPECT_EQ(transpose_nodes_count, transpose_count);
+  EXPECT_EQ(slice_nodes_count, slice_count);
+  EXPECT_EQ(quantize_nodes_count, quant_count);
+  EXPECT_EQ(dequantize_nodes_count, dequant_count);
+  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
+}
+
+TEST(CpuQuantizePass, slice) {
+  // a->Dequantize->b
+  // b->Quant->b'->Slice->c'->Dequant->c
+  // c->Dropout->d
+  int slice_count = 1;
+  int transpose_count = 0;
+  int quant_count = 1;
+  int dequant_count = 2;
+  // 1 Quant + 1 IN + 1 DeQuant + 1 OUT
+  int added_nodes_count = 4;
+  MainTestSlice(BuildProgramDescSlice(), transpose_count, slice_count,
+                quant_count, dequant_count, added_nodes_count, 2.0f * 127);
+}
+
+TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) {
+  // a->Transpose2->b
+  // b->Slice->c
+  // c->Dropout->d
+  int slice_count = 1;
+  int transpose_count = 1;
+  int quant_count = 0;
+  int dequant_count = 0;
+  // 0 Quant + 0 IN + 0 DeQuant + 0 OUT
+  int added_nodes_count = 0;
+  MainTestSlice(BuildProgramDescSliceBetweenNonQuantizedOp(), transpose_count,
+                slice_count, quant_count, dequant_count, added_nodes_count,
+                2.0f * 127);
+}
+
 static const std::initializer_list<std::string> variable_names_matmul = {
     "a", "b", "c", "d", "e", "f"};
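The added_nodes_count arithmetic in these tests follows from how the pass rewires the graph: QuantizeInput inserts one quantize op plus its output variable, and DequantizeOutput inserts one dequantize op plus its input variable — four new nodes when the slice is quantized, zero when the pass bails out. Schematically (the primed names are illustrative, not actual variable names):

// Before the pass:  b --> Slice --> c
// After the pass:   b --> quantize --> b' --> Slice --> c' --> dequantize --> c
//                        (+1 op)    (+1 var)          (+1 var)  (+1 op)   = 4 new nodes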

paddle/fluid/inference/api/mkldnn_quantizer.cc

Lines changed: 10 additions & 0 deletions

@@ -134,6 +134,16 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
       scales_[var_name] = scales_[input_var_name];
     }
     compute_scale = false;
+  } else if (op->Type() == "slice") {
+    auto input_var_name = op->Input("Input")[0];
+    PADDLE_ENFORCE_NE(scales_.find(input_var_name), scales_.end(),
+                      platform::errors::PreconditionNotMet(
+                          "Input scales must be calculated before the "
+                          "output scales to infer if output is unsigned."));
+    if (scales_.find(input_var_name) != scales_.end()) {
+      scales_[var_name] = scales_[input_var_name];
+    }
+    compute_scale = false;
   } else if (op->Type() == "concat") {
     // output of ops with unsigned input must be unsigned
     is_unsigned = true;
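Like the pass-through branch directly above it, the slice branch propagates the input scale to the output instead of computing a fresh one: slicing only selects a sub-tensor, so the retained values stay within the input's calibrated range. Note that the PADDLE_ENFORCE_NE already fails when the input scale is missing, so the subsequent find() check can never be false; it is kept for symmetry with the neighboring branches.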

paddle/fluid/inference/api/mkldnn_quantizer_config.cc

Lines changed: 3 additions & 0 deletions

@@ -42,6 +42,9 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
   rules_["transpose2"]["X"] = ScaleAlgo::KL;
   rules_["transpose2"]["Out"] = ScaleAlgo::NONE;
 
+  rules_["slice"]["Input"] = ScaleAlgo::KL;
+  rules_["slice"]["Out"] = ScaleAlgo::NONE;
+
   rules_["fc"]["Input"] = ScaleAlgo::KL;
   rules_["fc"]["W"] = ScaleAlgo::MAX_CH_T;
   rules_["fc"]["Bias"] = ScaleAlgo::NONE;

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 16 additions & 3 deletions

@@ -94,6 +94,17 @@ function(inference_analysis_api_test target install_dir filename)
       ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt --refer_result=${install_dir}/result.txt)
 endfunction()
 
+function(inference_analysis_api_int8_test target install_dir filename)
+  inference_analysis_test(${target} SRCS ${filename}
+      EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
+      ARGS --infer_model=${install_dir}/model
+           --infer_data=${install_dir}/data.txt
+           --refer_result=${install_dir}/result.txt
+           --accuracy=0.8
+           --batch_size=5
+           --enable_int8=true)
+endfunction()
+
 function(inference_multiple_models_analysis_api_test target install_dir filename)
   inference_analysis_test(${target} SRCS ${filename}
       EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}

@@ -284,13 +295,14 @@ set(PYRAMID_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/pyramid_dnn")
 download_model_and_data_without_verify(${PYRAMID_DNN_INSTALL_DIR} "PyramidDNN_model.tar.gz" "PyramidDNN_data.txt.tar.gz")
 inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR} analyzer_pyramid_dnn_tester.cc)
 
-#Ernie
+# Ernie
 set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
 download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1)
 download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" 73beea65abda2edb61c1662cd3180c62)
 inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
+inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc)
 
-#Ernie large
+# Ernie large
 set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie_Large")
 download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_large_model.tar.gz" af7715245ed32cc77374625d4c80f7ef "Ernie_large_data.txt.tar.gz" edb2113eec93783cad56ed76d47ba57f)
 download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz" 1facda98eef1085dc9d435ebf3f23a73)

@@ -426,7 +438,7 @@ if(WITH_MKLDNN)
   # TODO(grygielski) Enable after MKL-DNN 1.0 merge
   set(INT8_VGG16_MODEL_DIR "${INT8_DATA_DIR}/vgg16")
   download_int8_data_without_verify(${INT8_VGG16_MODEL_DIR} "VGG16_int8_model.tar.gz" )
-# inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
+  # inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
 
   # vgg19 int8
   # TODO(grygielski) Enable after MKL-DNN 1.0 merge

@@ -730,6 +742,7 @@ set_tests_properties(test_analyzer_mobilenet_transpose PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ner PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
+set_tests_properties(test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_googlenet PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_small_dam PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_transformer PROPERTIES TIMEOUT 120)
paddle/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc

Lines changed: 54 additions & 0 deletions (new file; path inferred from the CMakeLists.txt entry above)

@@ -0,0 +1,54 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/tests/api/analyzer_ernie_tester.h"
+
+namespace paddle {
+namespace inference {
+
+using paddle::PaddleTensor;
+
+#ifdef PADDLE_WITH_MKLDNN
+void SetInt8Config(AnalysisConfig *cfg,
+                   std::vector<paddle::PaddleTensor> data) {
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->EnableMKLDNN();
+  cfg->EnableMkldnnQuantizer();
+  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(data);
+  cfg->mkldnn_quantizer_config()->SetWarmupData(warmup_data);
+  cfg->mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_batch_size);
+  cfg->SwitchSpecifyInputNames();
+  cfg->SwitchIrOptim();
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
+}
+
+// Compare results of NativeConfig and AnalysisConfig
+void compare_int8(bool use_mkldnn = false) {
+  std::vector<std::vector<PaddleTensor>> inputs;
+  LoadInputData(&inputs);
+
+  AnalysisConfig cfg;
+  SetInt8Config(&cfg, inputs[0]);
+
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
+}
+
+TEST(Analyzer_ernie, compare_int8_mkldnn) {
+  compare_int8(true /* use_mkldnn */);
+}
+#endif
+
+}  // namespace inference
+}  // namespace paddle
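Once built, this binary is registered with ctest as test_analyzer_ernie_int8; the inference_analysis_api_int8_test function added in CMakeLists.txt above supplies its flags (--infer_model, --infer_data, --refer_result, --accuracy=0.8, --batch_size=5, --enable_int8=true), and its timeout is set to 120 seconds alongside the other analyzer tests.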
