/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
1515#include " paddle/fluid/inference/tensorrt/convert/op_converter.h"
16+ #include " paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h"
1617
1718namespace paddle {
1819namespace framework {
@@ -35,99 +36,149 @@ class MatMulOpConverter : public OpConverter {
3536 public:
3637 void operator ()(const framework::proto::OpDesc& op,
3738 const framework::Scope& scope, bool test_mode) override {
38- VLOG (3 ) << " convert a fluid matmul op to tensorrt mul layer without bias" ;
39-
39+ VLOG (3 ) << " convert a fluid matmul op to tensorrt matmul layer " ;
4040 framework::OpDesc op_desc (op, nullptr );
41+ nvinfer1::ILayer* layer = nullptr ;
42+
4143 // Declare inputs
4244 auto * input1 = engine_->GetITensor (op_desc.Input (" X" )[0 ]);
4345 auto * input2 = engine_->GetITensor (op_desc.Input (" Y" )[0 ]);
4446
47+ nvinfer1::Dims dims_x = input1->getDimensions ();
48+ nvinfer1::Dims dims_y = input2->getDimensions ();
49+
4550 bool transpose_X = BOOST_GET_CONST (bool , op_desc.GetAttr (" transpose_X" ));
4651 bool transpose_Y = BOOST_GET_CONST (bool , op_desc.GetAttr (" transpose_Y" ));
4752
53+ auto output_name = op_desc.Output (" Out" )[0 ];
54+ float alpha = 1 ;
55+ if (op_desc.HasAttr (" alpha" )) {
56+ float alpha_tem = BOOST_GET_CONST (float , op_desc.GetAttr (" alpha" ));
57+ alpha = alpha_tem;
58+ }
4859 nvinfer1::MatrixOperation matrix_operation_X =
4960 transpose_X ? nvinfer1::MatrixOperation::kTRANSPOSE
5061 : nvinfer1::MatrixOperation::kNONE ;
5162 nvinfer1::MatrixOperation matrix_operation_Y =
5263 transpose_Y ? nvinfer1::MatrixOperation::kTRANSPOSE
5364 : nvinfer1::MatrixOperation::kNONE ;
5465
55- auto * layer =
56- TRT_ENGINE_ADD_LAYER (engine_, MatrixMultiply, *input1,
57- matrix_operation_X, *input2, matrix_operation_Y);
58-
59- float alpha = BOOST_GET_CONST (float , op_desc.GetAttr (" alpha" ));
60- auto output_name = op_desc.Output (" Out" )[0 ];
61- if (fabs (alpha - 1.0 ) < std::numeric_limits<float >::epsilon ()) {
62- engine_->SetITensor (output_name, layer->getOutput (0 ));
63- } else {
64- // IScaleLayer requires the input must have at least
65- // three dimensions in static shape mode and at least
66- // four dimensions in dynamic shape mode.
67- auto * matmul_out = layer->getOutput (0 );
68- nvinfer1::Dims out_shape = matmul_out->getDimensions ();
69- const int out_dims = out_shape.nbDims ;
70- bool need_change_dim = false ;
71-
66+ if (op_desc.HasAttr (" support_int8" ) &&
67+ engine_->precision () == AnalysisConfig::Precision::kInt8 ) {
7268 if (engine_->with_dynamic_shape ()) {
73- if (out_dims == 3 ) {
74- need_change_dim = true ;
75- }
69+ VLOG (3 ) << " Convert a fluid matmul_op_int8_dynamic to TensorRT "
70+ " MatmulPluginLayer" ;
71+ plugin::MatmulPluginDynamic* plugin =
72+ new plugin::MatmulPluginDynamic (transpose_X, transpose_Y, alpha);
73+ std::vector<nvinfer1::ITensor*> inputs{input1, input2};
74+ layer = engine_->AddDynamicPlugin (inputs.data (), inputs.size (), plugin);
75+ RreplenishLayerAndOutput (layer, " matmul_op_int8_dynamic" , {output_name},
76+ test_mode);
7677 } else {
77- if (out_dims == 2 ) {
78- need_change_dim = true ;
79- }
78+ VLOG (3 ) << " Convert a fluid matmul_op_int8_static to TensorRT "
79+ " MatmulPluginLayer" ;
80+ plugin::MatmulPlugin* plugin = new plugin::MatmulPlugin (
81+ dims_x, dims_y, transpose_X, transpose_Y, alpha);
82+ std::vector<nvinfer1::ITensor*> inputs{input1, input2};
83+ layer = engine_->AddPluginV2IOExt (inputs.data (), inputs.size (), plugin);
84+ RreplenishLayerAndOutput (layer, " matmul_op_int8_static" , {output_name},
85+ test_mode);
8086 }
81-
82- if (need_change_dim) {
83- nvinfer1::Dims reshape_dim;
84- reshape_dim.nbDims = out_dims + 1 ;
85- reshape_dim.d [out_dims] = 1 ;
86- for (int i = 0 ; i < out_dims; i++) {
87- reshape_dim.d [i] = out_shape.d [i];
87+ } else {
88+ VLOG (3 ) << " Convert a fluid matmul_op_float to TensorRT " ;
89+ layer =
90+ TRT_ENGINE_ADD_LAYER (engine_, MatrixMultiply, *input1,
91+ matrix_operation_X, *input2, matrix_operation_Y);
92+ if (alpha == 1 ) {
93+ RreplenishLayerAndOutput (layer, " matmul_op_float_no_alpha" ,
94+ {output_name}, test_mode);
95+ } else {
96+ layer->setName (
97+ (" matmul_op_float_has_alpha: MatrixMultiplyLayer (Output: " +
98+ output_name + " )" )
99+ .c_str ());
100+ // IScaleLayer requires the input must have at least
101+ // three dimensions in static shape mode and at least
102+ // four dimensions in dynamic shape mode.
103+ auto * matmul_out = layer->getOutput (0 );
104+ nvinfer1::Dims out_shape = matmul_out->getDimensions ();
105+ const int out_dims = out_shape.nbDims ;
106+ bool need_change_dim = false ;
107+
108+ if (engine_->with_dynamic_shape ()) {
109+ if (out_dims == 3 ) {
110+ need_change_dim = true ;
111+ }
112+ } else {
113+ if (out_dims == 2 ) {
114+ need_change_dim = true ;
115+ }
88116 }
89117
90- auto * reshape_layer =
91- TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *matmul_out);
92- reshape_layer->setReshapeDimensions (reshape_dim);
93- matmul_out = reshape_layer->getOutput (0 );
94- }
118+ if (need_change_dim) {
119+ nvinfer1::Dims reshape_dim;
120+ reshape_dim.nbDims = out_dims + 1 ;
121+ reshape_dim.d [out_dims] = 1 ;
122+ for (int i = 0 ; i < out_dims; i++) {
123+ reshape_dim.d [i] = out_shape.d [i];
124+ }
125+
126+ auto * reshape_layer =
127+ TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *matmul_out);
128+ reshape_layer->setReshapeDimensions (reshape_dim);
129+ matmul_out = reshape_layer->getOutput (0 );
130+ reshape_layer->setName ((" matmul_op_float_has_alpha_reshape_before: "
131+ " ShuffleLayer (Output: " +
132+ output_name + " )" )
133+ .c_str ());
134+ }
95135
96- auto create_weights = [&](float data, const std::string& type) -> float * {
97- std::unique_ptr<framework::Tensor> tmp_tensor (new framework::Tensor ());
98- tmp_tensor->Resize ({1 });
99- auto * tmp_data = tmp_tensor->mutable_data <float >(platform::CPUPlace ());
100- tmp_data[0 ] = data;
101- engine_->SetWeights (output_name + " _add_scale_op_" + type,
102- std::move (tmp_tensor));
103- return tmp_data;
104- };
105- float * alpha_data = create_weights (alpha, " alpha" );
106- float * shift_data = create_weights (0.0 , " shift" );
107- float * power_data = create_weights (1.0 , " power" );
108- TensorRTEngine::Weight nv_alpha{nvinfer1::DataType::kFLOAT ,
109- static_cast <void *>(alpha_data), 1 };
110- TensorRTEngine::Weight nv_shift{nvinfer1::DataType::kFLOAT ,
111- static_cast <void *>(shift_data), 1 };
112- TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT ,
113- static_cast <void *>(power_data), 1 };
114- auto * scale_layer = TRT_ENGINE_ADD_LAYER (
115- engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM ,
116- nv_shift.get (), nv_alpha.get (), nv_power.get ());
117- auto * scale_out = scale_layer->getOutput (0 );
118-
119- if (need_change_dim) {
120- auto * reshape_layer =
121- TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *scale_out);
122- reshape_layer->setReshapeDimensions (out_shape);
123- scale_out = reshape_layer->getOutput (0 );
136+ auto create_weights = [&](float data,
137+ const std::string& type) -> float * {
138+ std::unique_ptr<framework::Tensor> tmp_tensor (
139+ new framework::Tensor ());
140+ tmp_tensor->Resize ({1 });
141+ auto * tmp_data =
142+ tmp_tensor->mutable_data <float >(platform::CPUPlace ());
143+ tmp_data[0 ] = data;
144+ engine_->SetWeights (output_name + " _add_scale_op_" + type,
145+ std::move (tmp_tensor));
146+ return tmp_data;
147+ };
148+ float * alpha_data = create_weights (alpha, " alpha" );
149+ float * shift_data = create_weights (0.0 , " shift" );
150+ float * power_data = create_weights (1.0 , " power" );
151+ TensorRTEngine::Weight nv_alpha{nvinfer1::DataType::kFLOAT ,
152+ static_cast <void *>(alpha_data), 1 };
153+ TensorRTEngine::Weight nv_shift{nvinfer1::DataType::kFLOAT ,
154+ static_cast <void *>(shift_data), 1 };
155+ TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT ,
156+ static_cast <void *>(power_data), 1 };
157+ auto * scale_layer = TRT_ENGINE_ADD_LAYER (
158+ engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM ,
159+ nv_shift.get (), nv_alpha.get (), nv_power.get ());
160+ auto * scale_out = scale_layer->getOutput (0 );
161+ scale_layer->setName (
162+ (" matmul_op_float_has_alpha: ScaleLayer (Output: " + output_name +
163+ " )" )
164+ .c_str ());
165+
166+ if (need_change_dim) {
167+ auto * reshape_layer =
168+ TRT_ENGINE_ADD_LAYER (engine_, Shuffle, *scale_out);
169+ reshape_layer->setReshapeDimensions (out_shape);
170+ scale_out = reshape_layer->getOutput (0 );
171+ reshape_layer->setName ((" matmul_op_float_has_alpha_reshape_after: "
172+ " ShuffleLayer (Output: " +
173+ output_name + " )" )
174+ .c_str ());
175+ }
176+ engine_->SetITensor (output_name, scale_out);
177+ if (test_mode) { // the test framework can not determine which is the
178+ // output, so place the declaration inside.
179+ engine_->DeclareOutput (output_name);
180+ }
124181 }
125-
126- engine_->SetITensor (output_name, scale_out);
127- }
128- if (test_mode) { // the test framework can not determine which is the
129- // output, so place the declaration inside.
130- engine_->DeclareOutput (output_name);
131182 }
132183 }
133184};
0 commit comments