Commit 1659079
[Paddle-Inference] Matmul_int8_convert: tensor*tensor (#37285)
* matmul_convert_int8
* matmul_convert_int8
* matmulconvert_int8
* Matmul_int8_convert: tensor*tensor
* Matmul_int8_convert: tensor*tensor
* Matmul_int8_convert: tensor*tensor
1 parent 025053b commit 1659079
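The int8 branch added by this commit is exercised only when the predictor is configured for TensorRT with int8 precision. A minimal sketch of that configuration follows (not part of this commit; the include path and model directory are placeholders, and the API shown is the long-standing paddle::AnalysisConfig C++ interface):

#include "paddle_inference_api.h"  // header name as shipped with Paddle Inference

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./model_dir");  // placeholder model directory
  config.EnableUseGpu(256 /* initial GPU memory pool, MB */, 0 /* device id */);
  // Precision::kInt8 is the value the converter checks via engine_->precision().
  config.EnableTensorRtEngine(1 << 30 /* workspace bytes */, 1 /* max batch */,
                              3 /* min subgraph size */,
                              paddle::AnalysisConfig::Precision::kInt8,
                              false /* use_static */, true /* use_calib_mode */);
  auto predictor = paddle::CreatePaddlePredictor(config);
  // ... prepare inputs, run the predictor, fetch outputs ...
  return 0;
}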

File tree: 11 files changed, +1810 −89 lines

paddle/fluid/inference/tensorrt/convert/matmul_op.cc

Lines changed: 123 additions & 72 deletions
@@ -1,4 +1,4 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h"
 
 namespace paddle {
 namespace framework {
@@ -35,99 +36,149 @@ class MatMulOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(3) << "convert a fluid matmul op to tensorrt mul layer without bias";
-
+    VLOG(3) << "convert a fluid matmul op to tensorrt matmul layer ";
     framework::OpDesc op_desc(op, nullptr);
+    nvinfer1::ILayer* layer = nullptr;
+
     // Declare inputs
     auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
     auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]);
 
+    nvinfer1::Dims dims_x = input1->getDimensions();
+    nvinfer1::Dims dims_y = input2->getDimensions();
+
     bool transpose_X = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_X"));
     bool transpose_Y = BOOST_GET_CONST(bool, op_desc.GetAttr("transpose_Y"));
 
+    auto output_name = op_desc.Output("Out")[0];
+    float alpha = 1;
+    if (op_desc.HasAttr("alpha")) {
+      float alpha_tem = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));
+      alpha = alpha_tem;
+    }
     nvinfer1::MatrixOperation matrix_operation_X =
         transpose_X ? nvinfer1::MatrixOperation::kTRANSPOSE
                     : nvinfer1::MatrixOperation::kNONE;
     nvinfer1::MatrixOperation matrix_operation_Y =
         transpose_Y ? nvinfer1::MatrixOperation::kTRANSPOSE
                     : nvinfer1::MatrixOperation::kNONE;
 
-    auto* layer =
-        TRT_ENGINE_ADD_LAYER(engine_, MatrixMultiply, *input1,
-                             matrix_operation_X, *input2, matrix_operation_Y);
-
-    float alpha = BOOST_GET_CONST(float, op_desc.GetAttr("alpha"));
-    auto output_name = op_desc.Output("Out")[0];
-    if (fabs(alpha - 1.0) < std::numeric_limits<float>::epsilon()) {
-      engine_->SetITensor(output_name, layer->getOutput(0));
-    } else {
-      // IScaleLayer requires the input must have at least
-      // three dimensions in static shape mode and at least
-      // four dimensions in dynamic shape mode.
-      auto* matmul_out = layer->getOutput(0);
-      nvinfer1::Dims out_shape = matmul_out->getDimensions();
-      const int out_dims = out_shape.nbDims;
-      bool need_change_dim = false;
-
+    if (op_desc.HasAttr("support_int8") &&
+        engine_->precision() == AnalysisConfig::Precision::kInt8) {
       if (engine_->with_dynamic_shape()) {
-        if (out_dims == 3) {
-          need_change_dim = true;
-        }
+        VLOG(3) << "Convert a fluid matmul_op_int8_dynamic to TensorRT "
+                   "MatmulPluginLayer";
+        plugin::MatmulPluginDynamic* plugin =
+            new plugin::MatmulPluginDynamic(transpose_X, transpose_Y, alpha);
+        std::vector<nvinfer1::ITensor*> inputs{input1, input2};
+        layer = engine_->AddDynamicPlugin(inputs.data(), inputs.size(), plugin);
+        RreplenishLayerAndOutput(layer, "matmul_op_int8_dynamic", {output_name},
+                                 test_mode);
       } else {
-        if (out_dims == 2) {
-          need_change_dim = true;
-        }
+        VLOG(3) << "Convert a fluid matmul_op_int8_static to TensorRT "
+                   "MatmulPluginLayer";
+        plugin::MatmulPlugin* plugin = new plugin::MatmulPlugin(
+            dims_x, dims_y, transpose_X, transpose_Y, alpha);
+        std::vector<nvinfer1::ITensor*> inputs{input1, input2};
+        layer = engine_->AddPluginV2IOExt(inputs.data(), inputs.size(), plugin);
+        RreplenishLayerAndOutput(layer, "matmul_op_int8_static", {output_name},
+                                 test_mode);
       }
-
-      if (need_change_dim) {
-        nvinfer1::Dims reshape_dim;
-        reshape_dim.nbDims = out_dims + 1;
-        reshape_dim.d[out_dims] = 1;
-        for (int i = 0; i < out_dims; i++) {
-          reshape_dim.d[i] = out_shape.d[i];
+    } else {
+      VLOG(3) << "Convert a fluid matmul_op_float to TensorRT ";
+      layer =
+          TRT_ENGINE_ADD_LAYER(engine_, MatrixMultiply, *input1,
+                               matrix_operation_X, *input2, matrix_operation_Y);
+      if (alpha == 1) {
+        RreplenishLayerAndOutput(layer, "matmul_op_float_no_alpha",
+                                 {output_name}, test_mode);
+      } else {
+        layer->setName(
+            ("matmul_op_float_has_alpha: MatrixMultiplyLayer (Output: " +
+             output_name + ")")
+                .c_str());
+        // IScaleLayer requires the input must have at least
+        // three dimensions in static shape mode and at least
+        // four dimensions in dynamic shape mode.
+        auto* matmul_out = layer->getOutput(0);
+        nvinfer1::Dims out_shape = matmul_out->getDimensions();
+        const int out_dims = out_shape.nbDims;
+        bool need_change_dim = false;
+
+        if (engine_->with_dynamic_shape()) {
+          if (out_dims == 3) {
+            need_change_dim = true;
+          }
+        } else {
+          if (out_dims == 2) {
+            need_change_dim = true;
+          }
         }
 
-        auto* reshape_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *matmul_out);
-        reshape_layer->setReshapeDimensions(reshape_dim);
-        matmul_out = reshape_layer->getOutput(0);
-      }
+        if (need_change_dim) {
+          nvinfer1::Dims reshape_dim;
+          reshape_dim.nbDims = out_dims + 1;
+          reshape_dim.d[out_dims] = 1;
+          for (int i = 0; i < out_dims; i++) {
+            reshape_dim.d[i] = out_shape.d[i];
+          }
+
+          auto* reshape_layer =
+              TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *matmul_out);
+          reshape_layer->setReshapeDimensions(reshape_dim);
+          matmul_out = reshape_layer->getOutput(0);
+          reshape_layer->setName(("matmul_op_float_has_alpha_reshape_before: "
                                  "ShuffleLayer (Output: " +
+                                  output_name + ")")
+                                     .c_str());
+        }
 
-      auto create_weights = [&](float data, const std::string& type) -> float* {
-        std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
-        tmp_tensor->Resize({1});
-        auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
-        tmp_data[0] = data;
-        engine_->SetWeights(output_name + "_add_scale_op_" + type,
-                            std::move(tmp_tensor));
-        return tmp_data;
-      };
-      float* alpha_data = create_weights(alpha, "alpha");
-      float* shift_data = create_weights(0.0, "shift");
-      float* power_data = create_weights(1.0, "power");
-      TensorRTEngine::Weight nv_alpha{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(alpha_data), 1};
-      TensorRTEngine::Weight nv_shift{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(shift_data), 1};
-      TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT,
-                                      static_cast<void*>(power_data), 1};
-      auto* scale_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM,
-          nv_shift.get(), nv_alpha.get(), nv_power.get());
-      auto* scale_out = scale_layer->getOutput(0);
-
-      if (need_change_dim) {
-        auto* reshape_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scale_out);
-        reshape_layer->setReshapeDimensions(out_shape);
-        scale_out = reshape_layer->getOutput(0);
+        auto create_weights = [&](float data,
+                                  const std::string& type) -> float* {
+          std::unique_ptr<framework::Tensor> tmp_tensor(
+              new framework::Tensor());
+          tmp_tensor->Resize({1});
+          auto* tmp_data =
+              tmp_tensor->mutable_data<float>(platform::CPUPlace());
+          tmp_data[0] = data;
+          engine_->SetWeights(output_name + "_add_scale_op_" + type,
+                              std::move(tmp_tensor));
+          return tmp_data;
+        };
+        float* alpha_data = create_weights(alpha, "alpha");
+        float* shift_data = create_weights(0.0, "shift");
+        float* power_data = create_weights(1.0, "power");
+        TensorRTEngine::Weight nv_alpha{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(alpha_data), 1};
+        TensorRTEngine::Weight nv_shift{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(shift_data), 1};
+        TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT,
+                                        static_cast<void*>(power_data), 1};
+        auto* scale_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM,
+            nv_shift.get(), nv_alpha.get(), nv_power.get());
+        auto* scale_out = scale_layer->getOutput(0);
+        scale_layer->setName(
+            ("matmul_op_float_has_alpha: ScaleLayer (Output: " + output_name +
+             ")")
+                .c_str());
+
+        if (need_change_dim) {
+          auto* reshape_layer =
+              TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scale_out);
+          reshape_layer->setReshapeDimensions(out_shape);
+          scale_out = reshape_layer->getOutput(0);
+          reshape_layer->setName(("matmul_op_float_has_alpha_reshape_after: "
                                  "ShuffleLayer (Output: " +
+                                  output_name + ")")
+                                     .c_str());
+        }
+        engine_->SetITensor(output_name, scale_out);
+        if (test_mode) {  // the test framework can not determine which is the
+                          // output, so place the declaration inside.
+          engine_->DeclareOutput(output_name);
+        }
       }
-
-      engine_->SetITensor(output_name, scale_out);
-    }
-    if (test_mode) {  // the test framework can not determine which is the
-                      // output, so place the declaration inside.
-      engine_->DeclareOutput(output_name);
     }
   }
 };
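For reference, every branch above must preserve the same semantics: Out = alpha * op(X) * op(Y), where op(.) transposes its argument when the corresponding attribute is set. A small self-contained CPU sketch of the 2-D case (a hypothetical reference helper, not Paddle code):

#include <cstddef>
#include <vector>

// Out = alpha * op(x) * op(y); x is m*k row-major (k*m when transpose_x),
// y is k*n row-major (n*k when transpose_y); the result is m*n row-major.
std::vector<float> MatmulRef(const std::vector<float>& x,
                             const std::vector<float>& y, std::size_t m,
                             std::size_t k, std::size_t n, bool transpose_x,
                             bool transpose_y, float alpha) {
  std::vector<float> out(m * n, 0.f);
  for (std::size_t i = 0; i < m; ++i) {
    for (std::size_t j = 0; j < n; ++j) {
      float acc = 0.f;
      for (std::size_t p = 0; p < k; ++p) {
        // Index into the stored layout, honoring the transpose flags.
        float xv = transpose_x ? x[p * m + i] : x[i * k + p];
        float yv = transpose_y ? y[j * k + p] : y[p * n + j];
        acc += xv * yv;
      }
      out[i * n + j] = alpha * acc;
    }
  }
  return out;
}

In the float path, the alpha != 1 case is handled with TensorRT's IScaleLayer, which computes (input * scale + shift)^power elementwise; the converter passes scale = alpha, shift = 0, power = 1, which is why three one-element weights are created. The surrounding Shuffle layers only pad the output with a trailing unit dimension and strip it again, because (as the in-code comment notes) IScaleLayer rejects inputs with too few dimensions in static and dynamic shape mode.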

paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
@@ -12,7 +12,8 @@ nv_library(tensorrt_plugin
   mish_op_plugin.cu
   pool3d_op_plugin.cu
   deformable_conv_op_plugin.cu
-  DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
+  matmul_op_int8_plugin.cu
+  DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor)
 
 nv_test(test_split_plugin SRCS test_split_plugin.cc DEPS
   paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_plugin)
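The build change above registers the new CUDA source with the existing tensorrt_plugin target: matmul_op_int8_plugin.cu joins the nv_library source list ahead of the DEPS clause, so it is compiled by NVCC and linked into the same plugin library. Without this line, the plugin::MatmulPlugin and plugin::MatmulPluginDynamic symbols referenced by the converter would fail to link.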
