Commit 1251bf0

Add TensorRT inspector into Paddle-TRT

1 parent 87f4a68 commit 1251bf0

14 files changed: +143 -17 lines
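In short, this change threads a new use_inspector switch from AnalysisConfig through the analysis Argument, the IR pass manager, and the TensorRT subgraph pass into TensorRTEngine: when the switch is on, the engine is built with ProfilingVerbosity::kDETAILED and TensorRTEngineOp logs per-layer engine information via TensorRT's IEngineInspector (TensorRT 8.2+).

A minimal sketch of how a user might turn it on; the model path, GPU pool size, and TensorRT settings below are illustrative, not part of this commit:

    #include "paddle_inference_api.h"  // public Paddle inference header

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("./model_dir");        // hypothetical model directory
      config.EnableUseGpu(100 /* MB */, 0 /* GPU id */);
      config.EnableTensorRtEngine();         // default TensorRT settings
      config.EnableTensorRtInspector();      // new API added by this commit
      // On TensorRT >= 8.2, layer info is logged when the engine op runs.
      return config.tensorrt_inspector_enabled() ? 0 : 1;
    }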

AUTHORS.md

Lines changed: 1 addition & 0 deletions
@@ -83,3 +83,4 @@
 | jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
 | mingxu1067 | Ming Huang (NVIDIA) |
 | zlsh80826 | Reese Wang (NVIDIA) |
+| leo0519 | Leo Chen (NVIDIA) |

paddle/fluid/inference/analysis/argument.h

Lines changed: 1 addition & 0 deletions
@@ -219,6 +219,7 @@ struct Argument {
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_allow_build_at_runtime,
                       TensorRtAllowBuildAtRuntime, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_use_inspector, TensorRtUseInspector, bool);

   DECL_ARGUMENT_FIELD(use_dlnne, UseDlnne, bool);
   DECL_ARGUMENT_FIELD(dlnne_min_subgraph_size, DlnneMinSubgraphSize, int);

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 1 addition & 0 deletions
@@ -156,6 +156,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
+      pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector()));

       // tuned trt dynamic_shape
       pass->Set("trt_shape_range_info_path",

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 2 additions & 0 deletions
@@ -265,6 +265,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("parameters", params);
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
+  op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));

   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.
@@ -375,6 +376,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
+  trt_engine->SetUseInspector(Get<bool>("use_inspector"));

   trt_engine->SetWithErnie(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 3 additions & 0 deletions
@@ -194,6 +194,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(trt_allow_build_at_runtime_);
   CP_MEMBER(collect_shape_range_info_);
   CP_MEMBER(shape_range_info_path_);
+  CP_MEMBER(trt_use_inspector_);
   // Dlnne related
   CP_MEMBER(use_dlnne_);
   CP_MEMBER(dlnne_min_subgraph_size_);
@@ -427,6 +428,8 @@ void AnalysisConfig::EnableTensorRtDLA(int dla_core) {
   trt_dla_core_ = dla_core;
 }

+void AnalysisConfig::EnableTensorRtInspector() { trt_use_inspector_ = true; }
+
 void AnalysisConfig::Exp_DisableTensorRtOPs(
     const std::vector<std::string> &ops) {
   trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 1 addition & 0 deletions
@@ -615,6 +615,7 @@ void AnalysisPredictor::PrepareArgument() {
         config_.tuned_tensorrt_dynamic_shape());
     argument_.SetTensorRtAllowBuildAtRuntime(
         config_.trt_allow_build_at_runtime());
+    argument_.SetTensorRtUseInspector(config_.trt_use_inspector_);
   }

   if (config_.dlnne_enabled()) {

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 4 additions & 0 deletions
@@ -521,6 +521,9 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_dla_enabled() { return trt_use_dla_; }

+  void EnableTensorRtInspector();
+  bool tensorrt_inspector_enabled() { return trt_use_inspector_; }
+
   void EnableDlnne(int min_subgraph_size = 3);
   bool dlnne_enabled() const { return use_dlnne_; }

@@ -807,6 +810,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_allow_build_at_runtime_{false};
   // tune to get dynamic_shape info.
   bool trt_tuned_dynamic_shape_{false};
+  bool trt_use_inspector_{false};

   // In CollectShapeInfo mode, we will collect the shape information of
   // all intermediate tensors in the compute graph and calculate the

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 19 additions & 3 deletions
@@ -57,7 +57,6 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
   } else {
 #if IS_TRT_VERSION_GE(6000)
     infer_context->enqueueV2(buffers->data(), stream, nullptr);
-    GetEngineInfo();
 #endif
   }
   SetRuntimeBatch(batch_size);
@@ -244,8 +243,10 @@ void TensorRTEngine::FreezeNetwork() {
 #endif
   }
 #if IS_TRT_VERSION_GE(8200)
-  infer_builder_config_->setProfilingVerbosity(
-      nvinfer1::ProfilingVerbosity::kDETAILED);
+  if (use_inspector_) {
+    infer_builder_config_->setProfilingVerbosity(
+        nvinfer1::ProfilingVerbosity::kDETAILED);
+  }
 #endif

 #if IS_TRT_VERSION_LT(8000)
@@ -411,6 +412,21 @@ void TensorRTEngine::freshDeviceId() {
   platform::SetDeviceId(device_id_);
 }

+void TensorRTEngine::GetEngineInfo() {
+#if IS_TRT_VERSION_GE(8200)
+  LOG(INFO) << "====== engine info ======";
+  std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
+      infer_engine_->createEngineInspector());
+  auto infer_context = context();
+  infer_inspector->setExecutionContext(infer_context);
+  LOG(INFO) << infer_inspector->getEngineInformation(
+      nvinfer1::LayerInformationFormat::kONELINE);
+  LOG(INFO) << "====== engine info end ======";
+#else
+  LOG(INFO) << "Inspector needs TensorRT version 8.2 and after.";
+#endif
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
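For reference, the GetEngineInfo implementation above maps onto plain TensorRT as follows; a minimal sketch assuming TensorRT 8.2+ headers and an engine built with kDETAILED profiling verbosity (engine and context are assumed to come from the usual build or deserialize path):

    #include <NvInfer.h>
    #include <iostream>
    #include <memory>

    // Prints one line of information per layer of an already-built engine.
    void PrintEngineInfo(nvinfer1::ICudaEngine* engine,
                         nvinfer1::IExecutionContext* context) {
      std::unique_ptr<nvinfer1::IEngineInspector> inspector(
          engine->createEngineInspector());
      // Binding an execution context lets the inspector report
      // shape-dependent details for dynamic-shape engines.
      inspector->setExecutionContext(context);
      std::cout << inspector->getEngineInformation(
                       nvinfer1::LayerInformationFormat::kONELINE)
                << std::endl;
    }

Without kDETAILED verbosity at build time the inspector reports far less detail, which is why FreezeNetwork now raises the verbosity only when the flag is set.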

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 5 additions & 11 deletions
@@ -580,17 +580,10 @@ class TensorRTEngine {
   }

   void SetProfileNum(int num) { max_profile_num_ = num; }
-  void GetEngineInfo() {
-#if IS_TRT_VERSION_GE(8200)
-    std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
-        infer_engine_->createEngineInspector());
-    infer_inspector->setExecutionContext(context());
-    VLOG(3) << infer_inspector->getEngineInformation(
-        nvinfer1::LayerInformationFormat::kJSON);
-#else
-    VLOG(3) << "Inspector needs TensorRT version 8.2 and after.";
-#endif
-  }
+
+  void GetEngineInfo();
+
+  void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }

  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
@@ -664,6 +657,7 @@ class TensorRTEngine {
   std::vector<std::unique_ptr<plugin::DynamicPluginTensorRT>> owned_pluginv2_;
 #endif
   std::mutex mutex_;
+  bool use_inspector_;
 };  // class TensorRTEngine

 // Add a layer__ into engine__ with args ARGS.

paddle/fluid/operators/tensorrt/tensorrt_engine_op.h

Lines changed: 5 additions & 1 deletion
@@ -140,6 +140,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool enable_int8_;
   bool enable_fp16_;
   bool use_calib_mode_;
+  bool use_inspector_;
   std::string calibration_data_;
   std::string engine_key_;
   std::string calibration_engine_key_;
@@ -175,6 +176,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     shape_range_info_path_ = Attr<std::string>("shape_range_info_path");
     allow_build_at_runtime_ = Attr<bool>("allow_build_at_runtime");
     use_static_engine_ = Attr<bool>("use_static_engine");
+    use_inspector_ = HasAttr("use_inspector") && Attr<bool>("use_inspector");
     if (use_static_engine_) {
       model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
     }
@@ -285,6 +287,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
       return;
     }
     auto *trt_engine = GetEngine(scope, dev_place);
+    if (use_inspector_) {
+      trt_engine->GetEngineInfo();
+    }
     if (trt_engine->with_dynamic_shape()) {
       // get runtime input shapes.
       std::map<std::string, std::vector<int32_t>> runtime_input_shape;
@@ -331,7 +336,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
       anc = &scope;
     }
     PrepareTRTEngine(*anc, trt_engine);
-
     // update shape_range_info_pbtxt
     if (!shape_range_info_path_.empty()) {
       inference::UpdateShapeRangeInfo(
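One detail worth noting in this file: reading the attribute as HasAttr("use_inspector") && Attr<bool>("use_inspector") rather than Attr<bool>("use_inspector") alone presumably keeps previously serialized programs, which predate the attribute, loadable; the inspector simply stays off for them.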
