Commit 1251bf0

Add TensorRT inspector into Paddle-TRT

1 parent 87f4a68 commit 1251bf0

14 files changed: +143 -17 lines
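In short, this change threads a new use_inspector switch from AnalysisConfig through the analysis Argument, the IR pass manager, and the TensorRT subgraph pass into TensorRTEngine: when the switch is on, the engine is built with ProfilingVerbosity::kDETAILED and TensorRTEngineOp logs per-layer engine information via TensorRT's IEngineInspector (TensorRT 8.2+).

A minimal sketch of how a user might turn it on; the model path, GPU pool size, and TensorRT settings below are illustrative, not part of this commit:

    #include "paddle_inference_api.h"  // public Paddle inference header

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("./model_dir");        // hypothetical model directory
      config.EnableUseGpu(100 /* MB */, 0 /* GPU id */);
      config.EnableTensorRtEngine();         // default TensorRT settings
      config.EnableTensorRtInspector();      // new API added by this commit
      // On TensorRT >= 8.2, layer info is logged when the engine op runs.
      return config.tensorrt_inspector_enabled() ? 0 : 1;
    }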

AUTHORS.md

Lines changed: 1 addition & 0 deletions
@@ -83,3 +83,4 @@
 | jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
 | mingxu1067 | Ming Huang (NVIDIA) |
 | zlsh80826 | Reese Wang (NVIDIA) |
+| leo0519 | Leo Chen (NVIDIA) |

paddle/fluid/inference/analysis/argument.h

Lines changed: 1 addition & 0 deletions
@@ -219,6 +219,7 @@ struct Argument {
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_allow_build_at_runtime,
                       TensorRtAllowBuildAtRuntime, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_use_inspector, TensorRtUseInspector, bool);

   DECL_ARGUMENT_FIELD(use_dlnne, UseDlnne, bool);
   DECL_ARGUMENT_FIELD(dlnne_min_subgraph_size, DlnneMinSubgraphSize, int);

paddle/fluid/inference/analysis/ir_pass_manager.cc

Lines changed: 1 addition & 0 deletions
@@ -156,6 +156,7 @@ void IRPassManager::CreatePasses(Argument *argument,
       pass->Set("gpu_device_id", new int(argument->gpu_device_id()));
       pass->Set("use_static_engine", new bool(use_static_engine));
       pass->Set("model_from_memory", new bool(argument->model_from_memory()));
+      pass->Set("use_inspector", new bool(argument->tensorrt_use_inspector()));

       // tuned trt dynamic_shape
       pass->Set("trt_shape_range_info_path",

paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc

Lines changed: 2 additions & 0 deletions
@@ -265,6 +265,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   op_desc->SetAttr("parameters", params);
   op_desc->SetAttr("allow_build_at_runtime", allow_build_at_runtime);
   op_desc->SetAttr("shape_range_info_path", shape_range_info_path);
+  op_desc->SetAttr("use_inspector", Get<bool>("use_inspector"));

   // we record all inputs' shapes in attr to check if they are consistent
   // with the real inputs' shapes retrieved from scope when trt runs.
@@ -375,6 +376,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
   trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
+  trt_engine->SetUseInspector(Get<bool>("use_inspector"));

   trt_engine->SetWithErnie(
       graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&

paddle/fluid/inference/api/analysis_config.cc

Lines changed: 3 additions & 0 deletions
@@ -194,6 +194,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(trt_allow_build_at_runtime_);
   CP_MEMBER(collect_shape_range_info_);
   CP_MEMBER(shape_range_info_path_);
+  CP_MEMBER(trt_use_inspector_);
   // Dlnne related
   CP_MEMBER(use_dlnne_);
   CP_MEMBER(dlnne_min_subgraph_size_);
@@ -427,6 +428,8 @@ void AnalysisConfig::EnableTensorRtDLA(int dla_core) {
   trt_dla_core_ = dla_core;
 }

+void AnalysisConfig::EnableTensorRtInspector() { trt_use_inspector_ = true; }
+
 void AnalysisConfig::Exp_DisableTensorRtOPs(
     const std::vector<std::string> &ops) {
   trt_disabled_ops_.insert(trt_disabled_ops_.end(), ops.begin(), ops.end());

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 1 addition & 0 deletions
@@ -615,6 +615,7 @@ void AnalysisPredictor::PrepareArgument() {
         config_.tuned_tensorrt_dynamic_shape());
     argument_.SetTensorRtAllowBuildAtRuntime(
         config_.trt_allow_build_at_runtime());
+    argument_.SetTensorRtUseInspector(config_.trt_use_inspector_);
   }

   if (config_.dlnne_enabled()) {

paddle/fluid/inference/api/paddle_analysis_config.h

Lines changed: 4 additions & 0 deletions
@@ -521,6 +521,9 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool tensorrt_dla_enabled() { return trt_use_dla_; }

+  void EnableTensorRtInspector();
+  bool tensorrt_inspector_enabled() { return trt_use_inspector_; }
+
   void EnableDlnne(int min_subgraph_size = 3);
   bool dlnne_enabled() const { return use_dlnne_; }

@@ -807,6 +810,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool trt_allow_build_at_runtime_{false};
   // tune to get dynamic_shape info.
   bool trt_tuned_dynamic_shape_{false};
+  bool trt_use_inspector_{false};

   // In CollectShapeInfo mode, we will collect the shape information of
   // all intermediate tensors in the compute graph and calculate the

paddle/fluid/inference/tensorrt/engine.cc

Lines changed: 19 additions & 3 deletions
@@ -57,7 +57,6 @@ void TensorRTEngine::Execute(int batch_size, std::vector<void *> *buffers,
   } else {
 #if IS_TRT_VERSION_GE(6000)
     infer_context->enqueueV2(buffers->data(), stream, nullptr);
-    GetEngineInfo();
 #endif
   }
   SetRuntimeBatch(batch_size);
@@ -244,8 +243,10 @@ void TensorRTEngine::FreezeNetwork() {
 #endif
   }
 #if IS_TRT_VERSION_GE(8200)
-  infer_builder_config_->setProfilingVerbosity(
-      nvinfer1::ProfilingVerbosity::kDETAILED);
+  if (use_inspector_) {
+    infer_builder_config_->setProfilingVerbosity(
+        nvinfer1::ProfilingVerbosity::kDETAILED);
+  }
 #endif

 #if IS_TRT_VERSION_LT(8000)
@@ -411,6 +412,21 @@ void TensorRTEngine::freshDeviceId() {
   platform::SetDeviceId(device_id_);
 }

+void TensorRTEngine::GetEngineInfo() {
+#if IS_TRT_VERSION_GE(8200)
+  LOG(INFO) << "====== engine info ======";
+  std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
+      infer_engine_->createEngineInspector());
+  auto infer_context = context();
+  infer_inspector->setExecutionContext(infer_context);
+  LOG(INFO) << infer_inspector->getEngineInformation(
+      nvinfer1::LayerInformationFormat::kONELINE);
+  LOG(INFO) << "====== engine info end ======";
+#else
+  LOG(INFO) << "Inspector needs TensorRT version 8.2 and after.";
+#endif
+}
+
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
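For reference, the GetEngineInfo implementation above maps onto plain TensorRT as follows; a minimal sketch assuming TensorRT 8.2+ headers and an engine built with kDETAILED profiling verbosity (engine and context are assumed to come from the usual build or deserialize path):

    #include <NvInfer.h>
    #include <iostream>
    #include <memory>

    // Prints one line of information per layer of an already-built engine.
    void PrintEngineInfo(nvinfer1::ICudaEngine* engine,
                         nvinfer1::IExecutionContext* context) {
      std::unique_ptr<nvinfer1::IEngineInspector> inspector(
          engine->createEngineInspector());
      // Binding an execution context lets the inspector report
      // shape-dependent details for dynamic-shape engines.
      inspector->setExecutionContext(context);
      std::cout << inspector->getEngineInformation(
                       nvinfer1::LayerInformationFormat::kONELINE)
                << std::endl;
    }

Without kDETAILED verbosity at build time the inspector reports far less detail, which is why FreezeNetwork now raises the verbosity only when the flag is set.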

paddle/fluid/inference/tensorrt/engine.h

Lines changed: 5 additions & 11 deletions
@@ -580,17 +580,10 @@ class TensorRTEngine {
   }

   void SetProfileNum(int num) { max_profile_num_ = num; }
-  void GetEngineInfo() {
-#if IS_TRT_VERSION_GE(8200)
-    std::unique_ptr<nvinfer1::IEngineInspector> infer_inspector(
-        infer_engine_->createEngineInspector());
-    infer_inspector->setExecutionContext(context());
-    VLOG(3) << infer_inspector->getEngineInformation(
-        nvinfer1::LayerInformationFormat::kJSON);
-#else
-    VLOG(3) << "Inspector needs TensorRT version 8.2 and after.";
-#endif
-  }
+
+  void GetEngineInfo();
+
+  void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }

  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
@@ -664,6 +657,7 @@ class TensorRTEngine {
   std::vector<std::unique_ptr<plugin::DynamicPluginTensorRT>> owned_pluginv2_;
 #endif
   std::mutex mutex_;
+  bool use_inspector_;
 };  // class TensorRTEngine

 // Add a layer__ into engine__ with args ARGS.

paddle/fluid/operators/tensorrt/tensorrt_engine_op.h

Lines changed: 5 additions & 1 deletion
@@ -140,6 +140,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
   bool enable_int8_;
   bool enable_fp16_;
   bool use_calib_mode_;
+  bool use_inspector_;
   std::string calibration_data_;
   std::string engine_key_;
   std::string calibration_engine_key_;
@@ -175,6 +176,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
     shape_range_info_path_ = Attr<std::string>("shape_range_info_path");
     allow_build_at_runtime_ = Attr<bool>("allow_build_at_runtime");
     use_static_engine_ = Attr<bool>("use_static_engine");
+    use_inspector_ = HasAttr("use_inspector") && Attr<bool>("use_inspector");
     if (use_static_engine_) {
       model_opt_cache_dir_ = Attr<std::string>("model_opt_cache_dir");
     }
@@ -285,6 +287,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
       return;
     }
     auto *trt_engine = GetEngine(scope, dev_place);
+    if (use_inspector_) {
+      trt_engine->GetEngineInfo();
+    }
     if (trt_engine->with_dynamic_shape()) {
       // get runtime input shapes.
       std::map<std::string, std::vector<int32_t>> runtime_input_shape;
@@ -331,7 +336,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
       anc = &scope;
     }
     PrepareTRTEngine(*anc, trt_engine);
-
     // update shape_range_info_pbtxt
     if (!shape_range_info_path_.empty()) {
       inference::UpdateShapeRangeInfo(
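One detail worth noting in this file: reading the attribute as HasAttr("use_inspector") && Attr<bool>("use_inspector") rather than Attr<bool>("use_inspector") alone presumably keeps previously serialized programs, which predate the attribute, loadable; the inspector simply stays off for them.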
