Skip to content

Commit 4e4f8cc

Browse files
committed
add matmul_broadcast_unittest
1 parent 5a7bd4a commit 4e4f8cc

File tree

3 files changed

+60
-22
lines changed

3 files changed

+60
-22
lines changed

paddle/fluid/inference/api/paddle_pass_builder.cc

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,15 +198,15 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
198198
// "embedding_fc_lstm_fuse_pass", //
199199
// TODO(wilber): fix correctness problem.
200200
// "fc_lstm_fuse_pass", //
201-
"mul_lstm_fuse_pass", //
202-
"fc_gru_fuse_pass", //
203-
"mul_gru_fuse_pass", //
204-
"seq_concat_fc_fuse_pass", //
205-
"squeeze2_matmul_fuse_pass", //
206-
"reshape2_matmul_fuse_pass", //
207-
"flatten2_matmul_fuse_pass", //
208-
"map_matmul_v2_to_mul_pass", //
209-
"map_matmul_v2_to_matmul_pass", //
201+
"mul_lstm_fuse_pass", //
202+
"fc_gru_fuse_pass", //
203+
"mul_gru_fuse_pass", //
204+
"seq_concat_fc_fuse_pass", //
205+
"squeeze2_matmul_fuse_pass", //
206+
"reshape2_matmul_fuse_pass", //
207+
"flatten2_matmul_fuse_pass", //
208+
"map_matmul_v2_to_mul_pass", //
209+
// "map_matmul_v2_to_matmul_pass", //
210210
"map_matmul_to_mul_pass", //
211211
"fc_fuse_pass", //
212212
"repeated_fc_relu_fuse_pass", //

paddle/fluid/inference/tensorrt/op_teller.cc

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -340,19 +340,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
340340
return false;
341341
}
342342

343-
for (auto& param_name : desc.Inputs()) {
344-
for (auto& var_name : param_name.second) {
345-
auto* var_desc = block->FindVar(var_name);
346-
const auto shape = var_desc->GetShape();
347-
if (shape.size() < 3) {
348-
VLOG(3)
349-
<< "matmul op dims < 3 not supported in tensorrt, but got dims "
350-
<< shape.size() << ", so jump it.";
351-
return false;
352-
}
353-
}
354-
}
355-
356343
// not support broadcast
357344
auto* x_var_desc = block->FindVar(desc.Input("X")[0]);
358345
auto* y_var_desc = block->FindVar(desc.Input("Y")[0]);
@@ -371,6 +358,19 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
371358
return false;
372359
}
373360
}
361+
362+
for (auto& param_name : desc.Inputs()) {
363+
for (auto& var_name : param_name.second) {
364+
auto* var_desc = block->FindVar(var_name);
365+
const auto shape = var_desc->GetShape();
366+
if (shape.size() < 3) {
367+
VLOG(3)
368+
<< "matmul op dims < 3 not supported in tensorrt, but got dims "
369+
<< shape.size() << ", so jump it.";
370+
return false;
371+
}
372+
}
373+
}
374374
}
375375
if (op_type == "softmax") {
376376
auto* block = desc.Block();

python/paddle/fluid/tests/unittests/ir/inference/test_trt_matmul.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,5 +107,43 @@ def set_params(self):
107107
self.alpha = 2.0
108108

109109

110+
class TensorRTMatMulBroadcastTest(InferencePassTest):
111+
def setUp(self):
112+
self.set_params()
113+
place = fluid.CPUPlace()
114+
with fluid.program_guard(self.main_program, self.startup_program):
115+
data_x = fluid.data(
116+
name="data_x", shape=[-1, 6, 24], dtype="float32")
117+
data_y = fluid.data(name="data_y", shape=[24, 16], dtype="float32")
118+
matmul_out = fluid.layers.matmul(
119+
x=data_x,
120+
y=data_y,
121+
transpose_x=self.transpose_x,
122+
transpose_y=self.transpose_y,
123+
alpha=self.alpha)
124+
out = fluid.layers.batch_norm(matmul_out, is_test=True)
125+
126+
self.feeds = {
127+
"data_x": np.ones([2, 6, 24]).astype("float32"),
128+
"data_y": np.ones([24, 16]).astype("float32")
129+
}
130+
self.enable_trt = True
131+
self.trt_parameters = TensorRTMatMulBroadcastTest.TensorRTParam(
132+
1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
133+
self.fetch_list = [out]
134+
135+
def set_params(self):
136+
self.transpose_x = False
137+
self.transpose_y = False
138+
self.alpha = 1.0
139+
140+
def test_check_output(self):
141+
if core.is_compiled_with_cuda():
142+
use_gpu = True
143+
self.check_output_with_option(use_gpu)
144+
self.assertTrue(
145+
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
146+
147+
110148
if __name__ == "__main__":
111149
unittest.main()

0 commit comments

Comments (0)