PaddlePaddle
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
Lines changed: 4 additions & 2 deletions
diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt
Lines changed: 10 additions & 4 deletions
diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc
Lines changed: 2 additions & 1 deletion
diff --git a/paddle/fluid/memory/cuda_managed_memory_test.cu b/paddle/fluid/memory/cuda_managed_memory_test.cu
Lines changed: 3 additions & 0 deletions
diff --git a/paddle/fluid/operators/addmm_op.cc b/paddle/fluid/operators/addmm_op.cc
Lines changed: 7 additions & 81 deletions
diff --git a/paddle/fluid/operators/cholesky_op.cc b/paddle/fluid/operators/cholesky_op.cc
Lines changed: 7 additions & 21 deletions
diff --git a/paddle/fluid/operators/diag_v2_op.cc b/paddle/fluid/operators/diag_v2_op.cc
Lines changed: 8 additions & 88 deletions
@@ -56,8 +56,10 @@ cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api)
 
 if(WITH_TESTING)
  if (NOT APPLE AND NOT WIN32)
- inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
- ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
+ if (WITH_GPU)
+ inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
+ ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
+ endif()
  elseif(WIN32)
  inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
  ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
 
@@ -299,7 +299,9 @@ inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR}
 set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
 download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1)
 download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" 73beea65abda2edb61c1662cd3180c62)
-inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
+if (WITH_GPU)
+ inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
+endif()
 inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc)
 
 # Ernie large
@@ -551,7 +553,9 @@ endif()
 # bert, max_len=20, embedding_dim=128
 set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128")
 download_model_and_data_without_verify(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz")
-inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)
+if (WITH_GPU)
+ inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)
+endif()
 
 # multiple models prediction
 set(MMP_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/multi_model_prediction")
@@ -741,13 +745,15 @@ set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_mobilenet_transpose PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ner PROPERTIES TIMEOUT 120)
-set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_googlenet PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_small_dam PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_transformer PROPERTIES TIMEOUT 120)
-set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
 set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120)
+if (WITH_GPU)
+ set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
+ set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
+endif()
 if(WITH_GPU AND TENSORRT_FOUND)
  set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120)
  if(WITH_MKLDNN)
 
@@ -493,7 +493,8 @@ class AllocatorFacadePrivate {
  "support allocating managed memory.\n"
  "If you don't actually need to use managed memory, please disable "
  "it with command `export FLAGS_use_cuda_managed_memory=false`.\n"
- "Or you must use the gpu device that supports managed memory."));
+ "Or you must use the gpu device that supports managed memory.",
+ p.device));
  }
  return std::make_shared<CUDAManagedAllocator>(p);
  }
 
@@ -128,6 +128,9 @@ TEST(ManagedMemoryTest, OversubscribeGPUMemoryTest) {
 }
 
 TEST(ManagedMemoryTest, OOMExceptionTest) {
+ if (!platform::IsGPUManagedMemorySupported(0)) {
+ return;
+ }
  EXPECT_THROW(Alloc(platform::CUDAPlace(0), size_t(1) << 60),
  memory::allocation::BadAlloc);
 }
 
@@ -16,7 +16,10 @@ limitations under the License. */
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/ternary.h"
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
@@ -33,85 +36,6 @@ class AddMMOp : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;
 
- void InferShape(framework::InferShapeContext* ctx) const override {
- PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), true,
- platform::errors::NotFound(
- "Input(Input) of AddMMOp should not be null."));
- PADDLE_ENFORCE_EQ(
- ctx->HasInput("X"), true,
- platform::errors::NotFound("Input(X) of AddMMOp should not be null."));
- PADDLE_ENFORCE_EQ(
- ctx->HasInput("Y"), true,
- platform::errors::NotFound("Input(Y) of AddMMOp should not be null."));
- PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
- platform::errors::NotFound(
- "Output(Out) of AddMMOp should not be null."));
-
- auto input_dims = ctx->GetInputDim("Input");
- auto x_dims = ctx->GetInputDim("X");
- auto y_dims = ctx->GetInputDim("Y");
-
- auto ndim_input = input_dims.size();
- auto ndim_x = x_dims.size();
- auto ndim_y = y_dims.size();
-
- float alpha = ctx->Attrs().Get<float>("Alpha");
- float beta = ctx->Attrs().Get<float>("Beta");
-
- VLOG(3) << "addmm operator input.shape=" << input_dims
- << " x.shape=" << x_dims << " y.shape=" << y_dims
- << " beta=" << beta << " alpha=" << alpha
- << " ndim_input=" << ndim_input << " ndim_x=" << ndim_x
- << " ndim_y=" << ndim_y;
-
- PADDLE_ENFORCE_NE(phi::product(input_dims), 0,
- platform::errors::PreconditionNotMet(
- "The Input variable Input(%s) has not "
- "been initialized. You may need to confirm "
- "if you put exe.run(startup_program) "
- "after optimizer.minimize function.",
- ctx->Inputs("Input").front()));
-
- PADDLE_ENFORCE_NE(phi::product(x_dims), 0,
- platform::errors::PreconditionNotMet(
- "The Input variable X(%s) has not "
- "been initialized. You may need to confirm "
- "if you put exe.run(startup_program) "
- "after optimizer.minimize function.",
- ctx->Inputs("X").front()));
-
- PADDLE_ENFORCE_NE(phi::product(y_dims), 0,
- platform::errors::PreconditionNotMet(
- "The Input variable Y(%s) has not "
- "been initialized. You may need to confirm "
- "if you put exe.run(startup_program) "
- "after optimizer.minimize function.",
- ctx->Inputs("Y").front()));
- // dim check
- PADDLE_ENFORCE_EQ(ndim_input, 2,
- platform::errors::InvalidArgument(
- "The input tensor input's dimension must be 2. "
- "But received input's dimension = [%s].",
- ndim_input));
- PADDLE_ENFORCE_EQ(ndim_x, 2,
- platform::errors::InvalidArgument(
- "The input tensor x's dimension must be 2. "
- "But received x's dimension = [%s].",
- ndim_x));
- PADDLE_ENFORCE_EQ(ndim_y, 2,
- platform::errors::InvalidArgument(
- "The input tensor y's dimension must be 2. "
- "But received y's dimension = [%s].",
- ndim_y));
-
- std::vector<int64_t> output_dims;
- output_dims.push_back(x_dims[0]);
- output_dims.push_back(y_dims[1]);
-
- ctx->SetOutputDim("Out", phi::make_ddim(output_dims));
- ctx->ShareLoD("Input", /*->*/ "Out");
- }
-
  framework::OpKernelType GetExpectedKernelType(
  const framework::ExecutionContext& ctx) const {
  framework::LibraryType library = framework::LibraryType::kPlain;
@@ -223,9 +147,11 @@ class AddMMOpGradMaker : public framework::SingleGradOpMaker<T> {
 } // namespace paddle
 
 namespace ops = paddle::operators;
-
+DELCARE_INFER_SHAPE_FUNCTOR(addmm, AddmmInferShapeFunctor,
+ PT_INFER_META(phi::AddmmInferMeta));
 REGISTER_OPERATOR(addmm, ops::AddMMOp, ops::AddMMOpMaker,
  ops::AddMMOpGradMaker<paddle::framework::OpDesc>,
- ops::AddMMOpGradMaker<paddle::imperative::OpBase>);
+ ops::AddMMOpGradMaker<paddle::imperative::OpBase>,
+ AddmmInferShapeFunctor);
 
 REGISTER_OPERATOR(addmm_grad, ops::AddMMGradOp);
@@ -12,7 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/infermeta_utils.h"
+#include "paddle/phi/infermeta/unary.h"
 
 namespace paddle {
 namespace operators {
@@ -23,26 +26,6 @@ using framework::Tensor;
 class CholeskyOp : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;
-
- void InferShape(framework::InferShapeContext* ctx) const override {
- OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Cholesky");
- OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Cholesky");
- auto dims = ctx->GetInputDim("X");
- auto rank = dims.size();
- PADDLE_ENFORCE_GE(rank, 2,
- platform::errors::InvalidArgument(
- "The Input(X) should have at least 2 dimensions. But "
- "received a %d dimension tensor.",
- rank));
- PADDLE_ENFORCE_EQ(
- dims[rank - 2], dims[rank - 1],
- platform::errors::InvalidArgument(
- "The inner-most 2 dimensions of Input(X) all should be symmetric "
- "positive-definite matrices and have the same size. But received "
- "X's shape[-2] = %d and shape[-1] = %d.",
- dims[rank - 2], dims[rank - 1]));
- ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
- }
 };
 
 class CholeskyOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -107,7 +90,10 @@ class CholeskyGradOpMaker : public framework::SingleGradOpMaker<T> {
 } // namespace paddle
 
 namespace ops = paddle::operators;
+DELCARE_INFER_SHAPE_FUNCTOR(cholesky, CholeskyInferShapeFunctor,
+ PT_INFER_META(phi::CholeskyInferMeta));
 REGISTER_OPERATOR(cholesky, ops::CholeskyOp, ops::CholeskyOpMaker,
  ops::CholeskyGradOpMaker<paddle::framework::OpDesc>,
- ops::CholeskyGradOpMaker<paddle::imperative::OpBase>);
+ ops::CholeskyGradOpMaker<paddle::imperative::OpBase>,
+ CholeskyInferShapeFunctor);
 REGISTER_OPERATOR(cholesky_grad, ops::CholeskyGradOp);
@@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/diag_v2_op.h"
 #include <algorithm>
+
+#include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/infermeta/unary.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace paddle {
@@ -23,44 +25,6 @@ namespace operators {
 class DiagV2Op : public framework::OperatorWithKernel {
  public:
  using framework::OperatorWithKernel::OperatorWithKernel;
-
- void InferShape(framework::InferShapeContext* ctx) const override {
- OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "diag_v2");
- OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "diag_v2");
-
- auto x_dims = ctx->GetInputDim("X");
- auto offset = ctx->Attrs().Get<int>("offset");
-
- if (x_dims.size() == 1UL) {
- int64_t size_ = x_dims[0] + std::abs(offset);
- ctx->SetOutputDim("Out", {size_, size_});
- } else if (x_dims.size() == 2UL) {
- int64_t size_ = 0;
- if (offset >= 0) {
- // Note(LutaoChu): Do not use std::min here, otherwise the calculation
- // of `size_` will have unexpected result on Windows Python3.8
- if (x_dims[0] < x_dims[1] - offset) {
- size_ = x_dims[0];
- } else {
- size_ = x_dims[1] - offset;
- }
- } else {
- // Note(LutaoChu): Do not use std::min here, otherwise the calculation
- // of `size_` will have unexpected result on Windows Python3.8
- if (x_dims[0] + offset < x_dims[1]) {
- size_ = x_dims[0] + offset;
- } else {
- size_ = x_dims[1];
- }
- }
- ctx->SetOutputDim("Out", {size_});
- } else {
- PADDLE_THROW(platform::errors::InvalidArgument(
- "The input tensor X's dimensions of DiagV2Op should be either 1 or "
- "2, but received %d.",
- x_dims.size()));
- }
- }
 };
 
 class DiagV2OpMaker : public framework::OpProtoAndCheckerMaker {
@@ -94,59 +58,15 @@ class DiagV2OpMaker : public framework::OpProtoAndCheckerMaker {
  }
 };
 
-template <typename DeviceContext, typename T>
-class DiagV2Kernel : public framework::OpKernel<T> {
- public:
- void Compute(const framework::ExecutionContext& context) const override {
- auto* X = context.Input<framework::Tensor>("X");
- auto* x_data = X->data<T>();
- auto x_dims = X->dims();
- int offset = context.Attr<int>("offset");
- auto* out = context.Output<framework::Tensor>("Out");
- T* out_data = out->mutable_data<T>(context.GetPlace());
- auto out_dims = out->dims();
-
- int64_t i;
- if (x_dims.size() == 1) {
- float padding_value = context.Attr<float>("padding_value");
- phi::funcs::SetConstant<DeviceContext, T> set_padding_value;
- auto& dev_ctx = context.template device_context<DeviceContext>();
- set_padding_value(dev_ctx, out, static_cast<T>(padding_value));
-
- auto x_length = x_dims[0];
- const int& x_stride = ComputeStride(0, x_dims);
-
- auto out_stride_0 = ComputeStride(0, out_dims);
- auto out_stride_1 = ComputeStride(1, out_dims);
- out_data +=
- (offset >= 0 ? offset * out_stride_1 : -offset * out_stride_0);
-
- for (i = 0; i < x_length; i++) {
- out_data[i * (out_stride_0 + out_stride_1)] = x_data[i * x_stride];
- }
- } else {
- auto out_length = out_dims[0];
- const int& x_stride_0 = ComputeStride(0, x_dims);
- const int& x_stride_1 = ComputeStride(1, x_dims);
-
- auto out_stride_0 = ComputeStride(0, out_dims);
- x_data += (offset >= 0 ? offset * x_stride_1 : -offset * x_stride_0);
- for (i = 0; i < out_length; i++) {
- out_data[i * out_stride_0] = x_data[i * (x_stride_0 + x_stride_1)];
- }
- }
- }
-};
 } // namespace operators
 } // namespace paddle
 
 namespace ops = paddle::operators;
+DELCARE_INFER_SHAPE_FUNCTOR(diag_v2, DiagInferShapeFunctor,
+ PT_INFER_META(phi::DiagInferMeta));
+
 REGISTER_OPERATOR(
  diag_v2, ops::DiagV2Op, ops::DiagV2OpMaker,
  paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
- paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(
- diag_v2, ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, int>,
- ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, float>,
- ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, double>,
- ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, int64_t>);
+ paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
+ DiagInferShapeFunctor);
Original file line number	Diff line number	Diff line change
`@@ -493,7 +493,8 @@ class AllocatorFacadePrivate {`
`493`	`493`	`"support allocating managed memory.\n"`
`494`	`494`	`"If you don't actually need to use managed memory, please disable "`
`495`	`495`	"it with command `export FLAGS_use_cuda_managed_memory=false`.\n"
`496`		`- "Or you must use the gpu device that supports managed memory."));`
	`496`	`+ "Or you must use the gpu device that supports managed memory.",`
	`497`	`+ p.device));`
`497`	`498`	`}`
`498`	`499`	`return std::make_shared<CUDAManagedAllocator>(p);`
`499`	`500`	`}`
Original file line number	Diff line number	Diff line change
`@@ -128,6 +128,9 @@ TEST(ManagedMemoryTest, OversubscribeGPUMemoryTest) {`
`128`	`128`	`}`
`129`	`129`
`130`	`130`	`TEST(ManagedMemoryTest, OOMExceptionTest) {`
	`131`	`+ if (!platform::IsGPUManagedMemorySupported(0)) {`
	`132`	`+ return;`
	`133`	`+ }`
`131`	`134`	`EXPECT_THROW(Alloc(platform::CUDAPlace(0), size_t(1) << 60),`
`132`	`135`	`memory::allocation::BadAlloc);`
`133`	`136`	`}`