Skip to content

Commit f7c9f2b

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into dev/bf16_op_7
2 parents 1487bdb + 584844e commit f7c9f2b

File tree

81 files changed

+2170
-1055
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

81 files changed

+2170
-1055
lines changed

paddle/fluid/inference/api/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,10 @@ cc_test(test_paddle_inference_api SRCS api_tester.cc DEPS paddle_inference_api)
5656

5757
if(WITH_TESTING)
5858
if (NOT APPLE AND NOT WIN32)
59-
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
60-
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
59+
if (WITH_GPU)
60+
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS paddle_inference_shared
61+
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})
62+
endif()
6163
elseif(WIN32)
6264
inference_base_test(test_api_impl SRCS api_impl_tester.cc DEPS ${inference_deps}
6365
ARGS --word2vec_dirname=${WORD2VEC_MODEL_DIR} --book_dirname=${IMG_CLS_RESNET_INSTALL_DIR})

paddle/fluid/inference/tests/api/CMakeLists.txt

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,9 @@ inference_analysis_api_test(test_analyzer_pyramid_dnn ${PYRAMID_DNN_INSTALL_DIR}
299299
set(ERNIE_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/Ernie")
300300
download_model_and_data(${ERNIE_INSTALL_DIR} "Ernie_model.tar.gz" aa59192dd41ed377f9f168e3a1309fa6 "Ernie_data.txt.tar.gz" 5396e63548edad7ca561e7e26a9476d1)
301301
download_result(${ERNIE_INSTALL_DIR} "Ernie_result.txt.tar.gz" 73beea65abda2edb61c1662cd3180c62)
302-
inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
302+
if (WITH_GPU)
303+
inference_analysis_api_test(test_analyzer_ernie ${ERNIE_INSTALL_DIR} analyzer_ernie_tester.cc)
304+
endif()
303305
inference_analysis_api_int8_test(test_analyzer_ernie_int8 ${ERNIE_INSTALL_DIR} analyzer_ernie_int8_tester.cc)
304306

305307
# Ernie large
@@ -551,7 +553,9 @@ endif()
551553
# bert, max_len=20, embedding_dim=128
552554
set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert_emb128")
553555
download_model_and_data_without_verify(${BERT_INSTALL_DIR} "bert_emb128_model.tar.gz" "bert_data_len20.txt.tar.gz")
554-
inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)
556+
if (WITH_GPU)
557+
inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc)
558+
endif()
555559

556560
# multiple models prediction
557561
set(MMP_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/multi_model_prediction")
@@ -741,13 +745,15 @@ set_tests_properties(lite_resnet50_test PROPERTIES TIMEOUT 120)
741745
set_tests_properties(test_analyzer_mobilenet_transpose PROPERTIES TIMEOUT 120)
742746
set_tests_properties(test_analyzer_resnet50 PROPERTIES TIMEOUT 120)
743747
set_tests_properties(test_analyzer_ner PROPERTIES TIMEOUT 120)
744-
set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
745748
set_tests_properties(test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120)
746749
set_tests_properties(test_analyzer_googlenet PROPERTIES TIMEOUT 120)
747750
set_tests_properties(test_analyzer_small_dam PROPERTIES TIMEOUT 120)
748751
set_tests_properties(test_analyzer_transformer PROPERTIES TIMEOUT 120)
749-
set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
750752
set_tests_properties(test_analyzer_mobilenet_depthwise_conv PROPERTIES TIMEOUT 120)
753+
if (WITH_GPU)
754+
set_tests_properties(test_analyzer_bert PROPERTIES TIMEOUT 120)
755+
set_tests_properties(test_analyzer_ernie PROPERTIES TIMEOUT 120)
756+
endif()
751757
if(WITH_GPU AND TENSORRT_FOUND)
752758
set_tests_properties(trt_mobilenet_test PROPERTIES TIMEOUT 120)
753759
if(WITH_MKLDNN)

paddle/fluid/memory/allocation/allocator_facade.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,8 @@ class AllocatorFacadePrivate {
493493
"support allocating managed memory.\n"
494494
"If you don't actually need to use managed memory, please disable "
495495
"it with command `export FLAGS_use_cuda_managed_memory=false`.\n"
496-
"Or you must use the gpu device that supports managed memory."));
496+
"Or you must use the gpu device that supports managed memory.",
497+
p.device));
497498
}
498499
return std::make_shared<CUDAManagedAllocator>(p);
499500
}

paddle/fluid/memory/cuda_managed_memory_test.cu

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,9 @@ TEST(ManagedMemoryTest, OversubscribeGPUMemoryTest) {
128128
}
129129

130130
TEST(ManagedMemoryTest, OOMExceptionTest) {
131+
if (!platform::IsGPUManagedMemorySupported(0)) {
132+
return;
133+
}
131134
EXPECT_THROW(Alloc(platform::CUDAPlace(0), size_t(1) << 60),
132135
memory::allocation::BadAlloc);
133136
}

paddle/fluid/operators/addmm_op.cc

Lines changed: 7 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ limitations under the License. */
1616
#include <string>
1717
#include <unordered_map>
1818
#include <vector>
19+
#include "paddle/fluid/framework/infershape_utils.h"
1920
#include "paddle/fluid/framework/op_registry.h"
21+
#include "paddle/phi/core/infermeta_utils.h"
22+
#include "paddle/phi/infermeta/ternary.h"
2023
#ifdef PADDLE_WITH_MKLDNN
2124
#include "paddle/fluid/platform/mkldnn_helper.h"
2225
#endif
@@ -33,85 +36,6 @@ class AddMMOp : public framework::OperatorWithKernel {
3336
public:
3437
using framework::OperatorWithKernel::OperatorWithKernel;
3538

36-
void InferShape(framework::InferShapeContext* ctx) const override {
37-
PADDLE_ENFORCE_EQ(ctx->HasInput("Input"), true,
38-
platform::errors::NotFound(
39-
"Input(Input) of AddMMOp should not be null."));
40-
PADDLE_ENFORCE_EQ(
41-
ctx->HasInput("X"), true,
42-
platform::errors::NotFound("Input(X) of AddMMOp should not be null."));
43-
PADDLE_ENFORCE_EQ(
44-
ctx->HasInput("Y"), true,
45-
platform::errors::NotFound("Input(Y) of AddMMOp should not be null."));
46-
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
47-
platform::errors::NotFound(
48-
"Output(Out) of AddMMOp should not be null."));
49-
50-
auto input_dims = ctx->GetInputDim("Input");
51-
auto x_dims = ctx->GetInputDim("X");
52-
auto y_dims = ctx->GetInputDim("Y");
53-
54-
auto ndim_input = input_dims.size();
55-
auto ndim_x = x_dims.size();
56-
auto ndim_y = y_dims.size();
57-
58-
float alpha = ctx->Attrs().Get<float>("Alpha");
59-
float beta = ctx->Attrs().Get<float>("Beta");
60-
61-
VLOG(3) << "addmm operator input.shape=" << input_dims
62-
<< " x.shape=" << x_dims << " y.shape=" << y_dims
63-
<< " beta=" << beta << " alpha=" << alpha
64-
<< " ndim_input=" << ndim_input << " ndim_x=" << ndim_x
65-
<< " ndim_y=" << ndim_y;
66-
67-
PADDLE_ENFORCE_NE(phi::product(input_dims), 0,
68-
platform::errors::PreconditionNotMet(
69-
"The Input variable Input(%s) has not "
70-
"been initialized. You may need to confirm "
71-
"if you put exe.run(startup_program) "
72-
"after optimizer.minimize function.",
73-
ctx->Inputs("Input").front()));
74-
75-
PADDLE_ENFORCE_NE(phi::product(x_dims), 0,
76-
platform::errors::PreconditionNotMet(
77-
"The Input variable X(%s) has not "
78-
"been initialized. You may need to confirm "
79-
"if you put exe.run(startup_program) "
80-
"after optimizer.minimize function.",
81-
ctx->Inputs("X").front()));
82-
83-
PADDLE_ENFORCE_NE(phi::product(y_dims), 0,
84-
platform::errors::PreconditionNotMet(
85-
"The Input variable Y(%s) has not "
86-
"been initialized. You may need to confirm "
87-
"if you put exe.run(startup_program) "
88-
"after optimizer.minimize function.",
89-
ctx->Inputs("Y").front()));
90-
// dim check
91-
PADDLE_ENFORCE_EQ(ndim_input, 2,
92-
platform::errors::InvalidArgument(
93-
"The input tensor input's dimension must be 2. "
94-
"But received input's dimension = [%s].",
95-
ndim_input));
96-
PADDLE_ENFORCE_EQ(ndim_x, 2,
97-
platform::errors::InvalidArgument(
98-
"The input tensor x's dimension must be 2. "
99-
"But received x's dimension = [%s].",
100-
ndim_x));
101-
PADDLE_ENFORCE_EQ(ndim_y, 2,
102-
platform::errors::InvalidArgument(
103-
"The input tensor y's dimension must be 2. "
104-
"But received y's dimension = [%s].",
105-
ndim_y));
106-
107-
std::vector<int64_t> output_dims;
108-
output_dims.push_back(x_dims[0]);
109-
output_dims.push_back(y_dims[1]);
110-
111-
ctx->SetOutputDim("Out", phi::make_ddim(output_dims));
112-
ctx->ShareLoD("Input", /*->*/ "Out");
113-
}
114-
11539
framework::OpKernelType GetExpectedKernelType(
11640
const framework::ExecutionContext& ctx) const {
11741
framework::LibraryType library = framework::LibraryType::kPlain;
@@ -223,9 +147,11 @@ class AddMMOpGradMaker : public framework::SingleGradOpMaker<T> {
223147
} // namespace paddle
224148

225149
namespace ops = paddle::operators;
226-
150+
DELCARE_INFER_SHAPE_FUNCTOR(addmm, AddmmInferShapeFunctor,
151+
PT_INFER_META(phi::AddmmInferMeta));
227152
REGISTER_OPERATOR(addmm, ops::AddMMOp, ops::AddMMOpMaker,
228153
ops::AddMMOpGradMaker<paddle::framework::OpDesc>,
229-
ops::AddMMOpGradMaker<paddle::imperative::OpBase>);
154+
ops::AddMMOpGradMaker<paddle::imperative::OpBase>,
155+
AddmmInferShapeFunctor);
230156

231157
REGISTER_OPERATOR(addmm_grad, ops::AddMMGradOp);

paddle/fluid/operators/cholesky_op.cc

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15+
#include "paddle/fluid/framework/infershape_utils.h"
1516
#include "paddle/fluid/framework/op_registry.h"
17+
#include "paddle/phi/core/infermeta_utils.h"
18+
#include "paddle/phi/infermeta/unary.h"
1619

1720
namespace paddle {
1821
namespace operators {
@@ -23,26 +26,6 @@ using framework::Tensor;
2326
class CholeskyOp : public framework::OperatorWithKernel {
2427
public:
2528
using framework::OperatorWithKernel::OperatorWithKernel;
26-
27-
void InferShape(framework::InferShapeContext* ctx) const override {
28-
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Cholesky");
29-
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Cholesky");
30-
auto dims = ctx->GetInputDim("X");
31-
auto rank = dims.size();
32-
PADDLE_ENFORCE_GE(rank, 2,
33-
platform::errors::InvalidArgument(
34-
"The Input(X) should have at least 2 dimensions. But "
35-
"received a %d dimension tensor.",
36-
rank));
37-
PADDLE_ENFORCE_EQ(
38-
dims[rank - 2], dims[rank - 1],
39-
platform::errors::InvalidArgument(
40-
"The inner-most 2 dimensions of Input(X) all should be symmetric "
41-
"positive-definite matrices and have the same size. But received "
42-
"X's shape[-2] = %d and shape[-1] = %d.",
43-
dims[rank - 2], dims[rank - 1]));
44-
ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
45-
}
4629
};
4730

4831
class CholeskyOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -107,7 +90,10 @@ class CholeskyGradOpMaker : public framework::SingleGradOpMaker<T> {
10790
} // namespace paddle
10891

10992
namespace ops = paddle::operators;
93+
DELCARE_INFER_SHAPE_FUNCTOR(cholesky, CholeskyInferShapeFunctor,
94+
PT_INFER_META(phi::CholeskyInferMeta));
11095
REGISTER_OPERATOR(cholesky, ops::CholeskyOp, ops::CholeskyOpMaker,
11196
ops::CholeskyGradOpMaker<paddle::framework::OpDesc>,
112-
ops::CholeskyGradOpMaker<paddle::imperative::OpBase>);
97+
ops::CholeskyGradOpMaker<paddle::imperative::OpBase>,
98+
CholeskyInferShapeFunctor);
11399
REGISTER_OPERATOR(cholesky_grad, ops::CholeskyGradOp);

paddle/fluid/operators/diag_v2_op.cc

Lines changed: 8 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License. */
1414

15-
#include "paddle/fluid/operators/diag_v2_op.h"
1615
#include <algorithm>
16+
17+
#include "paddle/fluid/framework/infershape_utils.h"
1718
#include "paddle/fluid/framework/op_registry.h"
19+
#include "paddle/phi/infermeta/unary.h"
1820
#include "paddle/phi/kernels/funcs/math_function.h"
1921

2022
namespace paddle {
@@ -23,44 +25,6 @@ namespace operators {
2325
class DiagV2Op : public framework::OperatorWithKernel {
2426
public:
2527
using framework::OperatorWithKernel::OperatorWithKernel;
26-
27-
void InferShape(framework::InferShapeContext* ctx) const override {
28-
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "diag_v2");
29-
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "diag_v2");
30-
31-
auto x_dims = ctx->GetInputDim("X");
32-
auto offset = ctx->Attrs().Get<int>("offset");
33-
34-
if (x_dims.size() == 1UL) {
35-
int64_t size_ = x_dims[0] + std::abs(offset);
36-
ctx->SetOutputDim("Out", {size_, size_});
37-
} else if (x_dims.size() == 2UL) {
38-
int64_t size_ = 0;
39-
if (offset >= 0) {
40-
// Note(LutaoChu): Do not use std::min here, otherwise the calculation
41-
// of `size_` will have unexpected result on Windows Python3.8
42-
if (x_dims[0] < x_dims[1] - offset) {
43-
size_ = x_dims[0];
44-
} else {
45-
size_ = x_dims[1] - offset;
46-
}
47-
} else {
48-
// Note(LutaoChu): Do not use std::min here, otherwise the calculation
49-
// of `size_` will have unexpected result on Windows Python3.8
50-
if (x_dims[0] + offset < x_dims[1]) {
51-
size_ = x_dims[0] + offset;
52-
} else {
53-
size_ = x_dims[1];
54-
}
55-
}
56-
ctx->SetOutputDim("Out", {size_});
57-
} else {
58-
PADDLE_THROW(platform::errors::InvalidArgument(
59-
"The input tensor X's dimensions of DiagV2Op should be either 1 or "
60-
"2, but received %d.",
61-
x_dims.size()));
62-
}
63-
}
6428
};
6529

6630
class DiagV2OpMaker : public framework::OpProtoAndCheckerMaker {
@@ -94,59 +58,15 @@ class DiagV2OpMaker : public framework::OpProtoAndCheckerMaker {
9458
}
9559
};
9660

97-
template <typename DeviceContext, typename T>
98-
class DiagV2Kernel : public framework::OpKernel<T> {
99-
public:
100-
void Compute(const framework::ExecutionContext& context) const override {
101-
auto* X = context.Input<framework::Tensor>("X");
102-
auto* x_data = X->data<T>();
103-
auto x_dims = X->dims();
104-
int offset = context.Attr<int>("offset");
105-
auto* out = context.Output<framework::Tensor>("Out");
106-
T* out_data = out->mutable_data<T>(context.GetPlace());
107-
auto out_dims = out->dims();
108-
109-
int64_t i;
110-
if (x_dims.size() == 1) {
111-
float padding_value = context.Attr<float>("padding_value");
112-
phi::funcs::SetConstant<DeviceContext, T> set_padding_value;
113-
auto& dev_ctx = context.template device_context<DeviceContext>();
114-
set_padding_value(dev_ctx, out, static_cast<T>(padding_value));
115-
116-
auto x_length = x_dims[0];
117-
const int& x_stride = ComputeStride(0, x_dims);
118-
119-
auto out_stride_0 = ComputeStride(0, out_dims);
120-
auto out_stride_1 = ComputeStride(1, out_dims);
121-
out_data +=
122-
(offset >= 0 ? offset * out_stride_1 : -offset * out_stride_0);
123-
124-
for (i = 0; i < x_length; i++) {
125-
out_data[i * (out_stride_0 + out_stride_1)] = x_data[i * x_stride];
126-
}
127-
} else {
128-
auto out_length = out_dims[0];
129-
const int& x_stride_0 = ComputeStride(0, x_dims);
130-
const int& x_stride_1 = ComputeStride(1, x_dims);
131-
132-
auto out_stride_0 = ComputeStride(0, out_dims);
133-
x_data += (offset >= 0 ? offset * x_stride_1 : -offset * x_stride_0);
134-
for (i = 0; i < out_length; i++) {
135-
out_data[i * out_stride_0] = x_data[i * (x_stride_0 + x_stride_1)];
136-
}
137-
}
138-
}
139-
};
14061
} // namespace operators
14162
} // namespace paddle
14263

14364
namespace ops = paddle::operators;
65+
DELCARE_INFER_SHAPE_FUNCTOR(diag_v2, DiagInferShapeFunctor,
66+
PT_INFER_META(phi::DiagInferMeta));
67+
14468
REGISTER_OPERATOR(
14569
diag_v2, ops::DiagV2Op, ops::DiagV2OpMaker,
14670
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
147-
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
148-
REGISTER_OP_CPU_KERNEL(
149-
diag_v2, ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, int>,
150-
ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, float>,
151-
ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, double>,
152-
ops::DiagV2Kernel<paddle::platform::CPUDeviceContext, int64_t>);
71+
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
72+
DiagInferShapeFunctor);

0 commit comments

Comments
 (0)