Merged
Commits
37 commits
8bf0344
Supported Complex2Real Conversion for Eager Dygraph
jim19930609 Feb 24, 2022
10645f7
Supported Complex2Real Conversion for Eager Dygraph
jim19930609 Feb 24, 2022
b360c23
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Feb 24, 2022
62c5d5e
Enabled complex type promotion test for matmul_v2
jim19930609 Feb 24, 2022
884dddb
Fix CI issues
jim19930609 Feb 25, 2022
9f0bf2b
Merged develop branch
jim19930609 Feb 26, 2022
753798e
Support initializing specific grad tensors to zero for selected opera…
jim19930609 Feb 27, 2022
d98e938
Merged adj_edges_ with GradSlotMeta
jim19930609 Mar 2, 2022
4855da1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 2, 2022
1ded93a
Fixed minor issue
jim19930609 Mar 2, 2022
e478404
Merge develop
jim19930609 Mar 3, 2022
bb5c5bc
Adjusted num runs
jim19930609 Mar 3, 2022
e641d8b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 3, 2022
3cb3c8a
Recovered Eager performance tests configurations
jim19930609 Mar 3, 2022
9942837
Recovered Eager performance tests configurations
jim19930609 Mar 3, 2022
6e06997
Adjusted performance tests configurations
jim19930609 Mar 7, 2022
489e146
Fixed Minor Issues with performance tests
jim19930609 Mar 5, 2022
cc67f30
Merge branch 'support_complex' of https://github.com/jim19930609/Padd…
jim19930609 Mar 15, 2022
489580e
Moved out Edge from GradSlotMeta
jim19930609 Mar 15, 2022
96d0960
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 16, 2022
a0a89db
Fixed issues from merge
jim19930609 Mar 16, 2022
b8538de
Fixed typo
jim19930609 Mar 16, 2022
27991c5
Merge branch 'support_complex' of https://github.com/jim19930609/Padd…
jim19930609 Mar 16, 2022
a25d534
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 17, 2022
ae44285
Addressed review comments
jim19930609 Mar 17, 2022
303f06d
Fixed merge issues
jim19930609 Mar 17, 2022
02efb72
Merge branch 'support_complex' of https://github.com/jim19930609/Padd…
jim19930609 Mar 17, 2022
91dbbe3
Fixed minor issues
jim19930609 Mar 17, 2022
bcb7137
Merge branch 'support_complex' of https://github.com/jim19930609/Padd…
jim19930609 Mar 17, 2022
1410253
Fixed minor issue
jim19930609 Mar 18, 2022
908a9a6
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 18, 2022
bca12a1
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 21, 2022
b8c311c
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 21, 2022
ed54418
Fixed major issues and enabled auto_prune test cases
jim19930609 Mar 22, 2022
4e31a54
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
jim19930609 Mar 22, 2022
154fdd6
Fixed issues from merge
jim19930609 Mar 22, 2022
7eb8252
Merged develop
jim19930609 Mar 22, 2022
5 changes: 3 additions & 2 deletions paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -39,8 +39,9 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
 }
 
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
-operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
-           bool create_graph) {
+operator()(
+    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
+    bool create_graph) {
   VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
   PADDLE_ENFORCE(grads.size() == 1,
                  paddle::platform::errors::Fatal(
2 changes: 1 addition & 1 deletion paddle/fluid/eager/accumulation/accumulation_node.h
@@ -35,7 +35,7 @@ class GradNodeAccumulation : public GradNodeBase {
 
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
       bool create_graph = false) override;
 
   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
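Note on the signature change in the two files above: `grads` loses its `const` qualifier because a grad node may now write zero-filled tensors into empty input slots before running its backward computation, and `// NOLINT` silences the style check against mutable reference parameters. A self-contained sketch of why the mutation forces this, in plain C++ with stand-in types (illustrative names only, not Paddle's API):

#include <cstdio>
#include <vector>

// Stand-in for the nested grads structure; real slots hold Tensors.
using Grads = std::vector<std::vector<double>>;

// Analogue of EagerUtils::FillZeroForEmptyGradInputs: it must mutate its
// argument, which is why operator()(grads, ...) cannot take a const&.
void FillZeroForEmptySlots(Grads* grads, size_t expected_size) {
  for (auto& slot : *grads) {
    if (slot.empty()) slot.assign(expected_size, 0.0);  // materialize zeros
  }
}

int main() {
  Grads grads = {{1.0, 2.0}, {}};    // the second slot arrived empty
  FillZeroForEmptySlots(&grads, 2);  // impossible through a const Grads&
  std::printf("%f\n", grads[1][0]);  // prints 0.000000
  return 0;
}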
@@ -145,8 +145,9 @@ void GradNodeScale::SetTensorWrappers_X(
 void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }
 
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
-operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
-           bool create_graph) {
+operator()(
+    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
+    bool create_graph) {
   // 1. Check Output Size
   PADDLE_ENFORCE(
       ((grads.size() == 1) && (grads[0].size() == 1)),
@@ -39,7 +39,7 @@ class GradNodeScale : public GradNodeBase {
 
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
       bool create_graph = false) override;
 
   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
29 changes: 21 additions & 8 deletions paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -47,6 +47,9 @@ std::unordered_map<std::string, std::vector<std::string>>
 static std::unordered_map<std::string, paddle::framework::AttributeMap>
     operators_with_attrs = {};
 
+static std::unordered_set<std::string> ops_to_fill_zero_for_empty_grads = {
+    "split"};
+
 /* --- Black Ops list that's NO NEED to apply code generation --- */
 static std::unordered_set<std::string> black_ops_list = {"run_program"};
 
@@ -2243,11 +2246,21 @@ static std::string GenerateGradNodeCCContents(
   // [Generation] Get Full Grad Function
   const char* GRAD_FUNCTION_TEMPLATE =
       "std::vector<std::vector<paddle::experimental::Tensor>> "
-      "GradNode%s::operator()(const "
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, "
-      "bool create_graph) {\n%s\n}";
-  std::string grad_function_str = paddle::string::Sprintf(
-      GRAD_FUNCTION_TEMPLATE, fwd_op_type, generated_grad_function_body);
+      "GradNode%s::operator()("
+      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
+      "create_graph) {\n"
+      "%s"
+      "%s"
+      "\n}";
+  std::string fill_zero_str = "";
+  if (ops_to_fill_zero_for_empty_grads.count(fwd_op_type)) {
+    fill_zero_str =
+        "egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, "
+        "this->InputMeta());\n";
+  }
+  std::string grad_function_str =
+      paddle::string::Sprintf(GRAD_FUNCTION_TEMPLATE, fwd_op_type,
+                              fill_zero_str, generated_grad_function_body);
 
   VLOG(6) << "Generated returns";
 
@@ -2279,9 +2292,9 @@ static std::string GenerateGradNodeHeaderContents(
       "  ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n"
       "\n"
       "  virtual std::vector<std::vector<paddle::experimental::Tensor>> "
-      "operator()(const "
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, const "
-      "bool create_graph = false) "
+      "operator()("
+      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
+      "create_graph = false) "
       "override;\n"
       "\n"
       "  void ClearTensorWrappers() override { \n"
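For ops listed in `ops_to_fill_zero_for_empty_grads` (currently only `split`), the generator splices a fill-zero prologue into the grad function through the extra `%s` slots of `GRAD_FUNCTION_TEMPLATE`. A self-contained sketch of that conditional splice, assuming illustrative names rather than the generator's real helpers:

#include <cstdio>
#include <set>
#include <string>

int main() {
  const std::set<std::string> ops_to_fill_zero = {"split"};
  // Mirrors the template shape above: op name, optional prologue, body.
  const char* kTemplate = "GradNode%s::operator()(...) {\n%s%s\n}\n";

  const std::string op = "split";
  const std::string fill_zero =
      ops_to_fill_zero.count(op)
          ? "  egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, "
            "this->InputMeta());\n"
          : "";
  char buf[512];
  std::snprintf(buf, sizeof(buf), kTemplate, op.c_str(), fill_zero.c_str(),
                "  /* generated grad function body */");
  std::puts(buf);  // the prologue appears only for ops that need it
  return 0;
}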
@@ -17,6 +17,8 @@
 import argparse
 import os
 
+ops_to_fill_zero_for_empty_grads = set(["split"])
+
 # For API dispatch used at python-level
 # { op_name : [arg_name, ...] }
 core_ops_returns_info = {}
@@ -598,7 +600,8 @@ class {} : public egr::GradNodeBase {{
   ~{}() override = default;
 
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false) override;
+      std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false) override;
+
   std::string name() override {{ return \" {} \"; }}
 
   void ClearTensorWrappers() override {{
@@ -656,10 +659,11 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
     for _, (ttype, fwd_position,
             grad_api_position) in backward_grad_input_map.items():
         if IsPlainTensorType(ttype):
-            grad_api_args[grad_api_position] = f"grads[{fwd_position}][0]"
+            grad_api_args[
+                grad_api_position] = f"hooked_grads[{fwd_position}][0]"
         else:
             assert IsVectorTensorType(ttype)
-            grad_api_args[grad_api_position] = f"grads[{fwd_position}]"
+            grad_api_args[grad_api_position] = f"hooked_grads[{fwd_position}]"
 
     for name, _, _, grad_api_position in backward_attrs_list:
         saved_attribute_name = GetSavedName(name)
@@ -687,23 +691,30 @@
 
     grad_node_name = GetGradNodeName(fwd_api_name)
 
+    fill_zero_str = ""
+    if fwd_api_name in ops_to_fill_zero_for_empty_grads:
+        fill_zero_str = "egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, this->InputMeta());\n"
+
     if len(namespace) > 0:
         grad_api_namespace = f"paddle::experimental::{namespace}"
     else:
         grad_api_namespace = f"paddle::experimental"
 
     FUNCTION_TEMPLATE = """
-std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph) {{
+std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph) {{
+  {}
+  auto hooked_grads = ApplyGradientHooks(grads);
+
   // Call grad_api function
-  VLOG(3) << \"Finally State Running: \" << \"{}\";
+  VLOG(3) << \"Final State Running: \" << \"{}\";
   auto grad_api_returns = {}::{}({});
   {}
 }}
 """
 
     node_definition_str = FUNCTION_TEMPLATE.format(
-        grad_node_name, grad_node_name, grad_api_namespace, bwd_api_name,
-        grad_api_args_str, returns_str)
+        grad_node_name, fill_zero_str, grad_node_name, grad_api_namespace,
+        bwd_api_name, grad_api_args_str, returns_str)
 
     return node_definition_str
 
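Note the ordering the generated body establishes: zeros are filled into `grads` in place, gradient hooks then run over the completed inputs, and the grad API reads `hooked_grads` rather than the raw `grads`. A self-contained C++ sketch of that control flow, with stand-in types and illustrative names:

#include <functional>
#include <vector>

using Grads = std::vector<std::vector<double>>;

// Stand-in for ApplyGradientHooks: returns a transformed copy.
Grads ApplyHooks(const Grads& grads, const std::function<double(double)>& h) {
  Grads out = grads;
  for (auto& slot : out)
    for (auto& g : slot) g = h(g);
  return out;
}

int main() {
  Grads grads = {{1.0}, {}};
  for (auto& slot : grads)  // 1. fill-zero prologue (mutates grads in place)
    if (slot.empty()) slot.assign(1, 0.0);
  // 2. hooks observe the completed inputs
  Grads hooked = ApplyHooks(grads, [](double g) { return 2.0 * g; });
  // 3. the backward call consumes hooked values, as hooked_grads[...] above
  return (hooked[0][0] == 2.0 && hooked[1][0] == 0.0) ? 0 : 1;
}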
4 changes: 2 additions & 2 deletions paddle/fluid/eager/custom_operator/custom_operator_node.cc
@@ -20,8 +20,8 @@
 
 namespace egr {
 std::vector<std::vector<paddle::experimental::Tensor>> RunCustomOpNode::
-operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
-           bool create_graph) {
+operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+           bool create_graph) {  // NOLINT
   paddle::CustomOpKernelContext ctx;
   auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
       egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
5 changes: 3 additions & 2 deletions paddle/fluid/eager/custom_operator/custom_operator_node.h
@@ -37,8 +37,9 @@ class RunCustomOpNode : public GradNodeBase {
 
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
-      bool create_graph) override;
+      std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      bool create_graph = false)  // NOLINT
+      override;
 
   std::string name() {
     return paddle::string::Sprintf("RunCustomOpNode: %s_grad", op_type_);
20 changes: 20 additions & 0 deletions paddle/fluid/eager/grad_node_info.cc
@@ -102,6 +102,7 @@ const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::OutputMeta() const {
 
 void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
                                  size_t slot_rank) {
+  VLOG(6) << "Set GradSlotMeta for Grad Inputs";
   auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out);
   PADDLE_ENFORCE_LE(
       slot_rank, (bwd_in_meta_.size() - 1),
@@ -117,6 +118,12 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
   auto& meta = metas[0];
   meta.SetStopGradient(fwd_out_meta->StopGradient());
 
+  if (!fwd_out.is_initialized()) {
+    VLOG(6)
+        << "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor";
+    return;
+  }
+
   // Record TensorMeta
   if (phi::DenseTensor::classof(fwd_out.impl().get())) {
     // Only Copy Meta
@@ -128,7 +135,9 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
         paddle::platform::errors::Fatal(
             "Attempting to copy DenseTensorMeta with phi::DataType::UNDEFINED,"
             "which is illegal."));
+
     meta.SetTensorMeta(dense_tensor->meta());
+    meta.SetPlace(fwd_out.inner_place());
 
     if (paddle::framework::IsComplexType(
             paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
@@ -143,6 +152,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
 void GradNodeBase::SetGradInMeta(
     const std::vector<paddle::experimental::Tensor>& fwd_out,
     size_t slot_rank) {
+  VLOG(6) << "Set GradSlotMeta for Grad Inputs";
   size_t slot_size = fwd_out.size();
   PADDLE_ENFORCE_LE(
       slot_rank, (bwd_in_meta_.size() - 1),
@@ -172,6 +182,12 @@ void GradNodeBase::SetGradInMeta(
       meta.SetStopGradient(fwd_out_meta->StopGradient());
     }
 
+    if (!fwd_out_tensor.is_initialized()) {
+      VLOG(6)
+          << "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor";
+      return;
+    }
+
     // Record TensorMeta
     if (phi::DenseTensor::classof(fwd_out_tensor.impl().get())) {
       // Only Copy Meta
@@ -184,6 +200,8 @@ void GradNodeBase::SetGradInMeta(
               "with phi::DataType::UNDEFINED,"
               "which is illegal."));
       meta.SetTensorMeta(dense_tensor->meta());
+      meta.SetPlace(fwd_out_tensor.inner_place());
+
       if (paddle::framework::IsComplexType(
               paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
         need_complex_to_real_ = true;
@@ -228,6 +246,7 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
               "with phi::DataType::UNDEFINED,"
               "which is illegal."));
       meta.SetTensorMeta(dense_tensor->meta());
+      meta.SetPlace(fwd_in.inner_place());
     }
   } else {
     VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
@@ -272,6 +291,7 @@ void GradNodeBase::SetGradOutMeta(
             "phi::DataType::UNDEFINED,"
             "which is illegal."));
         meta.SetTensorMeta(dense_tensor->meta());
+        meta.SetPlace(fwd_in_tensor.inner_place());
       }
     } else {
       VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
6 changes: 5 additions & 1 deletion paddle/fluid/eager/grad_node_info.h
@@ -76,8 +76,12 @@ class GradSlotMeta {
     return *meta_.get();
   }
 
+  void SetPlace(const phi::Place& place) { place_ = place; }
+  const phi::Place& GetPlace() const { return place_; }
+
  private:
   bool stop_gradient_{false};
+  phi::Place place_;
   std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
 };
 

Expand All @@ -102,7 +106,7 @@ class GradNodeBase {
* is better choice to fit this format.
* **/
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false) = 0;

virtual void ClearTensorWrappers() = 0;
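With `SetPlace`/`GetPlace`, a `GradSlotMeta` now records everything needed to allocate a zero gradient lazily: shape and dtype through its `DenseTensorMeta`, and the target device through `phi::Place`. A compilable sketch of that record-then-allocate split, using simplified stand-in types rather than phi's:

#include <cstdint>
#include <string>
#include <vector>

// Simplified stand-in for GradSlotMeta: capture allocation info up front.
struct SlotMeta {
  void SetDims(std::vector<int64_t> dims) { dims_ = std::move(dims); }
  void SetPlace(std::string place) { place_ = std::move(place); }
  const std::vector<int64_t>& GetDims() const { return dims_; }
  const std::string& GetPlace() const { return place_; }

 private:
  std::vector<int64_t> dims_;
  std::string place_;  // plays the role of the new phi::Place member
};

int main() {
  SlotMeta meta;  // filled at forward time, as in SetGradInMeta above
  meta.SetDims({2, 4});
  meta.SetPlace("cpu");
  // At backward time a zero tensor of GetDims() is built on GetPlace(),
  // as paddle::experimental::full does in FillZeroForEmptyGradInputs.
  std::vector<double> zeros(
      static_cast<size_t>(meta.GetDims()[0] * meta.GetDims()[1]), 0.0);
  return (zeros.size() == 8 && meta.GetPlace() == "cpu") ? 0 : 1;
}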
2 changes: 1 addition & 1 deletion paddle/fluid/eager/grad_tensor_holder.h
@@ -53,7 +53,7 @@ class GradTensorHolder {
     return buffer_[pos];
   }
 
-  const std::vector<std::vector<paddle::experimental::Tensor>>& Buffers() {
+  std::vector<std::vector<paddle::experimental::Tensor>>& Buffers() {
     return buffer_;
   }
 
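`Buffers()` plausibly drops its `const` for the same reason `operator()` did: the holder's buffer can now be handed directly to a mutable-reference grads parameter. A self-contained sketch of the binding constraint, with illustrative names (the interaction with the backward engine is an assumption, not shown in this diff):

#include <vector>

struct Holder {  // stand-in for GradTensorHolder
  std::vector<std::vector<double>>& Buffers() { return buffer_; }

 private:
  std::vector<std::vector<double>> buffer_{{0.0}};
};

// Stand-in for a GradNodeBase::operator() taking mutable grads.
void Backward(std::vector<std::vector<double>>& grads) { grads[0][0] = 1.0; }

int main() {
  Holder h;
  Backward(h.Buffers());  // a const& accessor could not be passed here
  return (h.Buffers()[0][0] == 1.0) ? 0 : 1;
}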
@@ -80,13 +80,15 @@ TEST(AccumulationNode, Tensor) {
   grad_meta->SetStopGradient(false);
 
   // operator()
-  paddle::experimental::Tensor ret_et0 = node->operator()({{et0}})[0][0];
+  std::vector<std::vector<paddle::experimental::Tensor>> et0_vec = {{et0}};
+  paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
   auto* ret_et0_ptr =
       std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl())
           ->data<paddle::platform::float16>();
   CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f));
 
-  paddle::experimental::Tensor ret_et1 = node->operator()({{et1}})[0][0];
+  std::vector<std::vector<paddle::experimental::Tensor>> et1_vec = {{et1}};
+  paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
 
   auto* ret_et1_ptr =
       std::dynamic_pointer_cast<phi::DenseTensor>(ret_et1.impl())
std::make_shared<egr::CppTensorVoidHook>(reduce_hook_1));

// operator()
paddle::experimental::Tensor _ret = node->operator()({{et0}})[0][0];
paddle::experimental::Tensor _ret = node->operator()(et0_vec)[0][0];

// Check operator() result, should be 36.0
auto* _ret_ptr = std::dynamic_pointer_cast<phi::DenseTensor>(_ret.impl())
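The test edits above are forced by the new signature: a braced temporary such as `{{et0}}` binds to `const T&` but not to a mutable `T&`, so the grads must now live in a named variable like `et0_vec`. A self-contained illustration:

#include <vector>

void TakesConstRef(const std::vector<std::vector<int>>& grads) { (void)grads; }
void TakesMutableRef(std::vector<std::vector<int>>& grads) { grads.clear(); }

int main() {
  TakesConstRef({{1}});       // OK: temporary binds to a const reference
  // TakesMutableRef({{1}});  // error: cannot bind a temporary to T&
  std::vector<std::vector<int>> named = {{1}};  // like et0_vec in the test
  TakesMutableRef(named);     // OK: named lvalue
  return 0;
}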
@@ -32,7 +32,7 @@ class GradTestNode : public egr::GradNodeBase {
   GradTestNode() : GradNodeBase() { val_ = 1.0; }
   std::string name() override { return "GradTestNode"; }
   std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      std::vector<std::vector<paddle::experimental::Tensor>>& grads,
       bool create_graph = false) override {
     val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl())
                ->data<float>()[0];
16 changes: 16 additions & 0 deletions paddle/fluid/eager/tests/task_tests/eager_utils_test.cc
@@ -247,4 +247,20 @@ TEST(EagerUtils, GetGradAccumulationNode) {
   ASSERT_ANY_THROW(egr::EagerUtils::GetGradAccumulationNode(t0));
 }
 
+TEST(EagerUtils, FillZeroForEmptyGradInputs) {
+  std::vector<std::vector<paddle::experimental::Tensor>> grads = {
+      std::vector<paddle::experimental::Tensor>(1)};
+  std::vector<std::vector<GradSlotMeta>> slot_metas = {
+      std::vector<GradSlotMeta>(1)};
+
+  phi::DenseTensorMeta tensor_meta;
+  tensor_meta.dtype = paddle::experimental::DataType::FLOAT32;
+  tensor_meta.dims = {2, 4};
+  slot_metas[0][0].SetTensorMeta(tensor_meta);
+  slot_metas[0][0].SetPlace(phi::CPUPlace());
+
+  EagerUtils::FillZeroForEmptyGradInputs(&grads, slot_metas);
+  eager_test::CompareTensorWithValue<float>(grads[0][0], 0.0);
+}
+
 }  // namespace egr
2 changes: 1 addition & 1 deletion paddle/fluid/eager/to_static/run_program_op_node.h
@@ -370,7 +370,7 @@ class GradNodeRunProgram : public egr::GradNodeBase {
   ~GradNodeRunProgram() override = default;
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>> &grads,
+      std::vector<std::vector<paddle::experimental::Tensor>> &grads,  // NOLINT
       bool create_graph) override {
     VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram";
     PADDLE_ENFORCE_EQ(
25 changes: 25 additions & 0 deletions paddle/fluid/eager/utils.cc
@@ -20,6 +20,7 @@
 
 #include "paddle/phi/api/all.h"
 #include "paddle/phi/common/layout.h"
+#include "paddle/phi/core/compat/convert_utils.h"
 #include "paddle/phi/core/tensor_meta.h"
 
 #include "paddle/fluid/framework/data_layout.h"
@@ -392,4 +393,28 @@ std::shared_ptr<egr::GradNodeBase> EagerUtils::GetGradAccumulationNode(
   }
 }
 
+void EagerUtils::FillZeroForEmptyGradInputs(
+    std::vector<std::vector<paddle::experimental::Tensor>>* in_grads,
+    const std::vector<std::vector<GradSlotMeta>>& grad_in_metas) {
+  for (size_t i = 0; i < in_grads->size(); i++) {
+    for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
+      paddle::experimental::Tensor& grad = (*in_grads)[i][j];
+      if (!grad.is_initialized()) {
+        const GradSlotMeta& grad_in_meta = grad_in_metas[i][j];
+        PADDLE_ENFORCE(
+            grad_in_meta.HasTensorMeta(),
+            paddle::platform::errors::Fatal(
+                "Unable to fill empty grad inputs due to empty GradSlotMeta"));
+
+        const auto& tensor_meta = grad_in_meta.GetTensorMeta();
+        phi::Place place = grad_in_meta.GetPlace();
+
+        auto tensor_with_zero = paddle::experimental::full(
+            phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, place);
+        grad.set_impl(tensor_with_zero.impl());
+      }
+    }
+  }
+}
+
 }  // namespace egr