
Commit ba8d79e

Support eager grad interface with allow_unused and multi startup_op
1 parent 9fc70fe commit ba8d79e

File tree

10 files changed: +114 additions, −26 deletions


paddle/fluid/eager/accumulation/accumulation_node.cc

Lines changed: 2 additions & 2 deletions
@@ -39,8 +39,8 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
 }
 
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
-operator()(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
+operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+           const bool create_graph) {
   VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
   PADDLE_ENFORCE(grads.size() == 1,
                  paddle::platform::errors::Fatal(

paddle/fluid/eager/accumulation/accumulation_node.h

Lines changed: 2 additions & 2 deletions
@@ -35,8 +35,8 @@ class GradNodeAccumulation : public GradNodeBase {
 
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      const bool create_graph = false) override;
 
   std::string name() { return "GradNodeAccumulation"; }
 

paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc

Lines changed: 2 additions & 2 deletions
@@ -145,8 +145,8 @@ void GradNodeScale::SetTensorWrappers_X(
 void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }
 
 std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
-operator()(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {
+operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+           const bool create_graph) {
   // 1. Check Output Size
   PADDLE_ENFORCE(
       ((grads.size() == 1) && (grads[0].size() == 1)),

paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h

Lines changed: 2 additions & 2 deletions
@@ -39,8 +39,8 @@ class GradNodeScale : public GradNodeBase {
 
   // Functor: perform backward computations
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      const bool create_graph = false) override;
 
   void SetTensorWrappers_X(
       const std::vector<paddle::experimental::Tensor>& tensors);

paddle/fluid/eager/auto_code_generator/eager_generator.cc

Lines changed: 5 additions & 2 deletions
@@ -2043,6 +2043,7 @@ static std::string GenerateGradNodeCCContents(
   }
 
   const char* BWD_RETURN_TEMPLATE =
+      "  VLOG(1) << \"Run in GradNode, create_graph is: \" << create_graph; \n"
       "  std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
       "GradNode%s::ApplyGradientHooks(grads);\n"
       "  std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
@@ -2056,7 +2057,8 @@ static std::string GenerateGradNodeCCContents(
   const char* GRAD_FUNCTION_TEMPLATE =
       "std::vector<std::vector<paddle::experimental::Tensor>> "
       "GradNode%s::operator()(const "
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads) {\n%s\n}";
+      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, const "
+      "bool create_graph) {\n%s\n}";
   std::string grad_function_str = paddle::string::Sprintf(
       GRAD_FUNCTION_TEMPLATE, fwd_op_type, generated_grad_function_body);
 
@@ -2091,7 +2093,8 @@ static std::string GenerateGradNodeHeaderContents(
       "\n"
       "  virtual std::vector<std::vector<paddle::experimental::Tensor>> "
       "operator()(const "
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads) "
+      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, const "
+      "bool create_graph = false) "
       "override;\n"
       "\n"
       "  std::string name() override { return \" GradNode%s \"; } \n "

paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py

Lines changed: 2 additions & 2 deletions
@@ -533,7 +533,7 @@ class {} : public egr::GradNodeBase {{
   ~{}() override = default;
 
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads) override;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads, const bool create_graph = false) override;
   std::string name() override {{ return \" {} \"; }}
   // SetTensorWrapperX, SetTensorWrapperY, ...
   {}
@@ -609,7 +609,7 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map,
 
     grad_node_name = GetGradNodeName(fwd_api_name)
     FUNCTION_TEMPLATE = """
-std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads) {{
+std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(const std::vector<std::vector<paddle::experimental::Tensor>>& grads, const bool create_graph) {{
     // Call grad_api function
     auto grad_api_returns = paddle::experimental::{}({});
     {}

paddle/fluid/eager/backward.cc

Lines changed: 64 additions & 11 deletions
@@ -34,6 +34,14 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
   // pass
   std::unordered_map<GradNodeBase*, int> node_in_degree_map;
 
+  // init potential startup node's indegree
+  std::queue<GradNodeBase*> queue_tmp = init_queue;
+  while (!queue_tmp.empty()) {
+    GradNodeBase* node = queue_tmp.front();
+    queue_tmp.pop();
+    node_in_degree_map[node] = 0;
+  }
+
   // Copy nodes
   std::queue<GradNodeBase*> queue = init_queue;
   std::unordered_set<GradNodeBase*> visited;
@@ -164,6 +172,7 @@ void GetGraphInfoBetweenTargets(
       }
     }
   }
+
   UpdateGraphInfo(target_nodes, depending_nodes, potential_stop_nodes);
 }
 
@@ -193,17 +202,33 @@ void GetTargetNodesInfo(const std::vector<paddle::experimental::Tensor>& inputs,
 
 std::vector<paddle::experimental::Tensor> GetResults(
     const std::vector<paddle::experimental::Tensor>& inputs,
-    std::unordered_map<GradNodeBase*, paddle::experimental::Tensor>&
-        result_map) {
+    const std::unordered_map<GradNodeBase*, paddle::experimental::Tensor>&
+        results_map,
+    bool allow_unused) {
   VLOG(1) << "Run in GetResults";
   if (inputs.empty()) return {};
 
   std::vector<paddle::experimental::Tensor> results;
   results.reserve(inputs.size());
-  for (auto input : inputs) {
+  auto results_map_ = results_map;
+  for (size_t i = 0; i < inputs.size(); ++i) {
+    auto& input = inputs[i];
     AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(input);
     auto target_node = auto_grad_meta->GetMutableGradNode().get();
-    results.emplace_back(result_map[target_node]);
+
+    if (results_map_.find(target_node) != results_map_.end()) {
+      // TODO(wuweilong): set StopGradient
+      // result_map[target_node].SetOverridedStopGradient(!create_graph_);
+      results.emplace_back(results_map_[target_node]);
+    } else {
+      PADDLE_ENFORCE_EQ(allow_unused, true,
+                        paddle::platform::errors::InvalidArgument(
+                            "The %d-th input does not appear in the backward "
+                            "graph. Please check the input variable or set "
+                            "allow_unused=True to get None result.",
+                            i));
+      results.emplace_back();
+    }
   }
   return results;
 }
@@ -220,6 +245,20 @@ std::vector<paddle::experimental::Tensor> RunBackward(
   // *Inplace version check should perform at node-level
   // *Cross-batch accumulation happens at forward pass
 
+  /* --- Preprocess --- */
+
+  // TODO(wuweilong): output tensor duplicate check
+  // TODO(wuweilong): build no_grad_vars_grads according no_grad_vars
+  // TODO(wuweilong): output tensor' gradient is not in no_grad_vars
+
+  // TODO(wuweilong): check input tensor has grad op and stop_gradient = False
+  // TODO(wuweilong): input tensor duplicate check
+  // TODO(wuweilong): input tensor' gradient is not in no_grad_vars
+
+  // TODO(wuweilong): Prune output_targets which is not the input of startup_ops
+  // TODO(wuweilong): input == output case
+  // TODO(wuweilong): output_targets.size() should eaqul to output_grads.size()
+
   /* --- Initialization --- */
   // 1. Init queue with starting nodes
   // 2. Prepare initial input buffers
@@ -288,14 +327,28 @@ std::vector<paddle::experimental::Tensor> RunBackward(
       getInDegreeMap(queue);
 
   std::unordered_map<GradNodeBase*, AutogradMeta*> target_nodes_inputmeta_map;
-  std::unordered_set<GradNodeBase*> target_nodes;
+  std::unordered_set<GradNodeBase*> target_nodes;  // should be updated?
   GetTargetNodesInfo(inputs, &target_nodes, &target_nodes_inputmeta_map);
 
   std::unordered_map<GradNodeBase*, GradNodeBase*> depending_nodes;
   std::unordered_set<GradNodeBase*> potential_stop_nodes;
   GetGraphInfoBetweenTargets(queue, &target_nodes, &depending_nodes,
                              &potential_stop_nodes);
 
+  std::unordered_set<GradNodeBase*> startup_ops_;
+  // ready_queue store all startup nodes
+  std::queue<GradNodeBase*> ready_queue;
+
+  // startup op's indegree should be 0
+  for (auto& pair : node_in_degree_map) {
+    if (pair.second == 0) {
+      auto* op = pair.first;
+      startup_ops_.emplace(op);
+      ready_queue.emplace(op);
+    }
+  }
+  VLOG(1) << " startup_ops' size is :" << startup_ops_.size();
+
   std::unordered_map<GradNodeBase*, paddle::experimental::Tensor> results_map;
 
   /* --- Topological Visit --- */
@@ -306,9 +359,9 @@ std::vector<paddle::experimental::Tensor> RunBackward(
   // |- Prepare for next node
   // 3. Update queue
   VLOG(6) << "Run Backward";
-  while (!queue.empty()) {
-    GradNodeBase* node = queue.front();
-    queue.pop();
+  while (!ready_queue.empty()) {
+    GradNodeBase* node = ready_queue.front();
+    ready_queue.pop();
 
     // Run node: This is where Hook happens
     PADDLE_ENFORCE(
@@ -334,7 +387,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
 
     // Run Pre Backward Node and get outputs
     std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
-        (*node)(node_input_buffer->Buffers());
+        (*node)(node_input_buffer->Buffers(), create_graph);
     // TODO(jiabin): Should we erase it or find a more efficient way.
     node_input_buffers_dict.erase(node);
 
@@ -410,13 +463,13 @@ std::vector<paddle::experimental::Tensor> RunBackward(
         }
 
         if (node_in_degree_map[next_node] == 0 && !is_potential_stop_node) {
-          queue.emplace(std::move(next_node));
+          ready_queue.emplace(std::move(next_node));
         }
       }
     }
   }
   if (!inputs.empty()) {
-    return GetResults(inputs, results_map);
+    return GetResults(inputs, results_map, allow_unused);
   }
 
   VLOG(1) << "Run backward in the end, return {}";
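
Taken together, the backward.cc changes amount to a standard multi-source topological traversal: getInDegreeMap seeds every potential startup node at in-degree 0, every zero-in-degree node becomes a startup op and enters ready_queue, nodes are popped and executed as their in-degree drains, and GetResults only tolerates an input whose grad node never shows up in results_map when allow_unused is true. Below is a minimal Python sketch of that traversal logic; the run_backward helper, node names, and toy graph are illustrative only and are not Paddle APIs.

from collections import deque

def run_backward(startup_nodes, out_edges):
    # Simplified illustration of the traversal RunBackward performs.
    # startup_nodes: nodes whose gradients are ready up front (the init queue).
    # out_edges: {node: [next_node, ...]} adjacency of the backward graph.

    # Seed every potential startup node with in-degree 0 (as getInDegreeMap now does),
    # then count incoming edges for the rest of the graph.
    in_degree = {node: 0 for node in startup_nodes}
    for node, nexts in out_edges.items():
        in_degree.setdefault(node, 0)
        for nxt in nexts:
            in_degree[nxt] = in_degree.get(nxt, 0) + 1

    # Every zero-in-degree node is a startup op and enters the ready queue,
    # so graphs with several independent roots are drained in one pass.
    ready_queue = deque(n for n, deg in in_degree.items() if deg == 0)

    visit_order = []
    while ready_queue:
        node = ready_queue.popleft()
        visit_order.append(node)  # the real code runs (*node)(grads, create_graph) here
        for nxt in out_edges.get(node, []):
            in_degree[nxt] -= 1
            if in_degree[nxt] == 0:
                ready_queue.append(nxt)
    return visit_order

# Two independent startup ops ("multi startup_op"): both branches are visited.
print(run_backward(["grad_out_a", "grad_out_b"],
                   {"grad_out_a": ["matmul_grad"], "grad_out_b": ["sigmoid_grad"]}))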

paddle/fluid/eager/grad_node_info.h

Lines changed: 2 additions & 1 deletion
@@ -95,7 +95,8 @@ class GradNodeBase {
    * is better choice to fit this format.
    * **/
   virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads) = 0;
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      const bool create_graph = false) = 0;
 
   /**
    * AddEdges is designed to set input tensors' backward Node as current

paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h

Lines changed: 2 additions & 2 deletions
@@ -32,8 +32,8 @@ class GradTestNode : public egr::GradNodeBase {
   GradTestNode() : GradNodeBase() { val_ = 1.0; }
   std::string name() override { return "GradTestNode"; }
   std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& grads)
-      override {
+      const std::vector<std::vector<paddle::experimental::Tensor>>& grads,
+      const bool create_graph = false) override {
    val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl())
               ->data<float>()[0];
    phi::DenseTensorMeta meta =

python/paddle/fluid/tests/unittests/test_imperative_double_grad.py

Lines changed: 31 additions & 0 deletions
@@ -19,6 +19,7 @@
 import unittest
 from unittest import TestCase
 import numpy as np
+import paddle.compat as cpt
 from paddle.fluid.framework import _test_eager_guard
 
 
@@ -61,6 +62,36 @@ def test_simple_example_eager_grad(self):
         with _test_eager_guard():
             self.func_simple_example_eager_grad()
         self.func_simple_example_eager_grad()
+
+    def func_simple_example_eager_grad_allow_unused(self):
+        np.random.seed(2021)
+        paddle.set_device('cpu')
+        np_x = np.random.random((3, 3))
+        np_y = np.random.random((3, 1))
+        np_z = np.random.random((3, 1))
+        x = paddle.to_tensor(np_x, dtype="float64", stop_gradient=False)
+        y = paddle.to_tensor(np_y, dtype="float64", stop_gradient=False)
+        z = paddle.to_tensor(np_z, dtype="float64", stop_gradient=False)
+        out_z = paddle.nn.functional.sigmoid(z)
+        out = paddle.matmul(x, y)
+
+        dx = fluid.dygraph.grad(out, [x, z], allow_unused=True)
+
+        dout = np.ones_like(np_y)
+        expected_dx = np.matmul(dout, np.transpose(np_y))
+
+        self.assertTrue(np.allclose(dx[0].numpy(), expected_dx[0]))
+
+        try:
+            dx = fluid.dygraph.grad(out, [x, z])
+        except ValueError as e:
+            error_msg = cpt.get_exception_message(e)
+            assert error_msg.find("allow_unused") > 0
+
+    def test_simple_example_eager_grad_allow_unused(self):
+        with _test_eager_guard():
+            self.func_simple_example_eager_grad_allow_unused()
+        self.func_simple_example_eager_grad_allow_unused()
 
 
 class TestDygraphDoubleGrad(TestCase):
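
For context, the user-facing behavior this test pins down can be reduced to the standalone sketch below. The tensor shapes and the fluid.dygraph.grad call mirror the test above; the exact exception message is version-dependent and is only assumed to mention allow_unused.

import numpy as np
import paddle
import paddle.fluid as fluid

paddle.set_device('cpu')
x = paddle.to_tensor(np.random.random((3, 3)), dtype="float64", stop_gradient=False)
y = paddle.to_tensor(np.random.random((3, 1)), dtype="float64", stop_gradient=False)
z = paddle.to_tensor(np.random.random((3, 1)), dtype="float64", stop_gradient=False)

out = paddle.matmul(x, y)  # z never feeds into out

# With allow_unused=True, the unused input's gradient slot comes back empty instead of failing.
dx, dz = fluid.dygraph.grad(out, [x, z], allow_unused=True)

# Without it, GetResults raises and points the user at allow_unused.
try:
    fluid.dygraph.grad(out, [x, z])
except ValueError as err:
    assert "allow_unused" in str(err)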
