Merged

66 commits
80b884e
native commit for triple grad of sigmod
veyron95 Sep 22, 2021
d52b81c
Updated unittests files
veyron95 Sep 22, 2021
19d6b05
init functional jacobian api
Sep 22, 2021
f47b48f
merge upstream/develop
Sep 22, 2021
16c048a
Merge pull request #2 from veyron95/ops_derivative
JiabinYang Sep 22, 2021
a6a9053
Merge branch 'support_derivative' of https://github.com/JiabinYang/Pa…
JiabinYang Sep 22, 2021
4febae7
Updated trible_test func
veyron95 Sep 22, 2021
be9da74
Updated gradient_checker & test_script
veyron95 Sep 22, 2021
be2b30d
finish test with dtype float32
Sep 23, 2021
36b8c34
add float64 test case
Sep 23, 2021
35b1ce8
polish code
Sep 24, 2021
3a35a00
use atol=1e-5 with dtype float64
Sep 24, 2021
a3ea12e
fix for ci
Sep 24, 2021
8738cf8
set timeout for test_jacobian
Sep 24, 2021
d6e771e
fix dygraph grad to support high differential
JiabinYang Sep 24, 2021
0bd8287
polish API docstring
Sep 26, 2021
83c8395
Merge branch 'support_derivative' of https://github.com/JiabinYang/Pa…
veyron95 Sep 26, 2021
4109fc5
Updated gradient checker and some related files
veyron95 Sep 26, 2021
19e471c
Merge pull request #4 from veyron95/ops_derivative
JiabinYang Sep 26, 2021
1573b2c
Merge branch 'lml/jacobian' of https://github.com/levi131/Paddle into…
JiabinYang Sep 26, 2021
1408ef5
fix double grad strip error for high differential
JiabinYang Sep 26, 2021
ea78b6e
fix double grad strip error for high differential
JiabinYang Sep 26, 2021
2351a99
Add Sigmoid triple grad tests
veyron95 Sep 26, 2021
7a3fbd1
fix dygraph double grad dtype error when calling for high differentia…
JiabinYang Sep 26, 2021
42df611
Merge pull request #8 from veyron95/ops_derivative
JiabinYang Sep 26, 2021
a6dde75
Updated triple grad teses func
veyron95 Sep 27, 2021
848efcf
Use np.random to initialize ddx
veyron95 Sep 27, 2021
04eab89
Updated triple_grad_check func
veyron95 Sep 28, 2021
38ca20a
Merge pull request #9 from veyron95/ops_derivative
JiabinYang Sep 28, 2021
886d9fb
merge develop
JiabinYang Sep 28, 2021
e9f643d
add todo for gradient checker and refine some comments
JiabinYang Sep 28, 2021
2d6370b
remove additional code
JiabinYang Sep 28, 2021
a3b8e4e
add test for infer_var dtype warning
JiabinYang Sep 29, 2021
13af3ed
Merge branch 'support_derivative' of https://github.com/JiabinYang/Pa…
JiabinYang Sep 29, 2021
20ca8e7
add test for warnging in backward.py
JiabinYang Sep 29, 2021
a961e3c
format python code
JiabinYang Oct 11, 2021
ee5489d
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
JiabinYang Oct 11, 2021
a495960
support multi input in triple gradient checker
JiabinYang Oct 12, 2021
ebe8559
Add matmul triple grad kernel
veyron95 Oct 14, 2021
4f31159
Merge branch 'support_derivative' of https://github.com/JiabinYang/Pa…
veyron95 Oct 14, 2021
4d56a30
Updated comments of TODO
veyron95 Oct 14, 2021
15f2a32
Merge develop branch and all conflicts fixed
veyron95 Oct 14, 2021
07d1490
Supported some special tests
veyron95 Oct 14, 2021
d5fdd20
merge develop
JiabinYang Oct 15, 2021
0e44f39
merge jiabin/support_derivative branch
veyron95 Oct 15, 2021
b52794e
Change code-format to follow CI std
veyron95 Oct 18, 2021
4202d96
Updated gradient_checker.py
veyron95 Oct 19, 2021
91149a7
Fix conflicts
veyron95 Oct 19, 2021
e20ef17
Merge develop and fix conflicts
veyron95 Oct 19, 2021
d0741f4
Removed unnecessary printing log
veyron95 Oct 19, 2021
46dbd64
Change code style to follow CI std
veyron95 Oct 20, 2021
e32e10e
Merge remote-tracking branch '3rd_order/ops_derivative' into develop
Oct 20, 2021
46607df
Merge remote-tracking branch 'upstream/develop' into develop
Oct 20, 2021
9da53dd
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into develop
Oct 22, 2021
528ef73
Merge remote-tracking branch 'upstream/develop' into develop
Nov 13, 2021
36a1dcb
support batch in jacobian and hessian
Nov 15, 2021
9a880bd
add batch jacobian and batch hessian
Nov 20, 2021
205c57f
Add batch_jacobian test, draft version
veyron95 Nov 22, 2021
d021233
[New features] Add elementwise_mul triple grad kernel (#37152)
veyron95 Nov 15, 2021
d3fc2af
Add numerical_batch_jacobian,numerical_batch_hessian and tests
veyron95 Nov 23, 2021
ced8536
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Nov 23, 2021
6a38ac8
Support batch_jacobian and batch_numerical
veyron95 Nov 25, 2021
4c6cb8e
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
veyron95 Nov 25, 2021
9735d15
Use pre-commit to check code format
veyron95 Nov 25, 2021
cf9df58
Update doc, polish code, add unit test
veyron95 Nov 26, 2021
5f98f05
Reset the TIMEOUT properties of test_jacobian to pass CI
veyron95 Nov 26, 2021
3 changes: 2 additions & 1 deletion python/paddle/autograd/__init__.py
@@ -18,6 +18,7 @@
from .py_layer import PyLayer, PyLayerContext # noqa: F401
from ..framework import set_grad_enabled # noqa: F401
from ..fluid.dygraph.base import no_grad_ as no_grad # noqa: F401
from .functional import vjp, jvp, jacobian, hessian, vhp # noqa: F401
from .functional import jacobian, hessian, batch_jacobian, batch_hessian # noqa: F401
from .functional import vjp, jvp, vhp # noqa: F401

__all__ = ['backward', 'PyLayer', 'PyLayerContext']
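For context, the import change above is what exposes the new batch APIs under the public ``paddle.autograd`` namespace. Below is a minimal usage sketch (not part of the diff; it assumes a Paddle build that already contains this PR and runs in dygraph mode, Paddle's default):

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')  # first dimension is the batch size
x.stop_gradient = False

# Both functions are re-exported from paddle.autograd.functional by this change.
jac = paddle.autograd.batch_jacobian(lambda v: v * v, x)
hes = paddle.autograd.batch_hessian(
    lambda v: paddle.sum(v * v, axis=1, keepdim=True), x)
print(jac.shape, hes.shape)  # [2, 8] [2, 8]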
291 changes: 291 additions & 0 deletions python/paddle/autograd/functional.py
@@ -385,6 +385,297 @@ def func(x, y):
return jacobian


@framework.dygraph_only
def batch_jacobian(func, inputs, create_graph=False, allow_unused=False):
'''
.. note::
**This API is ONLY available in the imperative mode.**

This function computes the batch Jacobian matrix of `func` with respect to `inputs`.
Note that the first dimension of ``inputs`` is the batch size.

Parameters:
func (function): a Python function that takes a Tensor or a Tensor
list/tuple as inputs (the first dimension is the batch size) and
returns a Tensor or a Tensor tuple.
inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
Tensor list/tuple of the function ``func``. Note that
the first dimension of ``inputs`` is the batch size.
create_graph (bool, optional): whether to create the gradient graphs
of the computing process. When it is True, higher order derivatives
can be computed; when it is False, the gradient graphs of
the computing process are discarded. Defaults to ``False``.
allow_unused (bool, optional): whether to raise an error or return None if
some Tensors of ``inputs`` are unreachable in the graph. An error is
raised if allow_unused=False, and None is returned as
their gradients if allow_unused=True. Defaults to ``False``.
Returns:
Jacobian (Tensor or nested tuple of Tensors): if function ``func``
takes a Tensor as inputs and returns a Tensor as outputs, Jacobian
will be a single Tensor containing the Jacobian matrix for the
linearized inputs and outputs. If one of the inputs or outputs is
a Tensor and the other is a Tensor list/tuple, then the Jacobian will
be a tuple of Tensors. If both inputs and outputs are Tensor
lists/tuples, then the Jacobian will be a tuple of tuples of Tensors.
Note that the first dimension of ``inputs`` is the batch size.

For example,
if the inputs shape and outputs shape of function ``func`` are [batch_size, num]
and [batch_size, num] respectively, then the Jacobian will be a Tensor with
a shape of [num, batch_size * num], where ``Jacobian[i][j]`` will contain
the Jacobian matrix of the ``i``th column of the output with respect to the
``j``th input, and will have the same dtype and device as the corresponding input.
Other situations can be deduced by analogy.

Examples 1:
.. code-block:: python

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')
weight = paddle.ones(shape=(2, 4), dtype='float64')
y = paddle.ones(shape=(4, 2), dtype='float64')

def func(x):
return paddle.matmul(paddle.matmul(x, weight), y)

x.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, x)
print(batch_jacobian)
# Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[4., 4., 4., 4., 4., 4., 4., 4.],
# [4., 4., 4., 4., 4., 4., 4., 4.]])

Examples 2:
.. code-block:: python

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')
weight = paddle.ones(shape=(2, 4), dtype='float64')
y = paddle.ones(shape=(4, 2), dtype='float64')

def func(x):
return paddle.matmul(paddle.matmul(x, weight), y), x * x

x.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, x)
print(batch_jacobian)
# (Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[4., 4., 4., 4., 4., 4., 4., 4.],
# [4., 4., 4., 4., 4., 4., 4., 4.]]), Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[2., 0., 2., 0., 2., 0., 2., 0.],
# [0., 2., 0., 2., 0., 2., 0., 2.]]))

Examples 3:
.. code-block:: python

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')
weight = paddle.ones(shape=(2, 4), dtype='float64')
y = paddle.ones(shape=(4, 2), dtype='float64')

def func(x, y):
return x * y

x.stop_gradient = False
y.stop_gradient = False
batch_jacobian = paddle.autograd.batch_jacobian(func, [x, y])
print(batch_jacobian)
# (Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[1., 0., 1., 0., 1., 0., 1., 0.],
# [0., 1., 0., 1., 0., 1., 0., 1.]]), Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[1., 0., 1., 0., 1., 0., 1., 0.],
# [0., 1., 0., 1., 0., 1., 0., 1.]]))

'''
inputs = _tensors(inputs, "inputs")
outputs = _tensors(func(*inputs), "outputs")
batch_size = inputs[0].shape[0]
for input in inputs:
assert input.shape[
0] == batch_size, "The first dimension of each input should equal the batch size!"
for output in outputs:
assert output.shape[
0] == batch_size, "The first dimension of each output should equal the batch size!"
fin_size = len(inputs)
fout_size = len(outputs)
flat_outputs = tuple(
reshape(
output, shape=[batch_size, -1]) for output in outputs)
jacobian = tuple()
for i, flat_output in enumerate(flat_outputs):
jac_i = list([] for _ in range(fin_size))
for k in range(flat_output.shape[1]):
row_k = grad(
flat_output[:, k],
inputs,
create_graph=create_graph,
retain_graph=True,
allow_unused=allow_unused)
for j in range(fin_size):
jac_i[j].append(
reshape(
row_k[j], shape=[-1])
if isinstance(row_k[j], paddle.Tensor) else None)
jacobian += (tuple(
_stack_tensor_or_return_none(jac_i_j) for jac_i_j in jac_i), )
if fin_size == 1 and fout_size == 1:
return jacobian[0][0]
elif fin_size == 1 and fout_size != 1:
return tuple(jacobian[i][0] for i in range(fout_size))
elif fin_size != 1 and fout_size == 1:
return jacobian[0]
else:
return jacobian


@framework.dygraph_only
def batch_hessian(func, inputs, create_graph=False, allow_unused=False):
'''
.. note::
**This API is ONLY available in the imperative mode.**

This function computes the batch Hessian matrix of `func` with respect to `inputs`.
Note that the first dimension of ``inputs`` is the batch size.

Parameters:
func (function): a Python function that takes a Tensor or a Tensor
list/tuple as inputs (the first dimension is the batch size) and
returns a Tensor with shape [batch_size, 1].
inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
Tensor list/tuple of the function ``func``.
Note that the first dimension of ``inputs`` is the batch size.
create_graph (bool, optional): whether to create the gradient graphs
of the computing process. When it is True, higher order derivatives
can be computed; when it is False, the gradient graphs of
the computing process are discarded. Defaults to ``False``.
allow_unused (bool, optional): whether to raise an error or return None if
some Tensors of ``inputs`` are unreachable in the graph. An error is
raised if allow_unused=False, and None is returned as
their gradients if allow_unused=True. Defaults to ``False``.
Returns:
Hessian (Tensor or a tuple of tuple of Tensors): if function ``func``
takes a Tensor as ``inputs``, Hessian will be a single Tensor containing
the Hessian matrix for the linearized ``inputs`` Tensor. If function
``func`` takes a Tensor list/tuple as ``inputs``, then the Hessian will
be a tuple of tuple of Tensors. Note that the first dimension of ``inputs``
is the batch size, and that the computation first obtains the first order
derivatives and then differentiates them with respect to the batched input.

For example,
if the inputs shape and outputs shape of function ``func`` are [batch_size, num]
and [batch_size, 1] respectively, then the batched Hessian will be a Tensor with
a shape of [num, batch_size * num].

Why is the final shape [num, batch_size * num] in this case?
Because batch_hessian creates an inner function (a wrapper around paddle.grad())
that computes the sum of the gradients of ``outputs`` with respect to each of the ``inputs``.
This inner function returns the first order derivatives, whose shape is [batch_size, num];
batch_jacobian is then called to compute the Jacobian between these first order derivatives
and the original inputs. The final result ``Hessian[i][j]`` will contain the Jacobian
matrix of the ``i``th column of the output (where the output here means the first order
derivatives) and the ``j``th input, and will have the same dtype and device as the
corresponding input. Other situations can be deduced by analogy.


Examples 1:
.. code-block:: python

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')
weight = paddle.ones(shape=(2, 4), dtype='float64')
y = paddle.ones(shape=(4, 2), dtype='float64')

def func(x):
return paddle.matmul(x * x, weight)[:, 0:1]


x.stop_gradient = False
batch_hessian = paddle.autograd.batch_hessian(func, x)
print(batch_hessian)
# Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[2., 0., 2., 0., 2., 0., 2., 0.],
# [0., 2., 0., 2., 0., 2., 0., 2.]])

Examples 2:
.. code-block:: python

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')
weight = paddle.ones(shape=(2, 4), dtype='float64')
y = paddle.ones(shape=(4, 2), dtype='float64')

def func(x, y):
return paddle.matmul(x * x * y * y, weight)[:, 0:1]

x.stop_gradient = False
y.stop_gradient = False
batch_hessian = paddle.autograd.batch_hessian(func, [x, y])
print(batch_hessian)
# ((Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[2., 0., 2., 0., 2., 0., 2., 0.],
# [0., 2., 0., 2., 0., 2., 0., 2.]]),
# Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[4., 0., 4., 0., 4., 0., 4., 0.],
# [0., 4., 0., 4., 0., 4., 0., 4.]])),
# (Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[4., 0., 4., 0., 4., 0., 4., 0.],
# [0., 4., 0., 4., 0., 4., 0., 4.]]),
# Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[2., 0., 2., 0., 2., 0., 2., 0.],
# [0., 2., 0., 2., 0., 2., 0., 2.]])))


Examples 3:
.. code-block:: python

import paddle

x = paddle.ones(shape=(4, 2), dtype='float64')
weight = paddle.ones(shape=(2, 4), dtype='float64')
y = paddle.ones(shape=(4, 2), dtype='float64')

def func(x, y):
return paddle.matmul(x * x, weight)[:, 0:1]

x.stop_gradient = False
y.stop_gradient = False
batch_hessian = paddle.autograd.batch_hessian(func, [x, y], allow_unused=True)
print(batch_hessian)
# ((Tensor(shape=[2, 8], dtype=float64, place=CUDAPlace(0), stop_gradient=True,
# [[2., 0., 2., 0., 2., 0., 2., 0.],
# [0., 2., 0., 2., 0., 2., 0., 2.]]), None), (None, None))

'''
inputs = _tensors(inputs, "inputs")
outputs = func(*inputs)
batch_size = inputs[0].shape[0]
for input in inputs:
assert input.shape[
0] == batch_size, "The first dimension of each input should equal the batch size!"
assert isinstance(outputs, paddle.Tensor) and outputs.shape == [
batch_size, 1
], "The function to compute batched Hessian matrix should return a Tensor of shape [batch_size, 1]"

def jac_func(*ins):
grad_inputs = grad(
outputs,
ins,
create_graph=True,
retain_graph=True,
allow_unused=allow_unused)
return tuple(
_replace_none_with_zero_tensor(grad_inputs[i], inputs[i])
for i in range(len(inputs)))

return batch_jacobian(
jac_func, inputs, create_graph=create_graph, allow_unused=allow_unused)


@framework.dygraph_only
def hessian(func, inputs, create_graph=False, allow_unused=False):
'''
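As a sanity check on the [num_out, batch_size * num_in] layout described in the batch_jacobian docstring above, the sketch below reproduces the first docstring example by hand. It is illustrative only and not part of the diff; the NumPy reference relies on the fact that, for this matmul example, each sample's output depends only on that sample's input.

import numpy as np
import paddle

batch_size, num_in = 4, 2
weight_np = np.ones((num_in, 4))
y_np = np.ones((4, num_in))

# Per-sample Jacobian of out = x @ weight @ y with respect to x:
# d out[b, i] / d x[b, j] = (weight @ y)[j, i], the same for every sample b.
per_sample = (weight_np @ y_np).T              # shape [num_out, num_in], all entries 4
num_out = per_sample.shape[0]

# Row i of the batch Jacobian holds d(sum over b of out[b, i]) / d x, flattened over
# (batch, num_in), so the per-sample block repeats once per sample along the columns.
reference = np.zeros((num_out, batch_size * num_in))
for b in range(batch_size):
    reference[:, b * num_in:(b + 1) * num_in] = per_sample

x = paddle.ones(shape=(batch_size, num_in), dtype='float64')
x.stop_gradient = False
weight = paddle.to_tensor(weight_np)
y = paddle.to_tensor(y_np)
jac = paddle.autograd.batch_jacobian(
    lambda v: paddle.matmul(paddle.matmul(v, weight), y), x)
np.testing.assert_allclose(jac.numpy(), reference)  # both are all-4 matrices of shape [2, 8]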
CMakeLists.txt for the autograd unit tests
@@ -6,6 +6,6 @@ foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP} ENVS ${GC_ENVS})
endforeach(TEST_OP)

set_tests_properties(test_jacobian PROPERTIES TIMEOUT 20)
set_tests_properties(test_jacobian PROPERTIES TIMEOUT 50)
set_tests_properties(test_hessian PROPERTIES TIMEOUT 50)
set_tests_properties(test_vhp PROPERTIES TIMEOUT 50)