27 changes: 23 additions & 4 deletions python/paddle/nn/functional/activation.py
@@ -14,7 +14,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Literal

import paddle
from paddle import _C_ops, in_dynamic_mode
@@ -150,14 +150,18 @@ def elu_(x: Tensor, alpha: float = 1.0, name: str | None = None) -> Tensor:


def gelu(
x: Tensor, approximate: bool = False, name: str | None = None
x: Tensor,
approximate: Literal["tanh", "none"] | bool = False,
name: str | None = None,
) -> Tensor:
r"""
gelu activation.

The GELU activation function is calculated element by element. For more information, refer to :ref:`Gaussian Error Linear Units`.

if approximate is True
The ``approximate`` parameter must be one of True, False, "tanh", or "none".

if approximate is True or "tanh"

.. math::

@@ -171,7 +175,7 @@ def gelu(

Parameters:
x (Tensor): The input Tensor with data type float32, float64.
approximate (bool, optional): Whether to enable approximation. Default is False.
approximate (str|bool, optional): Whether to enable approximation. Accepts True, False, "tanh", or "none"; "tanh" is equivalent to True and "none" to False. Default is False.
name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.

Returns:
@@ -194,8 +198,23 @@ def gelu(
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15880796, 0.34571400],
[ 0.84119201, 1.39957154]])
>>> out3 = F.gelu(x, "none")
>>> print(out3)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
[ 0.84134471, 1.39978933]])
>>> out4 = F.gelu(x, "tanh")
>>> print(out4)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15880796, 0.34571400],
[ 0.84119201, 1.39957154]])
"""

if approximate == "tanh":
approximate = True
elif approximate == "none":
approximate = False

if in_dynamic_or_pir_mode():
return _C_ops.gelu(x, approximate)
else:
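For reference, the two string values select the exact and tanh-approximated GELU formulas documented in the docstring above. The NumPy sketch below is illustrative only and not part of this change; `gelu_exact` should agree with `F.gelu(x, "none")` / `F.gelu(x, False)`, and `gelu_tanh` with `F.gelu(x, "tanh")` / `F.gelu(x, True)`.

```python
# Illustrative reference only (not part of this PR): NumPy versions of the
# exact and tanh-approximated GELU that "none" / "tanh" select.
import math

import numpy as np


def gelu_exact(x):
    # GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
    return 0.5 * x * (1.0 + np.vectorize(math.erf)(x / np.sqrt(2.0)))


def gelu_tanh(x):
    # GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x**3)))
    return 0.5 * x * (
        1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * np.power(x, 3)))
    )


x = np.array([[-1.0, 0.5], [1.0, 1.5]], dtype="float32")
print(gelu_exact(x))  # should reproduce the out3 values shown in the docstring
print(gelu_tanh(x))   # should reproduce the out4 values shown in the docstring
```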
30 changes: 26 additions & 4 deletions python/paddle/nn/layer/activation.py
@@ -15,7 +15,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Literal

from paddle.framework import get_default_dtype

@@ -176,7 +176,9 @@ class GELU(Layer):
r"""
GELU Activation.

If approximate is True
The ``approximate`` parameter must be one of True, False, "tanh", or "none".

If approximate is True or "tanh"

.. math::

@@ -189,7 +191,7 @@
GELU(x) = 0.5 * x * (1 + erf(\frac{x}{\sqrt{2}}))

Parameters:
approximate (bool, optional): Whether to enable approximation. Default is False.
approximate (str|bool, optional): Whether to enable approximation. Accepts True, False, "tanh", or "none"; "tanh" is equivalent to True and "none" to False. Default is False.
name (str|None, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.

@@ -208,6 +210,24 @@ class GELU(Layer):
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
[ 0.84134471, 1.39978933]])
>>> m = paddle.nn.GELU(False)
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
[ 0.84134471, 1.39978933]])
>>> m = paddle.nn.GELU("none")
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15865529, 0.34573123],
[ 0.84134471, 1.39978933]])
>>> m = paddle.nn.GELU("tanh")
>>> out = m(x)
>>> print(out)
Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
[[-0.15880796, 0.34571400],
[ 0.84119201, 1.39957154]])
>>> m = paddle.nn.GELU(True)
>>> out = m(x)
>>> print(out)
@@ -217,7 +237,9 @@ class GELU(Layer):
"""

def __init__(
self, approximate: bool = False, name: str | None = None
self,
approximate: Literal["tanh", "none"] | bool = False,
name: str | None = None,
) -> None:
super().__init__()
self._approximate = approximate
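As a quick usage illustration (not part of the diff; the layer names and sizes here are arbitrary), the new string form can be passed anywhere the boolean was previously accepted, for example when composing a model:

```python
import paddle
from paddle import nn

# Arbitrary toy MLP, for illustration only: "tanh" behaves like
# approximate=True, "none" like the default approximate=False.
model = nn.Sequential(
    nn.Linear(8, 16),
    nn.GELU("tanh"),
    nn.Linear(16, 4),
    nn.GELU("none"),
)

x = paddle.randn([2, 8])
print(model(x).shape)  # [2, 4]
```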
152 changes: 123 additions & 29 deletions test/legacy_test/test_gelu_op.py
@@ -20,10 +20,14 @@
import paddle
import paddle.base.dygraph as dg
import paddle.nn.functional as F
from paddle import base
from paddle import base, nn


def gelu(x, approximate):
if approximate == "tanh":
approximate = True
if approximate == "none":
approximate = False
if approximate:
y_ref = (
0.5
@@ -46,9 +50,14 @@ def _test_case1_cpu(self, approximate):
place = base.CPUPlace()
with dg.guard(place) as g:
x_var = paddle.to_tensor(x)
y_var = F.gelu(x_var, approximate)
y_test = y_var.numpy()
np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
y_var1 = F.gelu(x_var, approximate)
y_test1 = y_var1.numpy()

func = nn.GELU(approximate)
y_var2 = func(x_var)
y_test2 = y_var2.numpy()
np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)

def _test_case1_gpu(self, approximate):
x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
@@ -57,12 +66,17 @@ def _test_case1_gpu(self, approximate):
place = base.CUDAPlace(0)
with dg.guard(place) as g:
x_var = paddle.to_tensor(x)
y_var = F.gelu(x_var, approximate)
y_test = y_var.numpy()
np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
y_var1 = F.gelu(x_var, approximate)
y_test1 = y_var1.numpy()

func = nn.GELU(approximate)
y_var2 = func(x_var)
y_test2 = y_var2.numpy()
np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)

def test_cases(self):
for approximate in [True, False]:
for approximate in [True, False, "none", "tanh"]:
self._test_case1_cpu(approximate)
if base.is_compiled_with_cuda():
self._test_case1_gpu(approximate)
@@ -86,15 +100,36 @@ def run_gelu_op(approximate):
x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
return y.numpy(), x_grad.numpy()

def run_gelu_class(approximate):
with dg.guard():
x = paddle.to_tensor(x_np)
x.stop_gradient = False
func = nn.GELU(approximate=approximate)
y = func(x)
x_grad = paddle.grad([y], [x], [paddle.to_tensor(y_g_np)])[0]
return y.numpy(), x_grad.numpy()

use_fast_math(True)
y_fast_math, x_g_fast_math = run_gelu_op(True)
y_fast_math1, x_g_fast_math1 = run_gelu_op(True)
y_fast_math2, x_g_fast_math2 = run_gelu_class(True)
use_fast_math(False)

y_ref, x_g_ref = run_gelu_op(True)
np.testing.assert_allclose(y_ref, y_fast_math, rtol=1e-05, atol=0.0005)
y_ref1, x_g_ref1 = run_gelu_op(True)
y_ref2, x_g_ref2 = run_gelu_class(True)
np.testing.assert_allclose(
y_ref1, y_fast_math1, rtol=1e-05, atol=0.0005
)

np.testing.assert_allclose(
x_g_ref1, x_g_fast_math1, rtol=1e-05, atol=0.0005
)

np.testing.assert_allclose(
y_ref2, y_fast_math2, rtol=1e-05, atol=0.0005
)

np.testing.assert_allclose(
x_g_ref, x_g_fast_math, rtol=1e-05, atol=0.0005
x_g_ref2, x_g_fast_math2, rtol=1e-05, atol=0.0005
)


@@ -105,38 +140,97 @@ def _test_case1_cpu(self, approximate):

place = base.CPUPlace()
with dg.guard(place) as g:
x_var = paddle.to_tensor(x)
x_var.stop_gradient = False
y_var = F.gelu(x_var, approximate)
y_test = y_var.numpy()
x_var1 = paddle.to_tensor(x)
x_var2 = paddle.to_tensor(x)

x_var1.stop_gradient = False
x_var2.stop_gradient = False

y_var1 = F.gelu(x_var1, approximate)
y_test1 = y_var1.numpy()

func = nn.GELU(approximate)
y_var2 = func(x_var2)
y_test2 = y_var2.numpy()

loss = paddle.sum(y_var)
loss.backward()
np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(x_var.grad.shape, x_var.shape)
loss1 = paddle.sum(y_var1)
loss1.backward()

loss2 = paddle.sum(y_var2)
loss2.backward()
np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(x_var1.grad.shape, x_var1.shape)

np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(x_var2.grad.shape, x_var2.shape)

def _test_case1_gpu(self, approximate):
x = np.random.uniform(-1, 1, size=(0, 17)).astype(np.float32)
y_ref = gelu(x, approximate)

place = base.CUDAPlace(0)
with dg.guard(place) as g:
x_var = paddle.to_tensor(x)
x_var.stop_gradient = False
y_var = F.gelu(x_var, approximate)
y_test = y_var.numpy()
x_var1 = paddle.to_tensor(x)
x_var2 = paddle.to_tensor(x)

x_var1.stop_gradient = False
x_var2.stop_gradient = False

y_var1 = F.gelu(x_var1, approximate)
y_test1 = y_var1.numpy()

loss = paddle.sum(y_var)
loss.backward()
np.testing.assert_allclose(y_ref, y_test, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(x_var.grad.shape, x_var.shape)
func = nn.GELU(approximate)
y_var2 = func(x_var2)
y_test2 = y_var2.numpy()

loss1 = paddle.sum(y_var1)
loss1.backward()

loss2 = paddle.sum(y_var2)
loss2.backward()
np.testing.assert_allclose(y_ref, y_test1, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(x_var1.grad.shape, x_var1.shape)

np.testing.assert_allclose(y_ref, y_test2, rtol=1e-05, atol=1e-08)
np.testing.assert_allclose(x_var2.grad.shape, x_var2.shape)

def test_cases(self):
for approximate in [True, False]:
for approximate in [True, False, "none", "tanh"]:
self._test_case1_cpu(approximate)
if base.is_compiled_with_cuda():
self._test_case1_gpu(approximate)


class TestGeluError(unittest.TestCase):

def setUp(self):
x = np.random.uniform(-1, 1, size=(11, 17)).astype(np.float32)
self.x = paddle.to_tensor(x)

def test_gelu_op_error(self):

def test_type_error1():
y = F.gelu(self.x, "tan")

def test_type_error2():
y = F.gelu(self.x, 1234)

self.assertRaises(TypeError, test_type_error1)
self.assertRaises(TypeError, test_type_error2)

def test_gelu_class_error(self):

def test_type_error1():
func = nn.GELU("tan")
y = func(self.x)

def test_type_error2():
func = nn.GELU(1234)
y = func(self.x)

self.assertRaises(TypeError, test_type_error1)
self.assertRaises(TypeError, test_type_error2)


if __name__ == '__main__':
unittest.main()