
Commit 8623e48

Add python API for backward regularization ops (#5135)
* Add regularizer code
* Fix code
1 parent be00b0c commit 8623e48

File tree

4 files changed, +147 -0 lines changed

python/paddle/v2/framework/framework.py

Lines changed: 2 additions & 0 deletions
@@ -505,6 +505,8 @@ def __init__(self, block, shape, dtype, **kwargs):
         self.optimize_attr = kwargs.get('optimize_attr', {'learning_rate': 1.0})
 
+        self.regularizer = kwargs.get('regularizer', None)
+
 
 # program is a global instance.
 g_program = Program()
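With this change a Parameter accepts an optional regularizer keyword. A minimal sketch of how the kwarg is intended to be used, mirroring the new test added later in this commit (the parameter name and coefficient here are illustrative):

import paddle.v2.framework.framework as framework
import paddle.v2.framework.regularizer as regularizer

program = framework.Program()
block = program.global_block()

# The new 'regularizer' kwarg is stored on the Parameter and later
# consulted by append_regularization_ops during the backward pass.
w = block.create_parameter(
    dtype="float32",
    shape=[5, 10],
    lod_level=0,
    name="w",
    regularizer=regularizer.L2DecayRegularizer(regularization_coeff=0.01))

assert isinstance(w.regularizer, regularizer.L2DecayRegularizer)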

python/paddle/v2/framework/optimizer.py

Lines changed: 3 additions & 0 deletions
@@ -2,6 +2,7 @@
 
 import paddle.v2.framework.framework as framework
 from paddle.v2.framework.backward import append_backward_ops
+from paddle.v2.framework.regularizer import append_regularization_ops
 
 __all__ = [
     'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer',
@@ -161,6 +162,8 @@ def minimize(self, loss, parameter_list=None, no_grad_set=None):
         """
         params_grads = append_backward_ops(loss, parameter_list, no_grad_set or
                                            set())
+        # Add regularization if any
+        params_grads = append_regularization_ops(params_grads)
         optimize_ops = self.create_optimization_pass(params_grads, loss)
         return optimize_ops
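Conceptually, the two ops that append_regularization_ops inserts (a scale followed by an elementwise_add) hand the optimization pass grad + reg_coeff * param instead of the plain gradient, which for SGD is exactly classic weight decay. A small NumPy sketch of that arithmetic, purely for illustration (the real ops run inside the Paddle block, not in NumPy):

import numpy as np

lr, coeff = 0.1, 0.5
param = np.random.rand(5, 10).astype("float32")
grad = np.random.rand(5, 10).astype("float32")

decay = coeff * param       # what the appended 'scale' op computes
reg_grad = grad + decay     # what the appended 'elementwise_add' op computes

# The optimization pass then sees the regularized gradient, so a plain
# SGD step becomes SGD with weight decay:
sgd_on_reg_grad = param - lr * reg_grad
sgd_with_weight_decay = param - lr * grad - lr * coeff * param
assert np.allclose(sgd_on_reg_grad, sgd_with_weight_decay)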

python/paddle/v2/framework/regularizer.py (new file)

Lines changed: 99 additions & 0 deletions

import paddle.v2.framework.framework as framework

__all__ = ['append_regularization_ops', 'L2DecayRegularizer']


def append_regularization_ops(parameters_and_grads):
    """Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameters, gradients) pairs
                              that need to be regularized.

    Returns:
        list of (parameters, gradients) pairs with the regularized gradient

    Raises:
        Exception: Unknown regularization type
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
        # If no gradient or no regularization is specified,
        # we don't need to do anything
        if grad is None or param.regularizer is None:
            params_and_grads.append((param, grad))
            continue

        # Add variable for regularization term in grad block
        regularization_term = param.regularizer(param, grad.block)
        assert grad.shape == regularization_term.shape

        grad.block.append_op(
            type='elementwise_add',
            inputs={"X": grad,
                    "Y": regularization_term},
            outputs={"Out": grad})
        params_and_grads.append((param, grad))

    return params_and_grads


class WeightDecayRegularizer(object):
    """Base class for weight decay regularizers

    Defines the common interface of weight-decay regularizers.
    Weight-decay regularizers are added only during the backward
    pass for faster regularization. They add operations to the network
    that correspond to the gradient of the regularization function.
    Users should not use this class directly, but should use one
    of its implementations.
    """

    def __init__(self):
        pass

    def __call__(self, param, block):
        """Add corresponding weight decay operations to the network
        """
        raise NotImplementedError()


class L2DecayRegularizer(WeightDecayRegularizer):
    """Implements the L2 Weight Decay Regularization
    """

    def __init__(self, regularization_coeff=0.0):
        assert regularization_coeff is not None
        super(L2DecayRegularizer, self).__init__()
        self._regularization_coeff = regularization_coeff

    def __call__(self, param, block):
        """Add L2 weight decay ops to the network

        Adds ops that compute
            L2WeightDecay = reg_coeff * parameter

        Args:
            param: parameter variable for which regularization is applied
            block: block in which the variable is to be created

        Returns:
            new variable holding the weight decay term
        """
        assert isinstance(param, framework.Parameter)
        assert isinstance(block, framework.Block)
        decay = block.create_var(
            dtype="float32", shape=param.shape, lod_level=param.lod_level)
        # Append op to calculate the decay term
        block.append_op(
            type='scale',
            inputs={"X": param},
            outputs={"Out": decay},
            attrs={"scale": self._regularization_coeff})

        return decay
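A minimal sketch of invoking a regularizer directly, which is what append_regularization_ops does internally; the parameter name and coefficient are illustrative, and the graph-building calls mirror the new test below:

import paddle.v2.framework.framework as framework
from paddle.v2.framework.regularizer import L2DecayRegularizer

program = framework.Program()
block = program.global_block()
w = block.create_parameter(
    dtype="float32", shape=[4, 4], lod_level=0, name="w",
    regularizer=L2DecayRegularizer(regularization_coeff=0.1))

# Invoking the regularizer appends a 'scale' op to the block and
# returns a new variable holding reg_coeff * w.
decay = w.regularizer(w, block)
assert decay.shape == w.shape
assert block.ops[-1].type == 'scale'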
python/paddle/v2/framework/tests/test_regularizer.py (new file)

Lines changed: 43 additions & 0 deletions

import unittest

import paddle.v2.framework.framework as framework
import paddle.v2.framework.optimizer as optimizer
import paddle.v2.framework.regularizer as regularizer
from paddle.v2.framework.backward import append_backward_ops


class TestL2DecayRegularizer(unittest.TestCase):
    def test_l2decay_regularizer(self):
        program = framework.Program()
        block = program.global_block()
        mul_x = block.create_parameter(
            dtype="float32",
            shape=[5, 10],
            lod_level=0,
            name="mul.x",
            regularizer=regularizer.L2DecayRegularizer(0.5))
        self.assertTrue(mul_x.regularizer is not None)
        self.assertTrue(
            isinstance(mul_x.regularizer, regularizer.L2DecayRegularizer))
        mul_y = block.create_var(
            dtype="float32", shape=[10, 8], lod_level=0, name="mul.y")
        mul_out = block.create_var(
            dtype="float32", shape=[5, 8], lod_level=0, name="mul.out")
        block.append_op(
            type="mul",
            inputs={"X": mul_x,
                    "Y": mul_y},
            outputs={"Out": mul_out},
            attrs={"x_num_col_dims": 1})
        params_grads = append_backward_ops(mul_out)
        self.assertEqual(len(params_grads), 1)
        count_ops = len(block.ops)
        params_grads = optimizer.append_regularization_ops(params_grads)
        self.assertEqual(len(params_grads), 1)
        self.assertEqual(len(block.ops), count_ops + 2)
        self.assertEqual(block.ops[-1].type, 'elementwise_add')
        self.assertEqual(block.ops[-2].type, 'scale')


if __name__ == '__main__':
    unittest.main()
