import paddle.v2.framework.framework as framework

__all__ = ['append_regularization_ops', 'L2DecayRegularizer']


def append_regularization_ops(parameters_and_grads):
    """Create and add backward regularization Operators

    Creates and adds backward regularization operators in the BlockDesc.
    This will add gradients of the regularizer function to the gradients
    of the parameters and return these modified gradients. This is the
    same as implementing weight decay in optimizers for regularization.

    Args:
        parameters_and_grads: A list of (parameter, gradient) pairs
                              that need to be regularized.

    Returns:
        list of (parameter, gradient) pairs with the regularized gradients

    Raises:
        Exception: Unknown regularization type
    """
    params_and_grads = []
    for param, grad in parameters_and_grads:
        # If no gradient or no regularizer is specified,
        # then we don't need to do anything
        if grad is None or param.regularizer is None:
            params_and_grads.append((param, grad))
            continue

        # Add a variable for the regularization term in the gradient's block
        regularization_term = param.regularizer(param, grad.block)
        assert grad.shape == regularization_term.shape

        # Add the regularization term to the gradient in place
        grad.block.append_op(
            type='elementwise_add',
            inputs={"X": grad,
                    "Y": regularization_term},
            outputs={"Out": grad})
        params_and_grads.append((param, grad))

    return params_and_grads
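

# Net effect: for every (param, grad) pair whose parameter carries a
# regularizer, the elementwise_add above rewrites the gradient in place as
#     grad <- grad + d/d(param)[ regularization(param) ]
# e.g. with the L2 decay regularizer defined below,
#     grad <- grad + regularization_coeff * param
# which is the classic weight-decay term that optimizers would otherwise
# have to add themselves.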


class WeightDecayRegularizer(object):
    """Base class for weight decay regularizers

    Defines the common interface of weight-decay regularizers.
    Weight-decay regularizers are added only during the backward
    pass for faster regularization. They add operations to the network
    that correspond to the gradient of the regularization function.
    Users should not use this class directly, but should use one
    of its implementations.
    """

    def __init__(self):
        pass

    def __call__(self, param, block):
        """Add corresponding weight decay operations to the network
        """
        raise NotImplementedError()
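

# Illustrative sketch only (not part of this module): other weight-decay
# penalties can be added by subclassing WeightDecayRegularizer and returning
# the gradient of the penalty from __call__. For example, an L1 decay would
# return regularization_coeff * sign(param). The 'sign' operator used below is
# an assumption about the op registry, so the sketch is left commented out.
#
# class L1DecayRegularizer(WeightDecayRegularizer):
#     def __init__(self, regularization_coeff=0.0):
#         super(L1DecayRegularizer, self).__init__()
#         self._regularization_coeff = regularization_coeff
#
#     def __call__(self, param, block):
#         decay = block.create_var(
#             dtype="float32", shape=param.shape, lod_level=param.lod_level)
#         # sign(param), then scale by the regularization coefficient
#         block.append_op(
#             type='sign', inputs={"X": param}, outputs={"Out": decay})
#         block.append_op(
#             type='scale',
#             inputs={"X": decay},
#             outputs={"Out": decay},
#             attrs={"scale": self._regularization_coeff})
#         return decay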


class L2DecayRegularizer(WeightDecayRegularizer):
    """Implements the L2 Weight Decay Regularization
    """

    def __init__(self, regularization_coeff=0.0):
        assert regularization_coeff is not None
        super(L2DecayRegularizer, self).__init__()
        self._regularization_coeff = regularization_coeff

    def __call__(self, param, block):
        """Add L2 weight decay ops to network

        Adds L2 weight decay ops.
        L2WeightDecay = reg_coeff * parameter

        Args:
            param: parameter variable for which regularization is applied
            block: block in which variable is to be created

        Returns:
            new variable for weight decay
        """
        assert isinstance(param, framework.Parameter)
        assert isinstance(block, framework.Block)
        decay = block.create_var(
            dtype="float32", shape=param.shape, lod_level=param.lod_level)
        # Append Op to calculate decay
        block.append_op(
            type='scale',
            inputs={"X": param},
            outputs={"Out": decay},
            attrs={"scale": self._regularization_coeff})

        return decay
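

# Usage sketch (illustrative, not part of this module). How a parameter
# acquires its .regularizer attribute depends on the parameter-creation API,
# so treat the wiring below as an assumption; the (param, grad) pairs normally
# come from the backward pass inside an optimizer.
#
#     regularizer = L2DecayRegularizer(regularization_coeff=1e-4)
#     # ... create a Parameter whose `regularizer` attribute is set to the
#     # object above, run the backward pass to obtain its gradient, then:
#     params_grads = append_regularization_ops(params_grads)
#     # Each returned gradient now equals grad + 1e-4 * param.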