Add polynomial_decay and piecewise_decay #8013
Changes from all commits:
````diff
@@ -15,7 +15,10 @@
 import layers
 from framework import Variable
 
-__all__ = ['exponential_decay', 'natural_exp_decay', 'inverse_time_decay']
+__all__ = [
+    'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
+    'polynomial_decay', 'piecewise_decay'
+]
 """
 When training a model, it's often useful to decay the
 learning rate during training process, this is called
@@ -101,7 +104,7 @@ def inverse_time_decay(learning_rate,
     ```python
     if staircase:
         decayed_learning_rate = learning_rate / (1 + decay_rate * floor(global_step / decay_step))
-    else
+    else:
         decayed_learning_rate = learning_rate / (1 + decay_rate * global_step / decay_step)
     ```
     Args:
@@ -123,3 +126,98 @@ def inverse_time_decay(learning_rate,
     div_res = layers.floor(x=div_res)
 
     return learning_rate / (1 + decay_rate * div_res)
````
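To make the inverse-time formula concrete, here is a plain-Python sketch of the staircase and smooth branches (the rate and step values are illustrative, not from the PR):

```python
import math

learning_rate, decay_rate, decay_step = 0.1, 0.5, 1000

def inverse_time_decay_py(global_step, staircase=False):
    # Mirrors the docstring formula above.
    steps = global_step / decay_step
    if staircase:
        steps = math.floor(steps)
    return learning_rate / (1 + decay_rate * steps)

print(inverse_time_decay_py(1500))                  # 0.1 / (1 + 0.5 * 1.5) = 0.0571...
print(inverse_time_decay_py(1500, staircase=True))  # 0.1 / (1 + 0.5 * 1.0) = 0.0666...
```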
The remainder of the hunk adds the new `polynomial_decay` function:

````python
def polynomial_decay(learning_rate,
                     global_step,
                     decay_steps,
                     end_learning_rate=0.0001,
                     power=1.0,
                     cycle=False):
    """Applies polynomial decay to the initial learning rate.

    ```python
    if cycle:
        decay_steps = decay_steps * ceil(global_step / decay_steps)
    else:
        global_step = min(global_step, decay_steps)
    decayed_learning_rate = (learning_rate - end_learning_rate) *
                            (1 - global_step / decay_steps) ^ power +
                            end_learning_rate
    ```
    Args:
        learning_rate: A scalar float32 value or a Variable. This
            will be the initial learning rate during training.
        global_step: A Variable that records the training step.
        decay_steps: A Python `int32` number.
        end_learning_rate: A Python `float` number.
        power: A Python `float` number.
        cycle: Boolean. If set to True, decay the learning rate
            every decay_steps.

    Returns:
        The decayed learning rate.
    """
    if not isinstance(global_step, Variable):
        raise ValueError("global_step is required for polynomial_decay.")

    if cycle:
        div_res = layers.ceil(x=(global_step / decay_steps))
        zero_var = layers.fill_constant(shape=[1], dtype='float32', value=0.0)
        one_var = layers.fill_constant(shape=[1], dtype='float32', value=1.0)

        with layers.Switch() as switch:
            with switch.case(layers.equal(x=global_step, y=zero_var)):
                layers.assign(input=one_var, output=div_res)
        decay_steps = decay_steps * div_res
    else:
        decay_steps_var = layers.fill_constant(
            shape=[1], dtype='float32', value=float(decay_steps))
        global_step = layers.elementwise_min(x=global_step, y=decay_steps_var)

    return (learning_rate - end_learning_rate) * \
           ((1 - global_step / decay_steps) ** power) + end_learning_rate
````

Review discussion on `global_step`:

> So, where is the `global_step` variable created?

> It should be created outside the optimizer, like:
>
> ```python
> global_step = fluid.layers.create_global_var(
>     shape=[1], value=0, dtype='float32', force_cpu=True)
> sgd_optimizer = fluid.optimizer.SGD(
>     learning_rate=fluid.learning_rate_decay.exponential_decay(
>         learning_rate=0.0001,
>         global_step=global_step,
>         decay_steps=100000,
>         decay_rate=0.5,
>         staircase=True),
>     global_step=global_step)
> ```

> If the `global_step` variable is force_cpu, does `decay_steps` also need to be force_cpu?

Review discussion on `Switch`:

> So, our switch operator only supports a scalar condition now? And our if-else operator supports a vector condition.

> Yes, the current switch operator only supports a scalar condition. Because it uses conditional_block, it will be easy to support a tensor as input in the future.
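For context, a usage sketch for `polynomial_decay`, following the pattern in the reviewer's `exponential_decay` snippet above; the module path is taken from that snippet, and the hyperparameter values are assumptions, not part of the PR:

```python
import paddle.fluid as fluid

# global_step is created outside the optimizer, as the reviewers note.
global_step = fluid.layers.create_global_var(
    shape=[1], value=0, dtype='float32', force_cpu=True)

sgd_optimizer = fluid.optimizer.SGD(
    learning_rate=fluid.learning_rate_decay.polynomial_decay(
        learning_rate=0.01,          # assumed initial rate
        global_step=global_step,
        decay_steps=100000,
        end_learning_rate=0.0001,
        power=1.0,
        cycle=False),
    global_step=global_step)
```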
The hunk also adds `piecewise_decay`:

````python
def piecewise_decay(global_step, boundaries, values):
    """Applies piecewise decay to the initial learning rate.

    ```python
    boundaries = [10000, 20000]
    values = [1.0, 0.5, 0.1]

    if step < 10000:
        learning_rate = 1.0
    elif step >= 10000 and step < 20000:
        learning_rate = 0.5
    else:
        learning_rate = 0.1
    ```
    """

    if len(values) - len(boundaries) != 1:
        raise ValueError("len(values) - len(boundaries) should be 1")

    if not isinstance(global_step, Variable):
        raise ValueError("global_step is required for piecewise_decay.")

    lr = layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    with layers.Switch() as switch:
        for i in range(len(boundaries)):
            boundary_val = layers.fill_constant(
                shape=[1], dtype='float32', value=float(boundaries[i]))
            value_var = layers.fill_constant(
                shape=[1], dtype='float32', value=float(values[i]))
            with switch.case(layers.less_than(global_step, boundary_val)):
                layers.assign(value_var, lr)
        last_value_var = layers.fill_constant(
            shape=[1], dtype='float32', value=float(values[len(values) - 1]))
        with switch.default():
            layers.assign(last_value_var, lr)

    return lr
````
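Similarly, a usage sketch for `piecewise_decay`, reusing the boundaries and values from the docstring example; the surrounding optimizer setup again follows the reviewer's snippet and is an assumption:

```python
import paddle.fluid as fluid

global_step = fluid.layers.create_global_var(
    shape=[1], value=0, dtype='float32', force_cpu=True)

# lr = 1.0 until step 10000, then 0.5 until step 20000, then 0.1,
# exactly as in the docstring example above.
sgd_optimizer = fluid.optimizer.SGD(
    learning_rate=fluid.learning_rate_decay.piecewise_decay(
        global_step=global_step,
        boundaries=[10000, 20000],
        values=[1.0, 0.5, 0.1]),
    global_step=global_step)
```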
Review discussion on `equal`:

> `equal` is an element-wise operator, and it can also be overridden in Python.

> I find that our `equal` is not like Python's `eq`: Python's `eq` returns a bool, but our `equal` returns a vector.
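To illustrate the distinction the reviewers are drawing, here is a NumPy stand-in for element-wise equality versus Python's scalar `==` (an analogy, not the framework's actual implementation):

```python
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([1.0, 0.0, 3.0])

print(a == b)      # element-wise, like layers.equal: [ True False  True]
print(1.0 == 2.0)  # Python scalar eq: a single bool, False

# Because layers.equal returns a tensor rather than a Python bool,
# switch.case above receives a shape-[1] condition, which is why the
# switch operator currently supports only scalar conditions.
```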