# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
from __future__ import print_function

import unittest

import numpy as np
import paddle
import paddle.distributed.fleet as fleet

from hybrid_parallel_mp_model import TestDistMPTraning


class TestMPFP16(TestDistMPTraning):
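    """Pure-FP16 (AMP level O2) variant of the model-parallel training test.

    The optimizer is wrapped with paddle.amp.decorate and each batch is
    trained through a GradScaler, which is distributed via fleet when model
    parallelism is enabled.
    """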
    def build_optimizer(self, model):
        grad_clip = paddle.nn.ClipGradByGlobalNorm(1.0)
        scheduler = paddle.optimizer.lr.ExponentialDecay(
            learning_rate=0.001, gamma=0.999, verbose=True)
        optimizer = paddle.optimizer.SGD(learning_rate=scheduler,
                                         grad_clip=grad_clip,
                                         parameters=model.parameters())

        # Decorate model and optimizer for pure-FP16 (level O2) training;
        # checkpoints are still saved in FP32 via save_dtype.
        model, optimizer = paddle.amp.decorate(
            models=model,
            optimizers=optimizer,
            level='O2',
            save_dtype='float32')

        return optimizer

    def train_batch(self, batch, model, optimizer, is_mp):
        scaler = paddle.amp.GradScaler(init_loss_scaling=5160)
        if is_mp:
            # Let fleet coordinate loss scaling across the model-parallel ranks.
            scaler = fleet.distributed_scaler(scaler)
        # Run the forward pass in FP16 under AMP level O2.
        with paddle.amp.auto_cast(enable=True, level="O2"):
            output = model(batch)
            loss = output.mean()

        # Scale the loss to avoid FP16 gradient underflow; scaler.step()
        # unscales the gradients and applies the optimizer update, and
        # scaler.update() adjusts the loss scaling for the next step.
        scaled = scaler.scale(loss)
        scaled.backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.clear_grad()
        return scaled


if __name__ == "__main__":
    unittest.main()
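
# NOTE: this script assumes a multi-GPU, model-parallel launch (for example
# via `python -m paddle.distributed.launch --gpus "0,1" <this_script>`); the
# exact launcher used for this test is not part of this file.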