elasticdeeplearning · gongweibao · Jun 11, 2020 · Jun 11, 2020
diff --git a/example/distill/README.md b/example/distill/README.md
@@ -82,7 +82,7 @@ train_reader = dr.set_sample_list_generator(train_reader)
 ``` 
 Then run the student code.
 ``` python
-python train_student.py
+python train_with_fleet.py --use_distill_service True
 ```
 
 ## On Kubernetes

diff --git a/example/distill/mnist_distill/README_CN.md b/example/distill/mnist_distill/README_CN.md
@@ -43,12 +43,12 @@ python train_with_fleet.py --use_distill_service True --distill_teachers 127.0.0
 teacher服务使用paddle_serving部署，需保存成serving模型。可以有两种方式获取(详见[如何保存Serving模型](https://github.com/PaddlePaddle/Serving/blob/develop/doc/SAVE.md))。
 1. 直接在训练中保存serving模型。
 ``` bash
-python train_with_fleet.py --save_serving_model
+python train_with_fleet.py --save_serving_model True
 ```
-保存的代码见[train_with_fleet.py](train_with_fleet.py)。模型输入为img，模型输出为prediction，mnist_model为serving模型的目录。
-serving_conf为保存的client配置文件。
+保存的代码见[train_with_fleet.py](train_with_fleet.py)。模型输入为img，模型输出为prediction。
+模型保存到output目录，mnist_cnn_model为保存的serving模型，serving_conf为保存的client配置文件。
 ``` bash
-serving_io.save_model("mnist_cnn_model", "serving_conf",
+serving_io.save_model("output/mnist_cnn_model", "output/serving_conf",
  {img.name: img}, {prediction.name: prediction},
  test_program)
 ```
@@ -103,7 +103,7 @@ dr.set_dynamic_teacher(discovery_servers, teacher_service_name)
 ``` python
 python -m paddle_edl.distill.redis.balance_server \
  --server 127.0.0.1:7001 \
- --db_endpoints 127.0.0.1:6379```
+ --db_endpoints 127.0.0.1:6379
 ```
 #### 4.2 服务注册
 在已启动好teacher后，需要往redis数据库注册teacher服务。

diff --git a/example/distill/mnist_distill/train_with_fleet.py b/example/distill/mnist_distill/train_with_fleet.py
@@ -154,11 +154,11 @@ def train(nn_type,
  train_nranks = fleet.worker_num()
 
  optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
- if train_nranks != 1:
+ if use_cuda:
  optimizer = fleet.distributed_optimizer(optimizer)
  optimizer.minimize(loss)
 
- main_program = fleet.main_program if train_nranks != 1 else main_program
+ main_program = fleet.main_program if use_cuda else main_program
  gpu_id = int(os.getenv("FLAGS_selected_gpus", "0"))
 
  def train_test(train_test_program, train_test_reader):
@@ -183,20 +183,20 @@ def train_test(train_test_program, train_test_reader):
  py_train_reader.set_sample_list_generator(train_reader, reader_places)
  py_test_reader = fluid.io.DataLoader.from_generator(
  feed_list=test_inputs, capacity=64)
- py_test_reader.set_sample_list_generator(test_reader, reader_places)
+ py_test_reader.set_sample_list_generator(test_reader, place)
 
  exe = fluid.Executor(place)
  exe.run(startup_program)
- epochs = [epoch_id for epoch_id in range(PASS_NUM)]
+ epochs = [epoch_id for epoch_id in range(NUM_EPOCHS)]
 
  lists = []
  step = 0
  for epoch_id in epochs:
  for step_id, data in enumerate(py_train_reader()):
  metrics = exe.run(main_program, feed=data, fetch_list=[loss, acc])
  if step % 100 == 0:
- print("Pass {}, Epoch {}, Cost {}".format(step, epoch_id,
-  metrics[0]))
+ print("Pass {}, Step {}, Cost {}".format(epoch_id, step,
+ metrics[0].mean()))
  step += 1
 
  if train_rank == 0:
@@ -205,7 +205,7 @@ def train_test(train_test_program, train_test_reader):
  train_test_program=test_program,
  train_test_reader=py_test_reader)
 
- print("Test with Epoch %d, avg_cost: %s, acc: %s" %
+ print("Test with Pass %d, avg_cost: %s, acc: %s" %
  (epoch_id, avg_loss_val, acc_val))
  lists.append((epoch_id, avg_loss_val, acc_val))
  if save_dirname is not None:
@@ -218,15 +218,17 @@ def train_test(train_test_program, train_test_reader):
  if train_rank == 0:
  if args.save_serving_model:
  import paddle_serving_client.io as serving_io
- serving_io.save_model("mnist_cnn_model", "serving_conf",
- {img.name: img},
+ if not os.path.isdir('output'):
+ os.mkdir('output')
+ serving_io.save_model("output/mnist_cnn_model",
+ "output/serving_conf", {img.name: img},
  {prediction.name: prediction}, test_program)
  print('save serving model, feed_names={}, fetch_names={}'.format(
  [img.name], [prediction.name]))
 
  # find the best pass
  best = sorted(lists, key=lambda list: float(list[1]))[0]
- print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
+ print('Best pass is %s, testing Avg cost is %s' % (best[0], best[1]))
  print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))
 
 
@@ -292,7 +294,7 @@ def main(use_cuda, nn_type):
 if __name__ == '__main__':
  args = parse_args()
  BATCH_SIZE = 64
- PASS_NUM = args.num_epochs
+ NUM_EPOCHS = args.num_epochs
  use_cuda = args.use_gpu
  # predict = 'softmax_regression' # uncomment for Softmax
  #predict = 'multilayer_perceptron' # uncomment for MLP