Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion example/distill/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ train_reader = dr.set_sample_list_generator(train_reader)
```
Then run the student code.
``` bash
python train_student.py
python train_with_fleet.py --use_distill_service True
```

## On Kubernetes
Expand Down
10 changes: 5 additions & 5 deletions example/distill/mnist_distill/README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ python train_with_fleet.py --use_distill_service True --distill_teachers 127.0.0
teacher服务使用paddle_serving部署,需保存成serving模型。可以有两种方式获取(详见[如何保存Serving模型](https://github.com/PaddlePaddle/Serving/blob/develop/doc/SAVE.md))。
1. 直接在训练中保存serving模型。
``` bash
python train_with_fleet.py --save_serving_model
python train_with_fleet.py --save_serving_model True
```
保存的代码见[train_with_fleet.py](train_with_fleet.py)。模型输入为img,模型输出为prediction,mnist_model为serving模型的目录
serving_conf为保存的client配置文件。
保存的代码见[train_with_fleet.py](train_with_fleet.py)。模型输入为img,模型输出为prediction。
模型保存到output目录,mnist_cnn_model为保存的serving模型,serving_conf为保存的client配置文件。
``` bash
serving_io.save_model("mnist_cnn_model", "serving_conf",
serving_io.save_model("output/mnist_cnn_model", "output/serving_conf",
{img.name: img}, {prediction.name: prediction},
test_program)
```
Expand Down Expand Up @@ -103,7 +103,7 @@ dr.set_dynamic_teacher(discovery_servers, teacher_service_name)
``` bash
python -m paddle_edl.distill.redis.balance_server \
--server 127.0.0.1:7001 \
--db_endpoints 127.0.0.1:6379```
--db_endpoints 127.0.0.1:6379
```
#### 4.2 服务注册
在已启动好teacher后,需要往redis数据库注册teacher服务。
Expand Down
24 changes: 13 additions & 11 deletions example/distill/mnist_distill/train_with_fleet.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,11 @@ def train(nn_type,
train_nranks = fleet.worker_num()

optimizer = fluid.optimizer.Momentum(learning_rate=0.001, momentum=0.9)
if train_nranks != 1:
if use_cuda:
optimizer = fleet.distributed_optimizer(optimizer)
optimizer.minimize(loss)

main_program = fleet.main_program if train_nranks != 1 else main_program
main_program = fleet.main_program if use_cuda else main_program
gpu_id = int(os.getenv("FLAGS_selected_gpus", "0"))

def train_test(train_test_program, train_test_reader):
Expand All @@ -183,20 +183,20 @@ def train_test(train_test_program, train_test_reader):
py_train_reader.set_sample_list_generator(train_reader, reader_places)
py_test_reader = fluid.io.DataLoader.from_generator(
feed_list=test_inputs, capacity=64)
py_test_reader.set_sample_list_generator(test_reader, reader_places)
py_test_reader.set_sample_list_generator(test_reader, place)

exe = fluid.Executor(place)
exe.run(startup_program)
epochs = [epoch_id for epoch_id in range(PASS_NUM)]
epochs = [epoch_id for epoch_id in range(NUM_EPOCHS)]

lists = []
step = 0
for epoch_id in epochs:
for step_id, data in enumerate(py_train_reader()):
metrics = exe.run(main_program, feed=data, fetch_list=[loss, acc])
if step % 100 == 0:
print("Pass {}, Epoch {}, Cost {}".format(step, epoch_id,
metrics[0]))
print("Pass {}, Step {}, Cost {}".format(epoch_id, step,
metrics[0].mean()))
step += 1

if train_rank == 0:
Expand All @@ -205,7 +205,7 @@ def train_test(train_test_program, train_test_reader):
train_test_program=test_program,
train_test_reader=py_test_reader)

print("Test with Epoch %d, avg_cost: %s, acc: %s" %
print("Test with Pass %d, avg_cost: %s, acc: %s" %
(epoch_id, avg_loss_val, acc_val))
lists.append((epoch_id, avg_loss_val, acc_val))
if save_dirname is not None:
Expand All @@ -218,15 +218,17 @@ def train_test(train_test_program, train_test_reader):
if train_rank == 0:
if args.save_serving_model:
import paddle_serving_client.io as serving_io
serving_io.save_model("mnist_cnn_model", "serving_conf",
{img.name: img},
if not os.path.isdir('output'):
os.mkdir('output')
serving_io.save_model("output/mnist_cnn_model",
"output/serving_conf", {img.name: img},
{prediction.name: prediction}, test_program)
print('save serving model, feed_names={}, fetch_names={}'.format(
[img.name], [prediction.name]))

# find the best pass
best = sorted(lists, key=lambda list: float(list[1]))[0]
print('Best pass is %s, testing Avgcost is %s' % (best[0], best[1]))
print('Best pass is %s, testing Avg cost is %s' % (best[0], best[1]))
print('The classification accuracy is %.2f%%' % (float(best[2]) * 100))


Expand Down Expand Up @@ -292,7 +294,7 @@ def main(use_cuda, nn_type):
if __name__ == '__main__':
args = parse_args()
BATCH_SIZE = 64
PASS_NUM = args.num_epochs
NUM_EPOCHS = args.num_epochs
use_cuda = args.use_gpu
# predict = 'softmax_regression' # uncomment for Softmax
#predict = 'multilayer_perceptron' # uncomment for MLP
Expand Down