72 changes: 56 additions & 16 deletions fluid/ocr_recognition/crnn_ctc_model.py
@@ -26,7 +26,12 @@ def conv_bn_pool(input,
bias_attr=bias,
is_test=is_test)
tmp = fluid.layers.pool2d(
input=tmp, pool_size=2, pool_type='max', pool_stride=2, use_cudnn=True)
input=tmp,
pool_size=2,
pool_type='max',
pool_stride=2,
use_cudnn=True,
ceil_mode=True)

return tmp
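The ceil_mode=True switch above keeps the last, partially covered pooling window instead of dropping it, so odd-sized feature maps are not silently truncated. A minimal sketch of the output-size arithmetic (standard no-padding pooling formula; pool_out_dim is an illustrative helper, not part of the repo):

import math

def pool_out_dim(in_dim, pool_size=2, stride=2, ceil_mode=True):
    # ceil_mode=True rounds the window count up, keeping the trailing partial window.
    if ceil_mode:
        return int(math.ceil((in_dim - pool_size) / float(stride))) + 1
    return (in_dim - pool_size) // stride + 1

# Example: an input width of 7 gives 3 with floor mode but 4 with ceil mode.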

@@ -136,26 +141,61 @@ def encoder_net(images,
def ctc_train_net(images, label, args, num_classes):
regularizer = fluid.regularizer.L2Decay(args.l2)
gradient_clip = None
fc_out = encoder_net(
images,
num_classes,
regularizer=regularizer,
gradient_clip=gradient_clip)
if args.parallel:
places = fluid.layers.get_places()
pd = fluid.layers.ParallelDo(places)
with pd.do():
images_ = pd.read_input(images)
label_ = pd.read_input(label)

fc_out = encoder_net(
images_,
num_classes,
regularizer=regularizer,
gradient_clip=gradient_clip)

cost = fluid.layers.warpctc(
input=fc_out,
label=label_,
blank=num_classes,
norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost)

decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes)

pd.write_output(sum_cost)
pd.write_output(decoded_out)

sum_cost, decoded_out = pd()
sum_cost = fluid.layers.reduce_sum(sum_cost)

else:
fc_out = encoder_net(
images,
num_classes,
regularizer=regularizer,
gradient_clip=gradient_clip)

cost = fluid.layers.warpctc(
input=fc_out, label=label, blank=num_classes, norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost)
decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes)

cost = fluid.layers.warpctc(
input=fc_out, label=label, blank=num_classes, norm_by_times=True)
sum_cost = fluid.layers.reduce_sum(cost)
casted_label = fluid.layers.cast(x=label, dtype='int64')
error_evaluator = fluid.evaluator.EditDistance(
input=decoded_out, label=casted_label)

inference_program = fluid.default_main_program().clone()
with fluid.program_guard(inference_program):
inference_program = fluid.io.get_inference_program(error_evaluator)

optimizer = fluid.optimizer.Momentum(
learning_rate=args.learning_rate, momentum=args.momentum)
optimizer.minimize(sum_cost)
_, params_grads = optimizer.minimize(sum_cost)

decoded_out = fluid.layers.ctc_greedy_decoder(
input=fc_out, blank=num_classes)
casted_label = fluid.layers.cast(x=label, dtype='int64')
error_evaluator = fluid.evaluator.EditDistance(
input=decoded_out, label=casted_label)
return sum_cost, error_evaluator
return sum_cost, error_evaluator, inference_program
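fluid.layers.ctc_greedy_decoder collapses the per-frame argmax into a label sequence, and blank=num_classes means the extra class appended for CTC serves as the blank. A rough, framework-free sketch of what greedy CTC decoding does (illustrative only, not the Fluid implementation):

import numpy as np

def greedy_ctc_decode(logits, blank):
    # logits: [T, num_classes + 1] frame scores; blank is the appended class index.
    best = np.argmax(logits, axis=1)
    out, prev = [], None
    for idx in best:
        if idx != prev and idx != blank:   # collapse repeats, then drop blanks
            out.append(int(idx))
        prev = idx
    return out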


def ctc_infer(images, num_classes):
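For reference, a minimal sketch of the ParallelDo pattern that ctc_train_net now uses for multi-device training, applied to a toy regression loss. It assumes the same (since-deprecated) Fluid API shown in the diff — get_places, ParallelDo, read_input, write_output — and parallel_sum_loss is an illustrative helper, not part of the repo:

import paddle.fluid as fluid

def parallel_sum_loss(x, y):
    places = fluid.layers.get_places()        # one place per visible device
    pd = fluid.layers.ParallelDo(places)
    with pd.do():                             # the body is replicated on every place
        x_ = pd.read_input(x)                 # each replica reads its slice of the batch
        y_ = pd.read_input(y)
        pred = fluid.layers.fc(input=x_, size=1)
        loss = fluid.layers.square_error_cost(input=pred, label=y_)
        pd.write_output(fluid.layers.reduce_sum(loss))
    per_device_loss = pd()                    # gather the per-device outputs
    # Merge into a single scalar, as ctc_train_net does for sum_cost.
    return fluid.layers.reduce_sum(per_device_loss)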
10 changes: 3 additions & 7 deletions fluid/ocr_recognition/ctc_train.py
@@ -1,5 +1,4 @@
"""Trainer for OCR CTC model."""
import paddle.v2 as paddle
import paddle.fluid as fluid
import dummy_reader
import ctc_reader
@@ -24,12 +23,12 @@
add_arg('rnn_hidden_size', int, 200, "Hidden size of rnn layers.")
add_arg('device', int, 0, "Device id. '-1' means running on CPU "
"while '0' means GPU-0.")
add_arg('parallel', bool, True, "Whether use parallel training.")
# yapf: disable

def load_parameter(place):
params = load_param('./name.map', './data/model/results_without_avg_window/pass-00000/')
for name in params:
# print "param: %s" % name
t = fluid.global_scope().find_var(name).get_tensor()
t.set(params[name], place)
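load_parameter above copies pretrained weights into the running program by writing directly into scope tensors. A minimal sketch of that pattern for a single parameter, assuming the name already exists as a parameter of the fluid program (set_parameter is an illustrative helper, not part of the repo):

import numpy as np
import paddle.fluid as fluid

def set_parameter(name, value, place):
    # Look up the parameter variable in the global scope and overwrite its tensor.
    # `value` must match the parameter's shape; dtype is assumed to be float32.
    tensor = fluid.global_scope().find_var(name).get_tensor()
    tensor.set(np.asarray(value, dtype='float32'), place)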

@@ -41,7 +40,8 @@ def train(args, data_reader=dummy_reader):
# define network
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int32', lod_level=1)
sum_cost, error_evaluator = ctc_train_net(images, label, args, num_classes)
sum_cost, error_evaluator, inference_program = ctc_train_net(images, label, args, num_classes)

# data reader
train_reader = data_reader.train(args.batch_size)
test_reader = data_reader.test()
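The label layer above is declared with lod_level=1 because each image carries a character sequence of a different length. A minimal sketch of how such sequences can be packed into a LoDTensor for feeding, assuming the offset-based LoD format of early Fluid releases (labels_to_lodtensor is an illustrative helper; the get_feeder_data call used below presumably performs the equivalent conversion):

import numpy as np
import paddle.fluid as fluid

def labels_to_lodtensor(label_seqs, place):
    # label_seqs: a list of integer label sequences of varying length.
    offsets = [0]
    for seq in label_seqs:
        offsets.append(offsets[-1] + len(seq))
    flat = np.array([c for seq in label_seqs for c in seq],
                    dtype='int32').reshape([-1, 1])
    tensor = fluid.core.LoDTensor()
    tensor.set(flat, place)       # flattened labels, shape [sum(lengths), 1]
    tensor.set_lod([offsets])     # level-1 LoD: sequence boundaries as offsets
    return tensor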
@@ -51,11 +51,8 @@ def train(args, data_reader=dummy_reader):
place = fluid.CUDAPlace(args.device)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

#load_parameter(place)

inference_program = fluid.io.get_inference_program(error_evaluator)

for pass_id in range(args.pass_num):
error_evaluator.reset(exe)
batch_id = 1
@@ -78,7 +75,6 @@ def train(args, data_reader=dummy_reader):
sys.stdout.flush()
batch_id += 1

# evaluate model on test data
error_evaluator.reset(exe)
for data in test_reader():
exe.run(inference_program, feed=get_feeder_data(data, place))
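The tail of the test loop is cut off above. For completeness, a sketch of a full evaluation pass with the EditDistance evaluator, assuming error_evaluator.eval(exe) returns the metrics accumulated since the last reset (the fluid.evaluator API of that era); the print format is illustrative:

# Evaluate on the held-out set after each training pass.
error_evaluator.reset(exe)
for data in test_reader():
    exe.run(inference_program, feed=get_feeder_data(data, place))
test_edit_distance = error_evaluator.eval(exe)
print("End of pass; test edit distance: %s" % (test_edit_distance, ))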