|
16 | 16 |
|
17 | 17 | parser = argparse.ArgumentParser(description=__doc__) |
18 | 18 | parser.add_argument( |
19 | | - "--num_samples", |
| 19 | + "--batch_size", |
20 | 20 | default=100, |
21 | 21 | type=int, |
22 | | - help="Number of samples for parameters tuning. (default: %(default)s)") |
| 22 | + help="Minibatch size for evaluation. (default: %(default)s)") |
23 | 23 | parser.add_argument( |
24 | 24 | "--num_conv_layers", |
25 | 25 | default=2, |
|
57 | 57 | help="Manifest path for normalizer. (default: %(default)s)") |
58 | 58 | parser.add_argument( |
59 | 59 | "--decode_manifest_path", |
60 | | - default='datasets/manifest.test', |
| 60 | + default='datasets/manifest.dev', |
61 | 61 | type=str, |
62 | 62 | help="Manifest path for decoding. (default: %(default)s)") |
63 | 63 | parser.add_argument( |
|
82 | 82 | help="Path for language model. (default: %(default)s)") |
83 | 83 | parser.add_argument( |
84 | 84 | "--alpha_from", |
85 | | - default=0.1, |
| 85 | + default=0.22, |
86 | 86 | type=float, |
87 | 87 | help="Where alpha starts from. (default: %(default)f)") |
88 | 88 | parser.add_argument( |
89 | 89 | "--num_alphas", |
90 | | - default=14, |
| 90 | + default=10, |
91 | 91 | type=int, |
92 | 92 | help="Number of candidate alphas. (default: %(default)d)") |
93 | 93 | parser.add_argument( |
94 | 94 | "--alpha_to", |
95 | | - default=0.36, |
| 95 | + default=0.40, |
96 | 96 | type=float, |
97 | 97 | help="Where alpha ends with. (default: %(default)f)") |
98 | 98 | parser.add_argument( |
|
102 | 102 | help="Where beta starts from. (default: %(default)f)") |
103 | 103 | parser.add_argument( |
104 | 104 | "--num_betas", |
105 | | - default=20, |
| 105 | + default=7, |
106 | 106 | type=float, |
107 | 107 | help="Number of candidate betas. (default: %(default)d)") |
108 | 108 | parser.add_argument( |
109 | 109 | "--beta_to", |
110 | | - default=1.0, |
| 110 | + default=0.35, |
111 | 111 | type=float, |
112 | 112 | help="Where beta ends with. (default: %(default)f)") |
113 | 113 | parser.add_argument( |
@@ -160,55 +160,77 @@ def tune(): |
160 | 160 | # prepare infer data |
161 | 161 | batch_reader = data_generator.batch_reader_creator( |
162 | 162 | manifest_path=args.decode_manifest_path, |
163 | | - batch_size=args.num_samples, |
| 163 | + batch_size=args.batch_size, |
| 164 | + min_batch_size=1, |
164 | 165 | sortagrad=False, |
165 | 166 | shuffle_method=None) |
166 | | - # get one batch data for tuning |
167 | | - infer_data = batch_reader().next() |
168 | | - |
169 | | - # run inference |
170 | | - infer_results = paddle.infer( |
171 | | - output_layer=output_probs, parameters=parameters, input=infer_data) |
172 | | - num_steps = len(infer_results) // len(infer_data) |
173 | | - probs_split = [ |
174 | | - infer_results[i * num_steps:(i + 1) * num_steps] |
175 | | - for i in xrange(0, len(infer_data)) |
176 | | - ] |
| 167 | + |
| 168 | + # define inferer |
| 169 | + inferer = paddle.inference.Inference( |
| 170 | + output_layer=output_probs, parameters=parameters) |
177 | 171 |
|
178 | 172 | # create grid for search |
179 | 173 | cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) |
180 | 174 | cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) |
181 | 175 | params_grid = [(alpha, beta) for alpha in cand_alphas |
182 | 176 | for beta in cand_betas] |
183 | 177 |
|
| 178 | + # external scorer |
184 | 179 | ext_scorer = LmScorer(args.alpha_from, args.beta_from, |
185 | 180 | args.language_model_path) |
186 | | - ## tune parameters in loop |
187 | | - for alpha, beta in params_grid: |
188 | | - wer_sum, wer_counter = 0, 0 |
189 | | - # reset scorer |
190 | | - ext_scorer.reset_params(alpha, beta) |
191 | | - # beam search using multiple processes |
192 | | - beam_search_results = ctc_beam_search_decoder_batch( |
193 | | - probs_split=probs_split, |
194 | | - vocabulary=data_generator.vocab_list, |
195 | | - beam_size=args.beam_size, |
196 | | - cutoff_prob=args.cutoff_prob, |
197 | | - blank_id=len(data_generator.vocab_list), |
198 | | - num_processes=args.num_processes_beam_search, |
199 | | - ext_scoring_func=ext_scorer, ) |
200 | | - for i, beam_search_result in enumerate(beam_search_results): |
201 | | - target_transcription = ''.join([ |
202 | | - data_generator.vocab_list[index] for index in infer_data[i][1] |
203 | | - ]) |
204 | | - wer_sum += wer(target_transcription, beam_search_result[0][1]) |
205 | | - wer_counter += 1 |
206 | 181 |
|
207 | | - print("alpha = %f\tbeta = %f\tWER = %f" % |
208 | | - (alpha, beta, wer_sum / wer_counter)) |
| 182 | + wer_sum = [0.0 for i in xrange(len(params_grid))] |
| 183 | + wer_counter = [0 for i in xrange(len(params_grid))] |
| 184 | + ave_wer = [0.0 for i in xrange(len(params_grid))] |
| 185 | + num_batches = 0 |
| 186 | + |
| 187 | + ## incremental tuning batch by batch |
| 188 | + for infer_data in batch_reader(): |
| 189 | + # run inference |
| 190 | + infer_results = inferer.infer(input=infer_data) |
| 191 | + num_steps = len(infer_results) // len(infer_data) |
| 192 | + probs_split = [ |
| 193 | + infer_results[i * num_steps:(i + 1) * num_steps] |
| 194 | + for i in xrange(0, len(infer_data)) |
| 195 | + ] |
| 196 | + # target transcription |
| 197 | + target_transcription = [ |
| 198 | + ''.join([ |
| 199 | + data_generator.vocab_list[index] for index in infer_data[i][1] |
| 200 | + ]) for i, probs in enumerate(probs_split) |
| 201 | + ] |
| 202 | + |
| 203 | + # grid search on current batch |
| 204 | + for index, (alpha, beta) in enumerate(params_grid): |
| 205 | + # reset scorer |
| 206 | + ext_scorer.reset_params(alpha, beta) |
| 207 | + beam_search_results = ctc_beam_search_decoder_batch( |
| 208 | + probs_split=probs_split, |
| 209 | + vocabulary=data_generator.vocab_list, |
| 210 | + beam_size=args.beam_size, |
| 211 | + blank_id=len(data_generator.vocab_list), |
| 212 | + num_processes=args.num_processes_beam_search, |
| 213 | + ext_scoring_func=ext_scorer, |
| 214 | + cutoff_prob=args.cutoff_prob, ) |
| 215 | + for i, beam_search_result in enumerate(beam_search_results): |
| 216 | + wer_sum[index] += wer(target_transcription[i], |
| 217 | + beam_search_result[0][1]) |
| 218 | + wer_counter[index] += 1 |
| 219 | + ave_wer[index] = wer_sum[index] / wer_counter[index] |
| 220 | + print("alpha = %f, beta = %f, WER = %f" % |
| 221 | + (alpha, beta, ave_wer[index])) |
| 222 | + |
| 223 | +            # output tuning result up to the current batch |
| 224 | + ave_wer_min = min(ave_wer) |
| 225 | + min_index = ave_wer.index(ave_wer_min) |
| 226 | + print("Finish batch %d, alpha_opt = %f, beta_opt = %f, WER_opt = %f\n" % |
| 227 | + (num_batches, params_grid[min_index][0], |
| 228 | + params_grid[min_index][1], ave_wer_min)) |
| 229 | + num_batches += 1 |
209 | 230 |
|
210 | 231 |
|
211 | 232 | def main(): |
| 233 | + utils.print_arguments(args) |
212 | 234 | paddle.init(use_gpu=args.use_gpu, trainer_count=1) |
213 | 235 | tune() |
214 | 236 |
|
|
0 commit comments