|
24 | 24 | ### Parse options |
25 | 25 | ### |
26 | 26 | from optparse import OptionParser |
27 | | -usage="%prog [options] <feat-dim> <num-leaves> >nnet-proto-file" |
| 27 | + |
| 28 | +usage = "%prog [options] <feat-dim> <num-leaves> >nnet-proto-file" |
28 | 29 | parser = OptionParser(usage) |
29 | 30 | # |
30 | | -parser.add_option('--num-cells', dest='num_cells', type='int', default=800, |
31 | | - help='Number of LSTM cells [default: %default]'); |
32 | | -parser.add_option('--num-recurrent', dest='num_recurrent', type='int', default=512, |
33 | | - help='Number of LSTM recurrent units [default: %default]'); |
34 | | -parser.add_option('--num-layers', dest='num_layers', type='int', default=2, |
35 | | - help='Number of LSTM layers [default: %default]'); |
36 | | -parser.add_option('--lstm-stddev-factor', dest='lstm_stddev_factor', type='float', default=0.01, |
37 | | - help='Standard deviation of initialization [default: %default]'); |
38 | | -parser.add_option('--param-stddev-factor', dest='param_stddev_factor', type='float', default=0.04, |
39 | | - help='Standard deviation in output layer [default: %default]'); |
40 | | -parser.add_option('--clip-gradient', dest='clip_gradient', type='float', default=5.0, |
41 | | - help='Clipping constant applied to gradients [default: %default]'); |
| 31 | +parser.add_option( |
| 32 | + "--num-cells", dest="num_cells", type="int", default=800, help="Number of LSTM cells [default: %default]" |
| 33 | +) |
| 34 | +parser.add_option( |
| 35 | + "--num-recurrent", |
| 36 | + dest="num_recurrent", |
| 37 | + type="int", |
| 38 | + default=512, |
| 39 | + help="Number of LSTM recurrent units [default: %default]", |
| 40 | +) |
| 41 | +parser.add_option( |
| 42 | + "--num-layers", dest="num_layers", type="int", default=2, help="Number of LSTM layers [default: %default]" |
| 43 | +) |
| 44 | +parser.add_option( |
| 45 | + "--lstm-stddev-factor", |
| 46 | + dest="lstm_stddev_factor", |
| 47 | + type="float", |
| 48 | + default=0.01, |
| 49 | + help="Standard deviation of initialization [default: %default]", |
| 50 | +) |
| 51 | +parser.add_option( |
| 52 | + "--param-stddev-factor", |
| 53 | + dest="param_stddev_factor", |
| 54 | + type="float", |
| 55 | + default=0.04, |
| 56 | + help="Standard deviation in output layer [default: %default]", |
| 57 | +) |
| 58 | +parser.add_option( |
| 59 | + "--clip-gradient", |
| 60 | + dest="clip_gradient", |
| 61 | + type="float", |
| 62 | + default=5.0, |
| 63 | + help="Clipping constant applied to gradients [default: %default]", |
| 64 | +) |
42 | 65 | # |
43 | | -(o,args) = parser.parse_args() |
44 | | -if len(args) != 2 : |
45 | | - parser.print_help() |
46 | | - sys.exit(1) |
| 66 | +(o, args) = parser.parse_args() |
| 67 | +if len(args) != 2: |
| 68 | + parser.print_help() |
| 69 | + sys.exit(1) |
47 | 70 |
|
48 | | -(feat_dim, num_leaves) = list(map(int,args)); |
| 71 | +(feat_dim, num_leaves) = list(map(int, args)) |
49 | 72 |
|
50 | 73 | # Original prototype from Jiayu, |
51 | | -#<NnetProto> |
52 | | -#<Transmit> <InputDim> 40 <OutputDim> 40 |
53 | | -#<LstmProjectedStreams> <InputDim> 40 <OutputDim> 512 <CellDim> 800 <ParamScale> 0.01 <NumStream> 4 |
54 | | -#<AffineTransform> <InputDim> 512 <OutputDim> 8000 <BiasMean> 0.000000 <BiasRange> 0.000000 <ParamStddev> 0.04 |
55 | | -#<Softmax> <InputDim> 8000 <OutputDim> 8000 |
56 | | -#</NnetProto> |
| 74 | +# <NnetProto> |
| 75 | +# <Transmit> <InputDim> 40 <OutputDim> 40 |
| 76 | +# <LstmProjectedStreams> <InputDim> 40 <OutputDim> 512 <CellDim> 800 <ParamScale> 0.01 <NumStream> 4 |
| 77 | +# <AffineTransform> <InputDim> 512 <OutputDim> 8000 <BiasMean> 0.000000 <BiasRange> 0.000000 <ParamStddev> 0.04 |
| 78 | +# <Softmax> <InputDim> 8000 <OutputDim> 8000 |
| 79 | +# </NnetProto> |
57 | 80 |
|
58 | 81 | print("<NnetProto>") |
59 | 82 | # normally we won't use more than 2 layers of LSTM |
60 | 83 | if o.num_layers == 1: |
61 | | - print("<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \ |
62 | | - (feat_dim, 2*o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)) |
| 84 | + print( |
| 85 | + "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" |
| 86 | + % (feat_dim, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) |
| 87 | + ) |
63 | 88 | elif o.num_layers == 2: |
64 | | - print("<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \ |
65 | | - (feat_dim, 2*o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)) |
66 | | - print("<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" % \ |
67 | | - (2*o.num_recurrent, 2*o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient)) |
| 89 | + print( |
| 90 | + "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" |
| 91 | + % (feat_dim, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) |
| 92 | + ) |
| 93 | + print( |
| 94 | + "<BLstmProjectedStreams> <InputDim> %d <OutputDim> %d <CellDim> %s <ParamScale> %f <ClipGradient> %f" |
| 95 | + % (2 * o.num_recurrent, 2 * o.num_recurrent, o.num_cells, o.lstm_stddev_factor, o.clip_gradient) |
| 96 | + ) |
68 | 97 | else: |
69 | 98 | sys.stderr.write("make_lstm_proto.py ERROR: more than 2 layers of LSTM, not supported yet.\n") |
70 | 99 | sys.exit(1) |
71 | | -print("<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" % \ |
72 | | - (2*o.num_recurrent, num_leaves, o.param_stddev_factor)) |
73 | | -print("<Softmax> <InputDim> %d <OutputDim> %d" % \ |
74 | | - (num_leaves, num_leaves)) |
| 100 | +print( |
| 101 | + "<AffineTransform> <InputDim> %d <OutputDim> %d <BiasMean> 0.0 <BiasRange> 0.0 <ParamStddev> %f" |
| 102 | + % (2 * o.num_recurrent, num_leaves, o.param_stddev_factor) |
| 103 | +) |
| 104 | +print("<Softmax> <InputDim> %d <OutputDim> %d" % (num_leaves, num_leaves)) |
75 | 105 | print("</NnetProto>") |
76 | | - |
77 | | - |
|
0 commit comments