
Commit 9b9b561

Kaldi 5.1.0 (#1342)
2 parents 68cee21 + 2145519 commit 9b9b561

360 files changed: +24835, -9056 lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
@@ -88,6 +88,10 @@ GSYMS
 /tools/openfst-1.3.4/
 /tools/openfst-1.4.1.tar.gz
 /tools/openfst-1.4.1/
+/tools/openfst-1.5.4.tar.gz
+/tools/openfst-1.5.4/
+/tools/openfst-1.6.0.tar.gz
+/tools/openfst-1.6.0/
 /tools/pa_stable_v19_20111121.tgz
 /tools/portaudio/
 /tools/sctk-2.4.0-20091110-0958.tar.bz2
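
The new entries follow the pattern already used for the older OpenFst versions: the tarball and the extracted directory each get their own rule, since a trailing slash only matches a directory. A quick sanity check from the repository root (the README path is a hypothetical example, not part of the commit):

# -v also prints which .gitignore line matched each path
git check-ignore -v tools/openfst-1.6.0.tar.gz tools/openfst-1.6.0/README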

.travis.yml

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ addons:
 branches:
   only:
   - master
+  - shortcut

 before_install:
   - cat /proc/sys/kernel/core_pattern

egs/fisher_swbd/s5/local/nnet3/run_tdnn_discriminative.sh

Lines changed: 12 additions & 24 deletions
@@ -8,7 +8,7 @@ set -o pipefail
 # note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
 # since the lattice generation runs in about real-time, so takes of the order of
 # 1000 hours of CPU time.
-#
+#
 . ./cmd.sh


@@ -38,27 +38,21 @@ dir=${srcdir}_${criterion}
 ## Egs options
 frames_per_eg=150
 frames_overlap_per_eg=30
-truncate_deriv_weights=10

 ## Nnet training options
 effective_learning_rate=0.00000125
 max_param_change=1
 num_jobs_nnet=4
 num_epochs=2
-regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options
+regularization_opts= # Applicable for providing --xent-regularize and --l2-regularize options
 minibatch_size=64
-adjust_priors=true # May need to be set to false
-# because it does not help in some setups
-modify_learning_rates=true
-last_layer_factor=0.1
-
 ## Decode options
 decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we decided to run more.

 if $use_gpu; then
 if ! cuda-compiled; then
-cat <<EOF && exit 1
-This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
 If you want to use GPUs (and have them), go to src/, and configure and make on a machine
 where "nvcc" is installed. Otherwise, call this script with --use-gpu false
 EOF
@@ -89,7 +83,7 @@ fi
 if [ -z "$lats_dir" ]; then
 lats_dir=${srcdir}_denlats
 if [ $stage -le 2 ]; then
-nj=100
+nj=100
 # this doesn't really affect anything strongly, except the num-jobs for one of
 # the phases of get_egs_discriminative.sh below.
 num_threads_denlats=6
@@ -102,8 +96,8 @@ if [ -z "$lats_dir" ]; then
 fi
 fi

-model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'`
-model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'`
+model_left_context=$(nnet3-am-info $srcdir/final.mdl | grep "^left-context:" | awk '{print $2}')
+model_right_context=$(nnet3-am-info $srcdir/final.mdl | grep "^right-context:" | awk '{print $2}')

 left_context=$[model_left_context + extra_left_context]
 right_context=$[model_right_context + extra_right_context]
@@ -113,7 +107,7 @@ if [ -f $srcdir/frame_subsampling_factor ]; then
 frame_subsampling_opt="--frame-subsampling-factor $(cat $srcdir/frame_subsampling_factor)"
 fi

-cmvn_opts=`cat $srcdir/cmvn_opts`
+cmvn_opts=`cat $srcdir/cmvn_opts`

 if [ -z "$degs_dir" ]; then
 degs_dir=${srcdir}_degs
@@ -126,15 +120,12 @@ if [ -z "$degs_dir" ]; then
 # have a higher maximum num-jobs if
 if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi

-degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true"
-
 steps/nnet3/get_egs_discriminative.sh \
 --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \
---adjust-priors $adjust_priors \
 --online-ivector-dir $online_ivector_dir \
 --left-context $left_context --right-context $right_context \
 $frame_subsampling_opt \
---frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \
+--frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \
 $train_data_dir data/lang ${srcdir}_ali $lats_dir $srcdir/final.mdl $degs_dir ;
 fi
 fi
@@ -147,9 +138,7 @@ if [ $stage -le 4 ]; then
 --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \
 --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
 --regularization-opts "$regularization_opts" \
---truncate-deriv-weights $truncate_deriv_weights --adjust-priors $adjust_priors \
---modify-learning-rates $modify_learning_rates --last-layer-factor $last_layer_factor \
-${degs_dir} $dir
+${degs_dir} $dir
 fi

 graph_dir=exp/tri5a/graph_fsh_sw1_tg
@@ -158,8 +147,8 @@ if [ $stage -le 5 ]; then
 for decode_set in eval2000 rt03; do
 (
 num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
-iter=epoch$x.adj
-
+iter=epoch${x}_adj
+
 steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
 --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
 $graph_dir data/${decode_set}_hires $dir/decode_${decode_set}_fsh_sw1_tg_$iter ;
@@ -181,4 +170,3 @@ fi


 exit 0;
-
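
Two small but meaningful changes in this file are the switch from backticks to $(...) and the anchoring of the grep pattern to ^left-context: / ^right-context: when reading the model's context from nnet3-am-info. A rough illustration of why the anchor matters, using made-up info output rather than real nnet3-am-info text:

# hypothetical two-line output: the unanchored pattern matches both lines,
# so awk prints two numbers and the later $[ ... ] arithmetic would break
printf 'left-context: 16\nextra-left-context: 0\n' | grep "left-context:"  | awk '{print $2}'
# -> 16 and 0
printf 'left-context: 16\nextra-left-context: 0\n' | grep "^left-context:" | awk '{print $2}'
# -> 16 only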

egs/fisher_swbd/s5/local/rt03_data_prep.sh

Lines changed: 10 additions & 10 deletions
@@ -1,14 +1,15 @@
 #!/bin/bash

-# RT-03 data preparation (conversational telephone speech part only)
+# RT-03 data preparation (conversational telephone speech part only)
 # Adapted from Arnab Ghoshal's script for Hub-5 Eval 2000 by Peng Qi

 # To be run from one directory above this script.

 # Expects the standard directory layout for RT-03

 if [ $# -ne 1 ]; then
-echo "Usage: "`basename $0`" <rt03-dir>"
+echo "Usage: $0 <rt03-dir>"
+echo "e.g.: $0 /export/corpora/LDC/LDC2007S10"
 echo "See comments in the script for more details"
 exit 1
 fi
@@ -19,7 +20,7 @@ sdir=$1
 [ ! -d $sdir/data/references/eval03/english/cts ] \
 && echo Expecting directory $tdir/data/references/eval03/english/cts to be present && exit 1;

-. path.sh
+. path.sh

 dir=data/local/rt03
 mkdir -p $dir
@@ -37,7 +38,7 @@ sph2pipe=$KALDI_ROOT/tools/sph2pipe_v2.5/sph2pipe
 && echo "Could not execute the sph2pipe program at $sph2pipe" && exit 1;

 awk -v sph2pipe=$sph2pipe '{
-printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
+printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
 printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);
 }' < $dir/sph.scp | sort > $dir/wav.scp || exit 1;
 #side A - channel 1, side B - channel 2
@@ -47,7 +48,7 @@ awk -v sph2pipe=$sph2pipe '{
 # sw02001-A_000098-001156 sw02001-A 0.98 11.56
 #pem=$sdir/english/hub5e_00.pem
 #[ ! -f $pem ] && echo "No such file $pem" && exit 1;
-# pem file has lines like:
+# pem file has lines like:
 # en_4156 A unknown_speaker 301.85 302.48

 #grep -v ';;' $pem \
@@ -59,7 +60,7 @@ cat $tdir/*.stm | grep -v ';;' | grep -v inter_segment_gap \
 | sort -u > $dir/segments

 # stm file has lines like:
-# en_4156 A en_4156_A 357.64 359.64 <O,en,F,en-F> HE IS A POLICE OFFICER
+# en_4156 A en_4156_A 357.64 359.64 <O,en,F,en-F> HE IS A POLICE OFFICER
 # TODO(arnab): We should really be lowercasing this since the Edinburgh
 # recipe uses lowercase. This is not used in the actual scoring.
 #grep -v ';;' $tdir/reference/hub5e00.english.000405.stm \
@@ -77,7 +78,7 @@ cat $tdir/*.stm | \
 grep -v inter_segment_gap | \
 awk '{
 printf $1; if ($1==";;") printf(" %s",$2); else printf(($2==1)?" A":" B"); for(n=3;n<=NF;n++) printf(" %s", $n); print ""; }'\
-> $dir/stm
+> $dir/stm
 #$tdir/reference/hub5e00.english.000405.stm > $dir/stm
 cp $rtroot/data/trans_rules/en20030506.glm $dir/glm

@@ -87,10 +88,10 @@ cp $rtroot/data/trans_rules/en20030506.glm $dir/glm
 echo "Segments from pem file and stm file do not match." && exit 1;

 grep -v IGNORE_TIME_SEGMENT_ $dir/text.all > $dir/text
-
+
 # create an utt2spk file that assumes each conversation side is
 # a separate speaker.
-awk '{print $1,$2;}' $dir/segments > $dir/utt2spk
+awk '{print $1,$2;}' $dir/segments > $dir/utt2spk
 utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt

 # cp $dir/segments $dir/segments.tmp
@@ -110,4 +111,3 @@ done

 echo Data preparation and formatting completed for RT-03
 echo "(but not MFCC extraction)"
-
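
For reference, the awk block in this script turns each sph.scp entry into two piped wav.scp entries, one per channel, which is what the "side A - channel 1, side B - channel 2" comment refers to. A sketch with a hypothetical recording ID and path (not taken from the corpus):

echo "sw_12345 /data/rt03/audio/sw_12345.sph" | awk -v sph2pipe=sph2pipe '{
  printf("%s-A %s -f wav -p -c 1 %s |\n", $1, sph2pipe, $2);
  printf("%s-B %s -f wav -p -c 2 %s |\n", $1, sph2pipe, $2);
}'
# sw_12345-A sph2pipe -f wav -p -c 1 /data/rt03/audio/sw_12345.sph |
# sw_12345-B sph2pipe -f wav -p -c 2 /data/rt03/audio/sw_12345.sph |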

egs/librispeech/s5/local/chain/run_tdnn_discriminative.sh

Lines changed: 16 additions & 24 deletions
@@ -10,7 +10,7 @@ set -e
 # note: this relies on having a cluster that has plenty of CPUs as well as GPUs,
 # since the lattice generation runs in about real-time, so takes of the order of
 # 1000 hours of CPU time.
-#
+#


 stage=0
@@ -44,7 +44,6 @@ dir=${srcdir}_${criterion}
 ## Egs options
 frames_per_eg=150
 frames_overlap_per_eg=30
-truncate_deriv_weights=10

 ## Nnet training options
 effective_learning_rate=0.000001
@@ -59,8 +58,8 @@ decode_start_epoch=1 # can be used to avoid decoding all epochs, e.g. if we deci

 if $use_gpu; then
 if ! cuda-compiled; then
-cat <<EOF && exit 1
-This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
 If you want to use GPUs (and have them), go to src/, and configure and make on a machine
 where "nvcc" is installed. Otherwise, call this script with --use-gpu false
 EOF
@@ -95,15 +94,15 @@ if [ $frame_subsampling_factor -ne 1 ]; then
 rm ${train_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true

 data_dirs=
-for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
+for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
 steps/shift_feats.sh --cmd "$train_cmd --max-jobs-run 40" --nj 350 \
 $x $train_data_dir exp/shift_hires mfcc_hires
 utils/fix_data_dir.sh ${train_data_dir}_fs$x
 data_dirs="$data_dirs ${train_data_dir}_fs$x"
 awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp
 done
 utils/combine_data.sh ${train_data_dir}_fs $data_dirs
-for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
+for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
 rm -r ${train_data_dir}_fs$x
 done
 fi
@@ -112,9 +111,9 @@ if [ $frame_subsampling_factor -ne 1 ]; then

 affix=_fs
 fi
-
+
 rm ${train_ivector_dir}_fs/ivector_online.scp 2>/dev/null || true
-for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
+for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
 awk -v nfs=$x '{print "fs"nfs"-"$0}' $train_ivector_dir/ivector_online.scp >> ${train_ivector_dir}_fs/ivector_online.scp
 done
 train_ivector_dir=${train_ivector_dir}_fs
@@ -133,7 +132,7 @@ fi
 if [ -z "$lats_dir" ]; then
 lats_dir=${srcdir}_denlats${affix}
 if [ $stage -le 2 ]; then
-nj=50
+nj=50
 # this doesn't really affect anything strongly, except the num-jobs for one of
 # the phases of get_egs_discriminative.sh below.
 num_threads_denlats=6
@@ -147,16 +146,13 @@ if [ -z "$lats_dir" ]; then
 fi
 fi

-model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'`
-model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'`
+model_left_context=`nnet3-am-info $srcdir/final.mdl | grep "left-context:" | awk '{print $2}'`
+model_right_context=`nnet3-am-info $srcdir/final.mdl | grep "right-context:" | awk '{print $2}'`

 left_context=$[model_left_context + extra_left_context]
 right_context=$[model_right_context + extra_right_context]

-valid_left_context=$[valid_left_context + frames_per_eg]
-valid_right_context=$[valid_right_context + frames_per_eg]
-
-cmvn_opts=`cat $srcdir/cmvn_opts`
+cmvn_opts=`cat $srcdir/cmvn_opts`

 if [ -z "$degs_dir" ]; then
 degs_dir=${srcdir}_degs${affix}
@@ -169,16 +165,13 @@ if [ -z "$degs_dir" ]; then
 # have a higher maximum num-jobs if
 if [ -d ${srcdir}_degs/storage ]; then max_jobs=10; else max_jobs=5; fi

-degs_opts="--determinize true --minimize true --remove-output-symbols true --remove-epsilons true --collapse-transition-ids true"
-
 steps/nnet3/get_egs_discriminative.sh \
 --cmd "$decode_cmd --max-jobs-run $max_jobs --mem 20G" --stage $get_egs_stage --cmvn-opts "$cmvn_opts" \
 --adjust-priors false --acwt 1.0 \
 --online-ivector-dir $train_ivector_dir \
 --left-context $left_context --right-context $right_context \
---valid-left-context $valid_left_context --valid-right-context $valid_right_context \
---priors-left-context $valid_left_context --priors-right-context $valid_right_context $frame_subsampling_opt \
---frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg ${degs_opts} \
+$frame_subsampling_opt \
+--frames-per-eg $frames_per_eg --frames-overlap-per-eg $frames_overlap_per_eg \
 $train_data_dir $lang ${srcdir}_ali${affix} $lats_dir $srcdir/final.mdl $degs_dir ;
 fi
 fi
@@ -191,7 +184,7 @@ if [ $stage -le 4 ]; then
 --num-epochs $num_epochs --one-silence-class $one_silence_class --minibatch-size $minibatch_size \
 --num-jobs-nnet $num_jobs_nnet --num-threads $num_threads \
 --regularization-opts "$regularization_opts" --use-frame-shift false \
---truncate-deriv-weights $truncate_deriv_weights --adjust-priors false \
+--adjust-priors false \
 ${degs_dir} $dir ;
 fi

@@ -202,7 +195,7 @@ if [ $stage -le 5 ]; then
 (
 num_jobs=`cat data/${decode_set}_hires/utt2spk|cut -d' ' -f2|sort -u|wc -l`
 iter=epoch$[x*frame_subsampling_factor]
-
+
 steps/nnet3/decode.sh --nj $num_jobs --cmd "$decode_cmd" --iter $iter \
 --acwt 1.0 --post-decode-acwt 10.0 \
 --online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${decode_set}_hires \
@@ -219,7 +212,7 @@ if [ $stage -le 5 ]; then
 done
 done
 wait
-[ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
+[ -f $dir/.error ] && echo "$0: there was a problem while decoding" && exit 1
 fi

 if [ $stage -le 6 ] && $cleanup; then
@@ -231,4 +224,3 @@ fi


 exit 0;
-
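
The frame-shift loops in this file generate one shifted copy of the training data and i-vectors per offset in a symmetric window around zero, and the decode stage names iterations epoch$[x*frame_subsampling_factor]. A minimal sketch, assuming the usual chain frame_subsampling_factor of 3 (an assumption for illustration, not read from the diff):

frame_subsampling_factor=3   # assumed value for illustration
for x in `seq -$[frame_subsampling_factor/2] $[frame_subsampling_factor/2]`; do
  echo "shift features by $x frames -> ${train_data_dir:-data/train}_fs$x"
done
# loops over shifts -1, 0, 1
x=2   # e.g. the 2nd epoch
echo "decode iteration name: epoch$[x*frame_subsampling_factor]"   # -> epoch6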
