- Notifications
You must be signed in to change notification settings - Fork 5.4k
Some modifications to the SRE16 v2 recipe #1986
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
b19c1a2 0db3716 6d0cb4c 2cebce7 aff609a File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -13,6 +13,7 @@ norm_vars=false | |
| center=true | ||
| compress=true | ||
| cmn_window=300 | ||
| write_utt2num_frames=false # if true writes utt2num_frames | ||
| | ||
| echo "$0 $@" # Print the command line for logging | ||
| | ||
| | @@ -41,7 +42,18 @@ done | |
| # Set various variables. | ||
| mkdir -p $dir/log | ||
| mkdir -p $data_out | ||
| featdir=${PWD}/$dir | ||
| featdir=`readlink -f $dir` | ||
| ||
| | ||
| if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then | ||
| utils/create_split_dir.pl \ | ||
| /export/b{14,15,16,17}/$USER/kaldi-data/egs/sre16/v2/xvector-$(date +'%m_%d_%H_%M')/xvector_feats/storage $featdir/storage | ||
| fi | ||
| | ||
| for n in $(seq $nj); do | ||
| # the next command does nothing unless $featdir/storage/ exists, see | ||
| ||
| # utils/create_data_link.pl for more info. | ||
| utils/create_data_link.pl $featdir/xvector_feats_${name}.${n}.ark | ||
| done | ||
| | ||
| cp $data_in/utt2spk $data_out/utt2spk | ||
| cp $data_in/spk2utt $data_out/spk2utt | ||
| | @@ -53,18 +65,31 @@ for n in $(seq $nj); do | |
| utils/create_data_link.pl $featdir/xvector_feats_${name}.$n.ark | ||
| done | ||
| ||
| | ||
| if $write_utt2num_frames; then | ||
| write_num_frames_opt="--write-num-frames=ark,t:$featdir/log/utt2num_frames.JOB" | ||
| else | ||
| write_num_frames_opt= | ||
| fi | ||
| | ||
| sdata_in=$data_in/split$nj; | ||
| utils/split_data.sh $data_in $nj || exit 1; | ||
| | ||
| $cmd JOB=1:$nj $dir/log/create_xvector_feats_${name}.JOB.log \ | ||
| apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=$cmn_window \ | ||
| scp:${sdata_in}/JOB/feats.scp ark:- \| \ | ||
| select-voiced-frames ark:- scp,s,cs:${sdata_in}/JOB/vad.scp ark:- \| \ | ||
| copy-feats --compress=$compress ark:- \ | ||
| copy-feats --compress=$compress $write_num_frames_opt ark:- \ | ||
| ark,scp:$featdir/xvector_feats_${name}.JOB.ark,$featdir/xvector_feats_${name}.JOB.scp || exit 1; | ||
| | ||
| for n in $(seq $nj); do | ||
| cat $featdir/xvector_feats_${name}.$n.scp || exit 1; | ||
| done > ${data_out}/feats.scp || exit 1 | ||
| | ||
| if $write_utt2num_frames; then | ||
| for n in $(seq $nj); do | ||
| cat $featdir/log/utt2num_frames.$n || exit 1; | ||
| done > $data_out/utt2num_frames || exit 1 | ||
| rm $featdir/log/utt2num_frames.* | ||
| fi | ||
| | ||
| echo "$0: Succeeded creating xvector features for $name" | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -113,7 +113,7 @@ if [ $stage -le 5 ]; then | |
| output-layer name=output include-log-softmax=true dim=${num_targets} | ||
| EOF | ||
| | ||
| steps/nnet3/xconfig_to_configs.py \ | ||
| python steps/nnet3/xconfig_to_configs.py \ | ||
| ||
| --xconfig-file $nnet_dir/configs/network.xconfig \ | ||
| --config-dir $nnet_dir/configs/ | ||
| cp $nnet_dir/configs/final.config $nnet_dir/nnet.config | ||
| | @@ -127,7 +127,7 @@ fi | |
| dropout_schedule='0,0@0.20,0.1@0.50,0' | ||
| srand=123 | ||
| if [ $stage -le 6 ]; then | ||
| steps/nnet3/train_raw_dnn.py --stage=$train_stage \ | ||
| python steps/nnet3/train_raw_dnn.py --stage=$train_stage \ | ||
| --cmd="$train_cmd" \ | ||
| --trainer.optimization.proportional-shrink 10 \ | ||
| --trainer.optimization.momentum=0.5 \ | ||
| Contributor There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this change correct? I think the shebang at the top of the python file should take care of it. Contributor Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It works but not necessary. Contributor There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you should revert this change. It seems customary to rely on what's in the shebang. Contributor Author There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure. Contributor There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For example, look at https://github.com/kaldi-asr/kaldi/blob/master/egs/swbd/s5c/local/chain/tuning/run_tdnn_7m.sh line 173 | ||
| | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| | @@ -82,15 +82,19 @@ fi | |
| | ||
| if [ $stage -le 1 ]; then | ||
| # Make filterbanks and compute the energy-based VAD for each dataset | ||
| if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then | ||
| utils/create_split_dir.pl \ | ||
| /export/b{14,15,16,17}/$USER/kaldi-data/egs/sre16/v2/xvector-$(date +'%m_%d_%H_%M')/mfccs/storage $mfccdir/storage | ||
| fi | ||
| for name in sre swbd sre16_eval_enroll sre16_eval_test sre16_major; do | ||
| steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \ | ||
| steps/make_mfcc.sh --write-utt2num-frames true --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \ | ||
| data/${name} exp/make_mfcc $mfccdir | ||
| utils/fix_data_dir.sh data/${name} | ||
| sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \ | ||
| data/${name} exp/make_vad $vaddir | ||
| utils/fix_data_dir.sh data/${name} | ||
| done | ||
| utils/combine_data.sh data/swbd_sre data/swbd data/sre | ||
| utils/combine_data.sh --extra-files "utt2num_frames" data/swbd_sre data/swbd data/sre | ||
| utils/fix_data_dir.sh data/swbd_sre | ||
| fi | ||
| | ||
| | @@ -99,7 +103,6 @@ fi | |
| # The combined list will be used to train the xvector DNN. The SRE | ||
| # subset will be used to train the PLDA model. | ||
| if [ $stage -le 2 ]; then | ||
| utils/data/get_utt2num_frames.sh --nj 40 --cmd "$train_cmd" data/swbd_sre | ||
| frame_shift=0.01 | ||
| awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' data/swbd_sre/utt2num_frames > data/swbd_sre/reco2dur | ||
| | ||
| | @@ -178,10 +181,9 @@ if [ $stage -le 3 ]; then | |
| # wasteful, as it roughly doubles the amount of training data on disk. After | ||
| # creating training examples, this can be removed. | ||
| local/nnet3/xvector/prepare_feats_for_egs.sh --nj 40 --cmd "$train_cmd" \ | ||
| --write_utt2num_frames true \ | ||
| ||
| data/swbd_sre_combined data/swbd_sre_combined_no_sil exp/swbd_sre_combined_no_sil | ||
| utils/fix_data_dir.sh data/swbd_sre_combined_no_sil | ||
| utils/data/get_utt2num_frames.sh --nj 40 --cmd "$train_cmd" data/swbd_sre_combined_no_sil | ||
| utils/fix_data_dir.sh data/swbd_sre_combined_no_sil | ||
| | ||
| # Now, we need to remove features that are too short after removing silence | ||
| # frames. We want at least 5s (500 frames) per utterance. | ||
| | @@ -203,7 +205,7 @@ if [ $stage -le 3 ]; then | |
| utils/filter_scp.pl data/swbd_sre_combined_no_sil/utt2spk data/swbd_sre_combined_no_sil/utt2num_frames > data/swbd_sre_combined_no_sil/utt2num_frames.new | ||
| mv data/swbd_sre_combined_no_sil/utt2num_frames.new data/swbd_sre_combined_no_sil/utt2num_frames | ||
| | ||
| # Now we're reaady to create training examples. | ||
| # Now we're ready to create training examples. | ||
| utils/fix_data_dir.sh data/swbd_sre_combined_no_sil | ||
| fi | ||
| | ||
| | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there any circumstance under which you don't need this option? It seems to me it would be better to not have an option at all, and just write this in any case.