Skip to content

Commit 64d5cf2

Browse files
Szu-JuiChen authored and danpovey committed
[egs] Update reverb recipe (#2753)
1 parent 8755661 commit 64d5cf2

34 files changed

+2532
-844
lines changed

egs/reverb/s5/RESULTS

Lines changed: 298 additions & 149 deletions
Large diffs are not rendered by default.

egs/reverb/s5/conf/decode_dnn.config

Lines changed: 0 additions & 2 deletions
This file was deleted.

egs/reverb/s5/conf/fbank.conf

Lines changed: 0 additions & 2 deletions
This file was deleted.

egs/reverb/s5/conf/mfcc_hires.conf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# config for high-resolution MFCC features, intended for neural network training.
2+
# Note: we keep all cepstra, so it has the same info as filterbank features,
3+
# but MFCC is more easily compressible (because less correlated) which is why
4+
# we prefer this method.
5+
--use-energy=false # use average of log energy, not energy.
6+
--sample-frequency=16000
7+
--num-mel-bins=40
8+
--num-ceps=40
9+
--low-freq=40
10+
--high-freq=-400
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/)
2+
3+
# scrolling size to compute the delays
4+
scroll_size = 250
5+
6+
# cross correlation computation window size
7+
window_size = 500
8+
9+
#number of cross-correlation maxima (peaks) taken into account
10+
nbest_amount = 4
11+
12+
#flag whether to apply automatic noise thresholding
13+
do_noise_threshold = 1
14+
15+
#Percentage of frames with lower xcorr taken as noisy
16+
noise_percent = 10
17+
18+
######## acoustic modelling parameters
19+
20+
#transition probabilities weight for multichannel decoding
21+
trans_weight_multi = 25
22+
trans_weight_nbest = 25
23+
24+
###
25+
26+
#flag whether to print the features after setting them, or not
27+
print_features = 1
28+
29+
#flag whether to use the bad frames in the sum process
30+
do_avoid_bad_frames = 1
31+
32+
#flag to use the best channel (SNR) as a reference
33+
#defined from command line
34+
do_compute_reference = 1
35+
36+
#flag whether to use a uem file or not (process the whole file)
37+
do_use_uem_file = 0
38+
39+
#flag whether to use an adaptive weights scheme or fixed weights
40+
do_adapt_weights = 1
41+
42+
#flag whether to output the sph files or just run the system to create the auxiliary files
43+
do_write_sph_files = 1
44+
45+
####directories where to store/retrieve info####
46+
#channels_file = ./cfg-files/channels
47+
48+
#show needs to be passed as argument normally, here a default one is given just in case
49+
#show_id = Ttmp
50+

egs/reverb/s5/local/Generate_mcTrainData_cut.m

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
22
%
33
% Input variables:
4-
% WSJ_dir_name: string name of user's clean wsjcam0 corpus directory
5-
% (*Directory structure for wsjcam0 corpushas to be kept as it is after obtaining it from LDC.
4+
% WSJ_dir_name: string name of WAV file directory converted from original wsjcam0 SPHERE files
5+
% (*Directory structure for wsjcam0 corpus has to be kept as it is after obtaining it from LDC.
66
% Otherwise this script does not work.)
77
%
88
% This function generates multi-condition training data
99
% based on the following items:
10-
% 1. wsjcam0 corpus (distributed from the LDC)
10+
% 1. wsjcam0 corpus (WAV files)
1111
% 2. room impulse responses (ones under ./RIR/)
1212
% 3. noise (ones under ./NOISE/).
1313
% Generated data has the same directory structure as original wsjcam0 corpus.
@@ -26,8 +26,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
2626

2727
display(['Name of directory for original wsjcam0: ',WSJ_dir_name])
2828
display(['Name of directory to save generated multi-condition training data: ',save_dir])
29-
unix(['chmod u+x sphere_to_wave.csh']);
30-
unix(['chmod u+x bin/*']);
3129

3230
% Parameters related to acoustic conditions
3331
SNRdB=20;
@@ -89,7 +87,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
8987
save_dir_tr=[save_dir,'/data/mc_train/'];
9088
end
9189
mkdir([save_dir_tr]);
92-
%mkdir([save_dir,'/taskfiles/'])
9390

9491
mic_idx=['A';'B';'C';'D';'E';'F';'G';'H'];
9592
prev_fname='dummy';
@@ -114,13 +111,12 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
114111
end
115112
prev_fname=fname(1:idx1(end));
116113

117-
% load (sphere format) speech signal
118-
x=read_sphere([WSJ_dir_name,'/data/', fname]);
119-
x=x/(2^15); % conversion from short-int to float
114+
% load speech signal
115+
x=audioread([WSJ_dir_name, '/data/', fname, '.wav'])';
120116

121117
% load RIR and noise for "THIS" utterance
122-
eval(['RIR=wavread(RIR_sim',num2str(rcount),');']);
123-
eval(['NOISE=wavread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']);
118+
eval(['RIR=audioread(RIR_sim',num2str(rcount),');']);
119+
eval(['NOISE=audioread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']);
124120

125121
% Generate 8ch noisy reverberant data
126122
y=gen_obs(x,RIR,NOISE,SNRdB);
@@ -138,8 +134,9 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
138134
y=y/4; % common normalization to all the data to prevent clipping
139135
% denominator was decided experimentally
140136

141-
for ch=1:8
142-
eval(['wavwrite(y(:,',num2str(ch),'),16000,''',save_dir_tr fname,'_ch',num2str(ch),'.wav'');']);
137+
for ch=1:8
138+
outfilename = [save_dir_tr, fname, '_ch', num2str(ch), '.wav'];
139+
eval(['audiowrite(outfilename, y(:,',num2str(ch),'), 16000);']);
143140
end
144141

145142
display(['sentence ',num2str(fcount),' (out of 7861) finished! (Multi-condition training data)'])

egs/reverb/s5/local/REVERB_create_mcdata.sh

Lines changed: 0 additions & 74 deletions
This file was deleted.

egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh

Lines changed: 0 additions & 165 deletions
This file was deleted.

0 commit comments

Comments
 (0)