kaldi-asr
diff --git a/‎egs/reverb/s5/RESULTS‎
Lines changed: 298 additions & 149 deletions b/‎egs/reverb/s5/RESULTS‎
Lines changed: 298 additions & 149 deletions
diff --git a/‎egs/reverb/s5/conf/decode_dnn.config‎
Lines changed: 0 additions & 2 deletions b/‎egs/reverb/s5/conf/decode_dnn.config‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎egs/reverb/s5/conf/fbank.conf‎
Lines changed: 0 additions & 2 deletions b/‎egs/reverb/s5/conf/fbank.conf‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎egs/reverb/s5/conf/mfcc_hires.conf‎
Lines changed: 10 additions & 0 deletions b/‎egs/reverb/s5/conf/mfcc_hires.conf‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎egs/reverb/s5/conf/online_cmvn.conf‎
Lines changed: 1 addition & 0 deletions b/‎egs/reverb/s5/conf/online_cmvn.conf‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎egs/reverb/s5/conf/reverb_beamformit.cfg‎
Lines changed: 50 additions & 0 deletions b/‎egs/reverb/s5/conf/reverb_beamformit.cfg‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎egs/reverb/s5/local/Generate_mcTrainData_cut.m‎
Lines changed: 10 additions & 13 deletions b/‎egs/reverb/s5/local/Generate_mcTrainData_cut.m‎
Lines changed: 10 additions & 13 deletions
diff --git a/‎egs/reverb/s5/local/REVERB_create_mcdata.sh‎
Lines changed: 0 additions & 74 deletions b/‎egs/reverb/s5/local/REVERB_create_mcdata.sh‎
Lines changed: 0 additions & 74 deletions
diff --git a/‎egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh‎
Lines changed: 0 additions & 165 deletions b/‎egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh‎
Lines changed: 0 additions & 165 deletions
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false # use average of log energy, not energy.
+--sample-frequency=16000 
+--num-mel-bins=40
+--num-ceps=40
+--low-freq=40
+--high-freq=-400
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
@@ -0,0 +1,50 @@
+#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/)
+
+# scrolling size to compute the delays
+scroll_size = 250
+
+# cross correlation computation window size
+window_size = 500
+
+#amount of maximum points for the xcorrelation taken into account
+nbest_amount = 4
+
+#flag whether to apply automatic noise thresholding
+do_noise_threshold = 1
+
+#Percentage of frames with lower xcorr taken as noisy
+noise_percent = 10
+
+######## acoustic modelling parameters
+
+#transition probabilities weight for multichannel decoding
+trans_weight_multi = 25
+trans_weight_nbest = 25
+
+###
+
+#flag whether to print the features after setting them, or not
+print_features = 1
+
+#flag whether to avoid using the bad frames in the sum process
+do_avoid_bad_frames = 1
+
+#flag to use the best channel (SNR) as a reference
+#defined from command line
+do_compute_reference = 1
+
+#flag whether to use a uem file or not (if not, process the whole file)
+do_use_uem_file = 0
+
+#flag whether to use an adaptive weights scheme or fixed weights
+do_adapt_weights = 1
+
+#flag whether to output the sph files or just run the system to create the auxiliary files
+do_write_sph_files = 1
+
+####directories where to store/retrieve info####
+#channels_file = ./cfg-files/channels
+
+#show needs to be passed as argument normally, here a default one is given just in case
+#show_id = Ttmp
+
@@ -1,13 +1,13 @@
 function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
 %
 % Input variables:
-% WSJ_dir_name: string name of user's clean wsjcam0 corpus directory 
-% (*Directory structure for wsjcam0 corpushas to be kept as it is after obtaining it from LDC. 
+% WSJ_dir_name: string name of WAV file directory converted from original wsjcam0 SPHERE files
+% (*Directory structure for wsjcam0 corpus has to be kept as it is after obtaining it from LDC.
 % Otherwise this script does not work.)
 %
 % This function generates multi-condition training data
 % based on the following items:
-% 1. wsjcam0 corpus (distributed from the LDC)
+% 1. wsjcam0 corpus (WAV files)
 % 2. room impulse responses (ones under ./RIR/)
 % 3. noise (ones under ./NOISE/).
 % Generated data has the same directory structure as original wsjcam0 corpus. 
@@ -26,8 +26,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
 
 display(['Name of directory for original wsjcam0: ',WSJ_dir_name])
 display(['Name of directory to save generated multi-condition training data: ',save_dir])
-unix(['chmod u+x sphere_to_wave.csh']);
-unix(['chmod u+x bin/*']);
 
 % Parameters related to acoustic conditions
 SNRdB=20;
@@ -89,7 +87,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
  save_dir_tr=[save_dir,'/data/mc_train/'];
 end
 mkdir([save_dir_tr]);
-%mkdir([save_dir,'/taskfiles/'])
 
 mic_idx=['A';'B';'C';'D';'E';'F';'G';'H'];
 prev_fname='dummy';
@@ -114,13 +111,12 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
  end
  prev_fname=fname(1:idx1(end));
 
- % load (sphere format) speech signal 
- x=read_sphere([WSJ_dir_name,'/data/', fname]);
- x=x/(2^15); % conversion from short-int to float
+ % load speech signal
+ x=audioread([WSJ_dir_name, '/data/', fname, '.wav'])';
 
  % load RIR and noise for "THIS" utterance
- eval(['RIR=wavread(RIR_sim',num2str(rcount),');']);
- eval(['NOISE=wavread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']);
+ eval(['RIR=audioread(RIR_sim',num2str(rcount),');']);
+ eval(['NOISE=audioread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']);
 
  % Generate 8ch noisy reverberant data 
  y=gen_obs(x,RIR,NOISE,SNRdB);
@@ -138,8 +134,9 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
  y=y/4; % common normalization to all the data to prevent clipping
  % denominator was decided experimentally
 
- for ch=1:8 
- eval(['wavwrite(y(:,',num2str(ch),'),16000,''',save_dir_tr fname,'_ch',num2str(ch),'.wav'');']);
+ for ch=1:8
+ outfilename = [save_dir_tr, fname, '_ch', num2str(ch), '.wav'];
+ eval(['audiowrite(outfilename, y(:,',num2str(ch),'), 16000);']);
  end
 
  display(['sentence ',num2str(fcount),' (out of 7861) finished! (Multi-condition training data)'])
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh`