Skip to content

Commit cc2469e

Browse files
pegahgh authored and danpovey committed
[src] modify feature-extraction binaries to support downsampling (kaldi-asr#1773)
1 parent 4a0106a commit cc2469e

15 files changed

+146
-84
lines changed

src/feat/feature-common-inl.h

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,29 +20,70 @@
2020
#ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
2121
#define KALDI_FEAT_FEATURE_COMMON_INL_H_
2222

23+
#include "feat/resample.h"
2324
// Do not include this file directly. It is included by feat/feature-common.h
2425

2526
namespace kaldi {
2627

28+
template <class F>
29+
void OfflineFeatureTpl<F>::ComputeFeatures(
30+
const VectorBase<BaseFloat> &wave,
31+
BaseFloat sample_freq,
32+
BaseFloat vtln_warp,
33+
Matrix<BaseFloat> *output) {
34+
KALDI_ASSERT(output != NULL);
35+
BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
36+
if (sample_freq == new_sample_freq)
37+
Compute(wave, vtln_warp, output);
38+
else {
39+
if (new_sample_freq < sample_freq) {
40+
if (! computer_.GetFrameOptions().allow_downsample)
41+
KALDI_ERR << "Waveform and config sample Frequency mismatch: "
42+
<< sample_freq << " .vs " << new_sample_freq
43+
<< " ( use --allow_downsample=true option to allow "
44+
<< " downsampling the waveform).";
45+
46+
// Downsample the waveform.
47+
Vector<BaseFloat> downsampled_wave(wave);
48+
DownsampleWaveForm(sample_freq, wave,
49+
new_sample_freq, &downsampled_wave);
50+
Compute(downsampled_wave, vtln_warp, output);
51+
} else
52+
KALDI_ERR << "The waveform is allowed to get downsampled."
53+
<< "New sample Frequency " << new_sample_freq
54+
<< " is larger than waveform original sampling frequency "
55+
<< sample_freq;
56+
57+
}
58+
}
59+
60+
template <class F>
61+
void OfflineFeatureTpl<F>::ComputeFeatures(
62+
const VectorBase<BaseFloat> &wave,
63+
BaseFloat sample_freq,
64+
BaseFloat vtln_warp,
65+
Matrix<BaseFloat> *output) const {
66+
OfflineFeatureTpl<F> temp(*this);
67+
// This const version of ComputeFeatures() is a wrapper that
68+
// calls the non-const ComputeFeatures() on a temporary object
69+
// that is a copy of *this. It is not as efficient because of the
70+
// overhead of copying *this.
71+
temp.ComputeFeatures(wave, vtln_warp, output);
72+
}
73+
2774
template <class F>
2875
void OfflineFeatureTpl<F>::Compute(
2976
const VectorBase<BaseFloat> &wave,
3077
BaseFloat vtln_warp,
31-
Matrix<BaseFloat> *output,
32-
Vector<BaseFloat> *deprecated_wave_remainder) {
78+
Matrix<BaseFloat> *output) {
3379
KALDI_ASSERT(output != NULL);
3480
int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
3581
cols_out = computer_.Dim();
3682
if (rows_out == 0) {
3783
output->Resize(0, 0);
38-
if (deprecated_wave_remainder != NULL)
39-
*deprecated_wave_remainder = wave;
4084
return;
4185
}
4286
output->Resize(rows_out, cols_out);
43-
if (deprecated_wave_remainder != NULL)
44-
ExtractWaveformRemainder(wave, computer_.GetFrameOptions(),
45-
deprecated_wave_remainder);
4687
Vector<BaseFloat> window; // windowed waveform.
4788
bool use_raw_log_energy = computer_.NeedRawLogEnergy();
4889
for (int32 r = 0; r < rows_out; r++) { // r is frame index.
@@ -60,13 +101,12 @@ template <class F>
60101
void OfflineFeatureTpl<F>::Compute(
61102
const VectorBase<BaseFloat> &wave,
62103
BaseFloat vtln_warp,
63-
Matrix<BaseFloat> *output,
64-
Vector<BaseFloat> *deprecated_wave_remainder) const {
104+
Matrix<BaseFloat> *output) const {
65105
OfflineFeatureTpl<F> temp(*this);
66106
// call the non-const version of Compute() on a temporary copy of this object.
67107
// This is a workaround for const-ness that may sometimes be useful in
68108
// multi-threaded code, although it's not optimally efficient.
69-
temp.Compute(wave, vtln_warp, output, deprecated_wave_remainder);
109+
temp.Compute(wave, vtln_warp, output);
70110
}
71111

72112
} // end namespace kaldi

src/feat/feature-common.h

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,25 +118,50 @@ class OfflineFeatureTpl {
118118
computer_(opts),
119119
feature_window_function_(computer_.GetFrameOptions()) { }
120120

121-
// Computes the features for one file (one sequence of features).
122-
// Use of the 'deprecatd_wave_remainder' argument is highly deprecated; it is
123-
// only provided for back-compatibility for code that may have
124-
// relied on the older interface. It's deprecated because it
125-
// doesn't support the --snip-edges=false option, and because
126-
// we plan to eventually remove this argument so that there
127-
// will be only one way to do online feature extraction.
121+
// Internal (and back-compatibility) interface for computing features, which
122+
// requires that the user has already checked that the sampling frequency
123+
// of the waveform is equal to the sampling frequency specified in
124+
// the frame-extraction options.
128125
void Compute(const VectorBase<BaseFloat> &wave,
129126
BaseFloat vtln_warp,
130-
Matrix<BaseFloat> *output,
131-
Vector<BaseFloat> *deprecated_wave_remainder = NULL);
127+
Matrix<BaseFloat> *output);
132128

133129
// This const version of Compute() is a wrapper that
134130
// calls the non-const version on a temporary object.
135131
// It's less efficient than the non-const version.
136132
void Compute(const VectorBase<BaseFloat> &wave,
137133
BaseFloat vtln_warp,
138-
Matrix<BaseFloat> *output,
139-
Vector<BaseFloat> *deprecated_wave_remainder = NULL) const;
134+
Matrix<BaseFloat> *output) const;
135+
136+
/**
137+
Computes the features for one file (one sequence of features).
138+
This is the newer interface where you specify the sample frequency
139+
of the input waveform.
140+
@param [in] wave The input waveform
141+
@param [in] sample_freq The sampling frequency with which
142+
'wave' was sampled.
143+
if sample_freq is higher than the frequency
144+
specified in the config, we will downsample
145+
the waveform, but if lower, it's an error.
146+
@param [in] vtln_warp The VTLN warping factor (will normally
147+
be 1.0)
148+
@param [out] output The matrix of features, where the row-index
149+
is the frame index.
150+
*/
151+
void ComputeFeatures(const VectorBase<BaseFloat> &wave,
152+
BaseFloat sample_freq,
153+
BaseFloat vtln_warp,
154+
Matrix<BaseFloat> *output);
155+
/**
156+
This const version of ComputeFeatures() is a wrapper that
157+
calls the non-const ComputeFeatures() on a temporary object
158+
that is a copy of *this. It is not as efficient because of the
159+
overhead of copying *this.
160+
*/
161+
void ComputeFeatures(const VectorBase<BaseFloat> &wave,
162+
BaseFloat sample_freq,
163+
BaseFloat vtln_warp,
164+
Matrix<BaseFloat> *output) const;
140165

141166
int32 Dim() const { return computer_.Dim(); }
142167

src/feat/feature-fbank-test.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static void UnitTestSimple() {
101101
// use default parameters
102102

103103
// compute fbanks.
104-
fbank.Compute(v, 1.0, &m, NULL);
104+
fbank.Compute(v, 1.0, &m);
105105

106106
// possibly dump
107107
// std::cout << "== Output features == \n" << m;
@@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {
143143

144144
// calculate kaldi features
145145
Matrix<BaseFloat> kaldi_features;
146-
fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
146+
fbank.Compute(waveform, 1.0, &kaldi_features);
147147

148148

149149
std::cout << "<<<=== Compare with HTK features...\n";
@@ -224,7 +224,7 @@ static void UnitTestHTKCompare2() {
224224

225225
// calculate kaldi features
226226
Matrix<BaseFloat> kaldi_features;
227-
fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
227+
fbank.Compute(waveform, 1.0, &kaldi_features);
228228

229229

230230
std::cout << "<<<=== Compare with HTK features...\n";
@@ -308,7 +308,7 @@ static void UnitTestHTKCompare3() {
308308

309309
// calculate kaldi features
310310
Matrix<BaseFloat> kaldi_features;
311-
fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
311+
fbank.Compute(waveform, vtln_warp, &kaldi_features);
312312

313313

314314
std::cout << "<<<=== Compare with HTK features...\n";
@@ -394,7 +394,7 @@ static void UnitTestHTKCompare4() {
394394

395395
// calculate kaldi features
396396
Matrix<BaseFloat> kaldi_features;
397-
fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
397+
fbank.Compute(waveform, vtln_warp, &kaldi_features);
398398

399399

400400
std::cout << "<<<=== Compare with HTK features...\n";

src/feat/feature-mfcc-test.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ static void UnitTestSimple() {
101101
// use default parameters
102102

103103
// compute mfccs.
104-
mfcc.Compute(v, 1.0, &m, NULL);
104+
mfcc.Compute(v, 1.0, &m);
105105

106106
// possibly dump
107107
// std::cout << "== Output features == \n" << m;
@@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {
143143

144144
// calculate kaldi features
145145
Matrix<BaseFloat> kaldi_raw_features;
146-
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
146+
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
147147

148148
DeltaFeaturesOptions delta_opts;
149149
Matrix<BaseFloat> kaldi_features;
@@ -227,7 +227,7 @@ static void UnitTestHTKCompare2() {
227227

228228
// calculate kaldi features
229229
Matrix<BaseFloat> kaldi_raw_features;
230-
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
230+
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
231231

232232
DeltaFeaturesOptions delta_opts;
233233
Matrix<BaseFloat> kaldi_features;
@@ -312,7 +312,7 @@ static void UnitTestHTKCompare3() {
312312

313313
// calculate kaldi features
314314
Matrix<BaseFloat> kaldi_raw_features;
315-
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
315+
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
316316

317317
DeltaFeaturesOptions delta_opts;
318318
Matrix<BaseFloat> kaldi_features;
@@ -395,7 +395,7 @@ static void UnitTestHTKCompare4() {
395395

396396
// calculate kaldi features
397397
Matrix<BaseFloat> kaldi_raw_features;
398-
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
398+
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
399399

400400
DeltaFeaturesOptions delta_opts;
401401
Matrix<BaseFloat> kaldi_features;
@@ -483,7 +483,7 @@ static void UnitTestHTKCompare5() {
483483

484484
// calculate kaldi features
485485
Matrix<BaseFloat> kaldi_raw_features;
486-
mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features, NULL);
486+
mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features);
487487

488488
DeltaFeaturesOptions delta_opts;
489489
Matrix<BaseFloat> kaldi_features;
@@ -568,7 +568,7 @@ static void UnitTestHTKCompare6() {
568568

569569
// calculate kaldi features
570570
Matrix<BaseFloat> kaldi_raw_features;
571-
mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
571+
mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
572572

573573
DeltaFeaturesOptions delta_opts;
574574
Matrix<BaseFloat> kaldi_features;

src/feat/feature-plp-test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ static void UnitTestSimple() {
6060
// use default parameters
6161

6262
// compute mfccs.
63-
plp.Compute(v, 1.0, &m, NULL);
63+
plp.Compute(v, 1.0, &m);
6464

6565
// possibly dump
6666
// std::cout << "== Output features == \n" << m;
@@ -102,7 +102,7 @@ static void UnitTestHTKCompare1() {
102102

103103
// calculate kaldi features
104104
Matrix<BaseFloat> kaldi_raw_features;
105-
plp.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
105+
plp.Compute(waveform, 1.0, &kaldi_raw_features);
106106

107107
DeltaFeaturesOptions delta_opts;
108108
Matrix<BaseFloat> kaldi_features;

src/feat/feature-sdc-test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ int main() {
148148
op.use_energy = false;
149149
Mfcc mfcc(op);
150150
Matrix<BaseFloat> raw_features;
151-
mfcc.Compute(waveform, 1.0, &raw_features, NULL);
151+
mfcc.Compute(waveform, 1.0, &raw_features);
152152

153153
try {
154154
for (int32 window = 1; window < 4; window++) {

src/feat/feature-window.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct FrameExtractionOptions {
4343
bool round_to_power_of_two;
4444
BaseFloat blackman_coeff;
4545
bool snip_edges;
46+
bool allow_downsample;
4647
// May be "hamming", "rectangular", "povey", "hanning", "blackman"
4748
// "povey" is a window I made to be similar to Hamming but to go to zero at the
4849
// edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
@@ -57,7 +58,8 @@ struct FrameExtractionOptions {
5758
window_type("povey"),
5859
round_to_power_of_two(true),
5960
blackman_coeff(0.42),
60-
snip_edges(true){ }
61+
snip_edges(true),
62+
allow_downsample(false) { }
6163

6264
void Register(OptionsItf *opts) {
6365
opts->Register("sample-frequency", &samp_freq,
@@ -83,6 +85,9 @@ struct FrameExtractionOptions {
8385
"completely fit in the file, and the number of frames depends on the "
8486
"frame-length. If false, the number of frames depends only on the "
8587
"frame-shift, and we reflect the data at the ends.");
88+
// Note the trailing space after "than": adjacent string literals are
// concatenated verbatim, so without it the help text reads "thanthe".
opts->Register("allow-downsample", &allow_downsample,
               "If true, allow the input waveform to have a higher frequency than "
               "the specified --sample-frequency (and we'll downsample).");
8691
}
8792
int32 WindowShift() const {
8893
return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);

src/feat/online-feature-test.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ void TestOnlineMfcc() {
167167

168168
// compute mfcc offline
169169
Matrix<BaseFloat> mfcc_feats;
170-
mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
170+
mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
171171

172172
// compare
173173
// The test waveform is about 1.44s long, so
@@ -217,7 +217,7 @@ void TestOnlinePlp() {
217217

218218
// compute plp offline
219219
Matrix<BaseFloat> plp_feats;
220-
plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
220+
plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
221221

222222
// compare
223223
// The test waveform is about 1.44s long, so
@@ -309,7 +309,7 @@ void TestOnlineAppendFeature() {
309309

310310
// compute mfcc offline
311311
Matrix<BaseFloat> mfcc_feats;
312-
mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
312+
mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
313313

314314
// the parametrization object for 2nd stream plp feature
315315
PlpOptions plp_op;
@@ -326,7 +326,7 @@ void TestOnlineAppendFeature() {
326326

327327
// compute plp offline
328328
Matrix<BaseFloat> plp_feats;
329-
plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
329+
plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
330330

331331
// compare
332332
// The test waveform is about 1.44s long, so

src/feat/resample.cc

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ void LinearResample::Resample(const VectorBase<BaseFloat> &input,
155155
int32 input_dim = input.Dim();
156156
int64 tot_input_samp = input_sample_offset_ + input_dim,
157157
tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
158-
158+
159159
KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
160160

161161
output->Resize(tot_output_samp - output_sample_offset_);
@@ -365,5 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
365365
return filter * window;
366366
}
367367

368-
368+
// Downsamples 'wave' from 'orig_freq' to 'new_freq' and writes the result
// to '*new_wave'.
//
//   orig_freq  sampling frequency of 'wave'.
//   wave       the input waveform.
//   new_freq   the target sampling frequency; must be strictly lower than
//              'orig_freq' (asserted).
//   new_wave   receives the resampled waveform; must not be NULL.  It is
//              resized by the resampler, so its previous contents and size
//              do not matter.
void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
                        BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
  KALDI_ASSERT(new_wave != NULL);
  KALDI_ASSERT(new_freq < orig_freq);
  // Low-pass cutoff is 99% of half the new sampling frequency.
  BaseFloat lowpass_cutoff = 0.99 * 0.5 * new_freq;
  int32 lowpass_filter_width = 6;
  LinearResample signal_downsampler(orig_freq, new_freq,
                                    lowpass_cutoff, lowpass_filter_width);
  // The whole signal is processed in a single call; 'true' is presumably the
  // resampler's flush flag, so no output samples are held back (confirm
  // against LinearResample::Resample).
  signal_downsampler.Resample(wave, true, new_wave);
}
369377
} // namespace kaldi

0 commit comments

Comments
 (0)