chenzhehuai
diff --git a/‎src/feat/feature-common-inl.h‎
Lines changed: 50 additions & 10 deletions b/‎src/feat/feature-common-inl.h‎
Lines changed: 50 additions & 10 deletions
diff --git a/‎src/feat/feature-common.h‎
Lines changed: 36 additions & 11 deletions b/‎src/feat/feature-common.h‎
Lines changed: 36 additions & 11 deletions
diff --git a/‎src/feat/feature-fbank-test.cc‎
Lines changed: 5 additions & 5 deletions b/‎src/feat/feature-fbank-test.cc‎
Lines changed: 5 additions & 5 deletions
diff --git a/‎src/feat/feature-mfcc-test.cc‎
Lines changed: 7 additions & 7 deletions b/‎src/feat/feature-mfcc-test.cc‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎src/feat/feature-plp-test.cc‎
Lines changed: 2 additions & 2 deletions b/‎src/feat/feature-plp-test.cc‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/feat/feature-sdc-test.cc‎
Lines changed: 1 addition & 1 deletion b/‎src/feat/feature-sdc-test.cc‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/feat/feature-window.h‎
Lines changed: 6 additions & 1 deletion b/‎src/feat/feature-window.h‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎src/feat/online-feature-test.cc‎
Lines changed: 4 additions & 4 deletions b/‎src/feat/online-feature-test.cc‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/feat/resample.cc‎
Lines changed: 10 additions & 2 deletions b/‎src/feat/resample.cc‎
Lines changed: 10 additions & 2 deletions
@@ -20,29 +20,70 @@
 #ifndef KALDI_FEAT_FEATURE_COMMON_INL_H_
 #define KALDI_FEAT_FEATURE_COMMON_INL_H_
 
+#include "feat/resample.h"
 // Do not include this file directly. It is included by feat/feature-common.h
 
 namespace kaldi {
 
+// Computes features for 'wave' sampled at 'sample_freq': downsamples first if
+// that exceeds the configured frequency and allow_downsample is set.
+template <class F>
+void OfflineFeatureTpl<F>::ComputeFeatures(
+    const VectorBase<BaseFloat> &wave,
+    BaseFloat sample_freq,
+    BaseFloat vtln_warp,
+    Matrix<BaseFloat> *output) {
+  KALDI_ASSERT(output != NULL);
+  BaseFloat new_sample_freq = computer_.GetFrameOptions().samp_freq;
+  if (sample_freq == new_sample_freq) {
+    Compute(wave, vtln_warp, output);
+  } else if (new_sample_freq < sample_freq) {
+    if (!computer_.GetFrameOptions().allow_downsample)
+      KALDI_ERR << "Waveform and config sample Frequency mismatch: "
+                << sample_freq << " vs. " << new_sample_freq
+                << " (use --allow-downsample=true option to allow"
+                << " downsampling the waveform).";
+    // DownsampleWaveForm() passes this to LinearResample::Resample(), which
+    // resizes its output, so start empty rather than copying 'wave'.
+    Vector<BaseFloat> downsampled_wave;
+    DownsampleWaveForm(sample_freq, wave,
+                       new_sample_freq, &downsampled_wave);
+    Compute(downsampled_wave, vtln_warp, output);
+  } else {
+    KALDI_ERR << "The waveform can only be downsampled, not upsampled. "
+              << "New sample Frequency " << new_sample_freq
+              << " is larger than waveform original sampling frequency "
+              << sample_freq;
+  }
+}
+
+template <class F>
+void OfflineFeatureTpl<F>::ComputeFeatures(
+    const VectorBase<BaseFloat> &wave,
+    BaseFloat sample_freq,
+    BaseFloat vtln_warp,
+    Matrix<BaseFloat> *output) const {
+  // This const version of ComputeFeatures() is a wrapper that
+  // calls the non-const ComputeFeatures() on a temporary object
+  // that is a copy of *this.  It is not as efficient because of the
+  // overhead of copying *this.
+  OfflineFeatureTpl<F> temp(*this);
+  temp.ComputeFeatures(wave, sample_freq, vtln_warp, output);
+}
+
 template <class F>
 void OfflineFeatureTpl<F>::Compute(
  const VectorBase<BaseFloat> &wave,
  BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder) {
+ Matrix<BaseFloat> *output) {
  KALDI_ASSERT(output != NULL);
  int32 rows_out = NumFrames(wave.Dim(), computer_.GetFrameOptions()),
  cols_out = computer_.Dim();
  if (rows_out == 0) {
  output->Resize(0, 0);
- if (deprecated_wave_remainder != NULL)
- *deprecated_wave_remainder = wave;
  return;
  }
  output->Resize(rows_out, cols_out);
- if (deprecated_wave_remainder != NULL)
- ExtractWaveformRemainder(wave, computer_.GetFrameOptions(),
- deprecated_wave_remainder);
  Vector<BaseFloat> window; // windowed waveform.
  bool use_raw_log_energy = computer_.NeedRawLogEnergy();
  for (int32 r = 0; r < rows_out; r++) { // r is frame index.
@@ -60,13 +101,12 @@ template <class F>
 void OfflineFeatureTpl<F>::Compute(
  const VectorBase<BaseFloat> &wave,
  BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder) const {
+ Matrix<BaseFloat> *output) const {
  OfflineFeatureTpl<F> temp(*this);
  // call the non-const version of Compute() on a temporary copy of this object.
  // This is a workaround for const-ness that may sometimes be useful in
  // multi-threaded code, although it's not optimally efficient.
- temp.Compute(wave, vtln_warp, output, deprecated_wave_remainder);
+ temp.Compute(wave, vtln_warp, output);
 }
 
 } // end namespace kaldi
 
@@ -118,25 +118,50 @@ class OfflineFeatureTpl {
  computer_(opts),
  feature_window_function_(computer_.GetFrameOptions()) { }
 
- // Computes the features for one file (one sequence of features).
- // Use of the 'deprecatd_wave_remainder' argument is highly deprecated; it is
- // only provided for back-compatibility for code that may have
- // relied on the older interface. It's deprecated because it
- // doesn't support the --snip-edges=false option, and because
- // we plan to eventually remove this argument so that there
- // will be only one way to do online feature extraction.
+ // Internal (and back-compatibility) interface for computing features, which
+ // requires that the user has already checked that the sampling frequency
+ // of the waveform is equal to the sampling frequency specified in
+ // the frame-extraction options.
  void Compute(const VectorBase<BaseFloat> &wave,
  BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder = NULL);
+ Matrix<BaseFloat> *output);
 
  // This const version of Compute() is a wrapper that
  // calls the non-const version on a temporary object.
  // It's less efficient than the non-const version.
  void Compute(const VectorBase<BaseFloat> &wave,
  BaseFloat vtln_warp,
- Matrix<BaseFloat> *output,
- Vector<BaseFloat> *deprecated_wave_remainder = NULL) const;
+ Matrix<BaseFloat> *output) const;
+
+ /**
+ Computes the features for one file (one sequence of features).
+ This is the newer interface where you specify the sample frequency
+ of the input waveform.
+ @param [in] wave The input waveform
+ @param [in] sample_freq The sampling frequency with which
+ 'wave' was sampled.
+ If sample_freq is higher than the frequency
+ specified in the config, we will downsample the waveform
+ (requires --allow-downsample=true); if lower, it's an error.
+ @param [in] vtln_warp The VTLN warping factor (will normally
+ be 1.0)
+ @param [out] output The matrix of features, where the row-index
+ is the frame index.
+ */
+ void ComputeFeatures(const VectorBase<BaseFloat> &wave,
+ BaseFloat sample_freq,
+ BaseFloat vtln_warp,
+ Matrix<BaseFloat> *output);
+ /**
+ This const version of ComputeFeatures() is a wrapper that
+ calls the non-const ComputeFeatures() on a temporary object
+ that is a copy of *this. It is not as efficient because of the
+ overhead of copying *this.
+ */
+ void ComputeFeatures(const VectorBase<BaseFloat> &wave,
+ BaseFloat sample_freq,
+ BaseFloat vtln_warp,
+ Matrix<BaseFloat> *output) const;
 
  int32 Dim() const { return computer_.Dim(); }
 
 
@@ -101,7 +101,7 @@ static void UnitTestSimple() {
  // use default parameters
 
  // compute fbanks.
- fbank.Compute(v, 1.0, &m, NULL);
+ fbank.Compute(v, 1.0, &m);
 
  // possibly dump
  // std::cout << "== Output features == \n" << m;
@@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
+ fbank.Compute(waveform, 1.0, &kaldi_features);
 
 
  std::cout << "<<<=== Compare with HTK features...\n";
@@ -224,7 +224,7 @@ static void UnitTestHTKCompare2() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, 1.0, &kaldi_features, NULL);
+ fbank.Compute(waveform, 1.0, &kaldi_features);
 
 
  std::cout << "<<<=== Compare with HTK features...\n";
@@ -308,7 +308,7 @@ static void UnitTestHTKCompare3() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
+ fbank.Compute(waveform, vtln_warp, &kaldi_features);
 
 
  std::cout << "<<<=== Compare with HTK features...\n";
@@ -394,7 +394,7 @@ static void UnitTestHTKCompare4() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_features;
- fbank.Compute(waveform, vtln_warp, &kaldi_features, NULL);
+ fbank.Compute(waveform, vtln_warp, &kaldi_features);
 
 
  std::cout << "<<<=== Compare with HTK features...\n";
 
@@ -101,7 +101,7 @@ static void UnitTestSimple() {
  // use default parameters
 
  // compute mfccs.
- mfcc.Compute(v, 1.0, &m, NULL);
+ mfcc.Compute(v, 1.0, &m);
 
  // possibly dump
  // std::cout << "== Output features == \n" << m;
@@ -143,7 +143,7 @@ static void UnitTestHTKCompare1() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
@@ -227,7 +227,7 @@ static void UnitTestHTKCompare2() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
@@ -312,7 +312,7 @@ static void UnitTestHTKCompare3() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
@@ -395,7 +395,7 @@ static void UnitTestHTKCompare4() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
@@ -483,7 +483,7 @@ static void UnitTestHTKCompare5() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, vtln_warp, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
@@ -568,7 +568,7 @@ static void UnitTestHTKCompare6() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- mfcc.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
 
@@ -60,7 +60,7 @@ static void UnitTestSimple() {
  // use default parameters
 
  // compute mfccs.
- plp.Compute(v, 1.0, &m, NULL);
+ plp.Compute(v, 1.0, &m);
 
  // possibly dump
  // std::cout << "== Output features == \n" << m;
@@ -102,7 +102,7 @@ static void UnitTestHTKCompare1() {
 
  // calculate kaldi features
  Matrix<BaseFloat> kaldi_raw_features;
- plp.Compute(waveform, 1.0, &kaldi_raw_features, NULL);
+ plp.Compute(waveform, 1.0, &kaldi_raw_features);
 
  DeltaFeaturesOptions delta_opts;
  Matrix<BaseFloat> kaldi_features;
 
@@ -148,7 +148,7 @@ int main() {
  op.use_energy = false;
  Mfcc mfcc(op);
  Matrix<BaseFloat> raw_features;
- mfcc.Compute(waveform, 1.0, &raw_features, NULL);
+ mfcc.Compute(waveform, 1.0, &raw_features);
 
  try {
  for (int32 window = 1; window < 4; window++) {
 
@@ -43,6 +43,7 @@ struct FrameExtractionOptions {
  bool round_to_power_of_two;
  BaseFloat blackman_coeff;
  bool snip_edges;
+ bool allow_downsample;
  // May be "hamming", "rectangular", "povey", "hanning", "blackman"
  // "povey" is a window I made to be similar to Hamming but to go to zero at the
  // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
@@ -57,7 +58,8 @@ struct FrameExtractionOptions {
  window_type("povey"),
  round_to_power_of_two(true),
  blackman_coeff(0.42),
- snip_edges(true){ }
+ snip_edges(true),
+ allow_downsample(false) { }
 
  void Register(OptionsItf *opts) {
  opts->Register("sample-frequency", &samp_freq,
@@ -83,6 +85,9 @@ struct FrameExtractionOptions {
  "completely fit in the file, and the number of frames depends on the "
  "frame-length. If false, the number of frames depends only on the "
  "frame-shift, and we reflect the data at the ends.");
+ opts->Register("allow-downsample", &allow_downsample,
+ "If true, allow the input waveform to have a higher frequency than "
+ "the specified --sample-frequency (and we'll downsample).");
  }
  int32 WindowShift() const {
  return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);
 
@@ -167,7 +167,7 @@ void TestOnlineMfcc() {
 
  // compute mfcc offline
  Matrix<BaseFloat> mfcc_feats;
- mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
+ mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
 
  // compare
  // The test waveform is about 1.44s long, so
@@ -217,7 +217,7 @@ void TestOnlinePlp() {
 
  // compute plp offline
  Matrix<BaseFloat> plp_feats;
- plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
+ plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
 
  // compare
  // The test waveform is about 1.44s long, so
@@ -309,7 +309,7 @@ void TestOnlineAppendFeature() {
 
  // compute mfcc offline
  Matrix<BaseFloat> mfcc_feats;
- mfcc.Compute(waveform, 1.0, &mfcc_feats, NULL); // vtln not supported
+ mfcc.Compute(waveform, 1.0, &mfcc_feats); // vtln not supported
 
  // the parametrization object for 2nd stream plp feature
  PlpOptions plp_op;
@@ -326,7 +326,7 @@ void TestOnlineAppendFeature() {
 
  // compute plp offline
  Matrix<BaseFloat> plp_feats;
- plp.Compute(waveform, 1.0, &plp_feats, NULL); // vtln not supported
+ plp.Compute(waveform, 1.0, &plp_feats); // vtln not supported
 
  // compare
  // The test waveform is about 1.44s long, so
 
@@ -155,7 +155,7 @@ void LinearResample::Resample(const VectorBase<BaseFloat> &input,
  int32 input_dim = input.Dim();
  int64 tot_input_samp = input_sample_offset_ + input_dim,
  tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
- 
+
  KALDI_ASSERT(tot_output_samp >= output_sample_offset_);
 
  output->Resize(tot_output_samp - output_sample_offset_);
@@ -365,5 +365,13 @@ BaseFloat ArbitraryResample::FilterFunc(BaseFloat t) const {
  return filter * window;
 }
 
-
+void DownsampleWaveForm(BaseFloat orig_freq, const VectorBase<BaseFloat> &wave,
+                        BaseFloat new_freq, Vector<BaseFloat> *new_wave) {
+  // Resamples 'wave' to 'new_freq'; cutoff is 99% of half the new frequency.
+  KALDI_ASSERT(new_freq < orig_freq);
+  const BaseFloat cutoff_hz = 0.99 * 0.5 * new_freq;
+  const int32 filter_width = 6;
+  LinearResample resampler(orig_freq, new_freq, cutoff_hz, filter_width);
+  resampler.Resample(wave, true, new_wave);
+}
 } // namespace kaldi