PaddlePaddle · kuke · Mar 27, 2018 · Mar 21, 2018 · Mar 22, 2018 · Mar 22, 2018
diff --git a/fluid/DeepASR/decoder/decoder.cc b/fluid/DeepASR/decoder/decoder.cc
diff --git a/fluid/DeepASR/decoder/decoder.h b/fluid/DeepASR/decoder/decoder.h
diff --git a/fluid/DeepASR/decoder/post_decode_faster.cc b/fluid/DeepASR/decoder/post_decode_faster.cc
@@ -0,0 +1,144 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "post_decode_faster.h"
+
+typedef kaldi::int32 int32;
+using fst::SymbolTable;
+using fst::VectorFst;
+using fst::StdArc;
+
+// Builds a decoder from on-disk resources.
+//
+//   word_syms_filename   text symbol table mapping word-ids to strings; an
+//                        empty string skips loading it, in which case the
+//                        decode() methods return empty transcriptions.
+//   fst_in_filename      decoding graph, read with fst::ReadFstKaldi.
+//   logprior_rxfilename  path of the log-prior vector file.
+//
+// Reports a Kaldi error (KALDI_ERR) when the symbol table cannot be read or
+// the log-prior file cannot be opened.
+Decoder::Decoder(std::string word_syms_filename,
+                 std::string fst_in_filename,
+                 std::string logprior_rxfilename) {
+  // Decoding settings are fixed here; nothing is parsed from a command line,
+  // so no option parser is needed.
+  binary = true;
+  acoustic_scale = 1.5;
+  allow_partial = true;
+  kaldi::FasterDecoderOptions decoder_opts;  // left at the library defaults
+
+  word_syms = nullptr;
+  if (!word_syms_filename.empty()) {
+    word_syms = fst::SymbolTable::ReadText(word_syms_filename);
+    if (!word_syms)
+      KALDI_ERR << "Could not read symbol table from file "
+                << word_syms_filename;
+  }
+
+  // Fail with a clear message instead of handing an unopened stream to Read().
+  std::ifstream is_logprior(logprior_rxfilename);
+  if (!is_logprior.is_open())
+    KALDI_ERR << "Could not open log-prior file " << logprior_rxfilename;
+  logprior.Read(is_logprior, false);  // false == not binary
+
+  decode_fst = fst::ReadFstKaldi(fst_in_filename);
+
+  // The decoder is constructed from *decode_fst, so the graph must stay alive
+  // for as long as the decoder does.
+  decoder = new kaldi::FasterDecoder(*decode_fst, decoder_opts);
+}
+
+
+// Frees the objects allocated by the constructor.
+Decoder::~Decoder() {
+  // The decoder was constructed from *decode_fst, so release it before the
+  // graph it was built on.
+  delete decoder;
+  delete decode_fst;
+  // word_syms is null when no symbol table was requested; deleting a null
+  // pointer is a no-op, so no guard is needed. (A `!word_syms` guard here
+  // would only ever delete the null pointer and leak a loaded table.)
+  delete word_syms;
+}
+
+// Decodes one utterance given as a frames x labels table of scores.
+//
+//   key        utterance id, used in log and error messages.
+//   log_probs  one row per frame; every row must have the same length.
+//              NOTE(review): the matrix overload applies a log to these
+//              values, so despite the name they are presumably probabilities
+//              rather than log-probabilities -- confirm with the caller.
+//
+// Returns the result of the matrix overload, or an empty string (after a
+// warning) when there are no frames or the first frame has no scores.
+// Reports a Kaldi error when rows have different lengths.
+std::string Decoder::decode(
+    std::string key,
+    const std::vector<std::vector<kaldi::BaseFloat>>& log_probs) {
+  // Guard the front() access below: reading the first row of an empty table
+  // is undefined behaviour.
+  if (log_probs.empty() || log_probs.front().empty()) {
+    KALDI_WARN << "Zero-length utterance: " << key;
+    return std::string();
+  }
+
+  const size_t num_frames = log_probs.size();
+  const size_t dim_label = log_probs.front().size();
+
+  // kStrideEqualNumCols stores rows back to back, so row i starts at
+  // Data() + i * dim_label.
+  kaldi::Matrix<kaldi::BaseFloat> loglikes(
+      num_frames, dim_label, kaldi::kSetZero, kaldi::kStrideEqualNumCols);
+  for (size_t i = 0; i < num_frames; ++i) {
+    // A row of a different length would make memcpy read past its end (or
+    // leave part of the matrix row unfilled).
+    if (log_probs[i].size() != dim_label)
+      KALDI_ERR << "Utterance " << key << ": frame " << i << " has "
+                << log_probs[i].size() << " scores, expected " << dim_label;
+    memcpy(loglikes.Data() + i * dim_label,
+           log_probs[i].data(),
+           sizeof(kaldi::BaseFloat) * dim_label);
+  }
+
+  return decode(key, loglikes);
+}
+
+
+// Batch interface: decodes every matrix read from `posterior_rspecifier`
+// and returns one transcription per utterance, in reading order.
+std::vector<std::string> Decoder::decode(std::string posterior_rspecifier) {
+  kaldi::SequentialBaseFloatMatrixReader reader(posterior_rspecifier);
+  std::vector<std::string> transcripts;
+
+  while (!reader.Done()) {
+    const std::string utt_id = reader.Key();
+    // Work on a copy: the per-utterance decode() takes the matrix by
+    // non-const reference.
+    kaldi::Matrix<kaldi::BaseFloat> scores(reader.Value());
+    transcripts.push_back(decode(utt_id, scores));
+    reader.Next();
+  }
+
+  return transcripts;
+}
+
+
+// Decodes a single utterance.
+//
+//   key       utterance id, used in log and error messages.
+//   loglikes  frames x labels score matrix. It is modified in place: a log
+//             is applied and the log-prior is then subtracted from every row.
+//
+// Returns the words of the best path, looked up in the symbol table and
+// concatenated with no separator. The string is empty when the utterance has
+// no frames, when no path is output, or when no symbol table was loaded.
+// Reports a Kaldi error when the column count does not match the log-prior
+// or when a decoded word-id is missing from the symbol table.
+std::string Decoder::decode(std::string key,
+                            kaldi::Matrix<kaldi::BaseFloat>& loglikes) {
+  std::string decoding_result;
+
+  if (loglikes.NumRows() == 0) {
+    // Nothing to decode: skip the dimension check and the decoder entirely.
+    KALDI_WARN << "Zero-length utterance: " << key;
+    return decoding_result;
+  }
+  // Checked with KALDI_ERR rather than KALDI_ASSERT so the check stays active
+  // even if assertions are disabled at build time.
+  if (loglikes.NumCols() != logprior.Dim())
+    KALDI_ERR << "Utterance " << key << " has " << loglikes.NumCols()
+              << " scores per frame, but the log-prior has "
+              << logprior.Dim() << " entries.";
+
+  loglikes.ApplyLog();
+  loglikes.AddVecToRows(-1.0, logprior);
+
+  kaldi::DecodableMatrixScaled decodable(loglikes, acoustic_scale);
+  decoder->Decode(&decodable);
+
+  fst::VectorFst<kaldi::LatticeArc> decoded;  // linear FST.
+
+  if ((allow_partial || decoder->ReachedFinal()) &&
+      decoder->GetBestPath(&decoded)) {
+    if (!decoder->ReachedFinal())
+      KALDI_WARN << "Decoder did not reach end-state, outputting partial "
+                    "traceback.";
+
+    std::vector<kaldi::int32> alignment;
+    std::vector<kaldi::int32> words;
+    kaldi::LatticeWeight weight;
+
+    GetLinearSymbolSequence(decoded, &alignment, &words, &weight);
+
+    if (word_syms != nullptr) {
+      for (kaldi::int32 word_id : words) {
+        const std::string s = word_syms->Find(word_id);
+        // Reject unknown ids before touching the result.
+        if (s.empty())
+          KALDI_ERR << "Word-id " << word_id << " not in symbol table.";
+        decoding_result += s;
+      }
+    }
+  }
+
+  return decoding_result;
+}
diff --git a/fluid/DeepASR/decoder/post_decode_faster.h b/fluid/DeepASR/decoder/post_decode_faster.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <string>
+#include <vector>
+#include "base/kaldi-common.h"
+#include "base/timer.h"
+#include "decoder/decodable-matrix.h"
+#include "decoder/faster-decoder.h"
+#include "fstext/fstext-lib.h"
+#include "hmm/transition-model.h"
+#include "lat/kaldi-lattice.h" // for {Compact}LatticeArc
+#include "tree/context-dep.h"
+#include "util/common-utils.h"
+
+
+// Bundles a kaldi::FasterDecoder with the decoding graph, the optional word
+// symbol table and the log-prior vector used to turn score matrices into
+// transcriptions.
+class Decoder {
+public:
+  // Loads the symbol table (skipped when word_syms_filename is empty), the
+  // decoding graph and the log-prior vector from the given files.
+  Decoder(std::string word_syms_filename,
+          std::string fst_in_filename,
+          std::string logprior_rxfilename);
+  ~Decoder();
+
+  // word_syms, decode_fst and decoder are owning raw pointers that the
+  // destructor deletes; a copy would delete them a second time, so copying
+  // is disabled.
+  Decoder(const Decoder &) = delete;
+  Decoder &operator=(const Decoder &) = delete;
+
+  // Interface to accept the scores read from specifier and return
+  // the batch decoding results
+  std::vector<std::string> decode(std::string posterior_rspecifier);
+
+  // Accept the scores of one utterance and return the decoding result
+  std::string decode(
+      std::string key,
+      const std::vector<std::vector<kaldi::BaseFloat>> &log_probs);
+
+private:
+  // For decoding one utterance
+  std::string decode(std::string key,
+                     kaldi::Matrix<kaldi::BaseFloat> &loglikes);
+
+  fst::SymbolTable *word_syms;              // word-id -> string; may be null
+  fst::VectorFst<fst::StdArc> *decode_fst;  // decoding graph
+  kaldi::FasterDecoder *decoder;            // constructed from *decode_fst
+  kaldi::Vector<kaldi::BaseFloat> logprior; // subtracted from every frame
+
+  bool binary;                      // "Write output in binary mode" setting
+  kaldi::BaseFloat acoustic_scale;  // scaling factor for acoustic likelihoods
+  bool allow_partial;               // output even if no final state reached
+};
diff --git a/fluid/DeepASR/decoder/pybind.cc b/fluid/DeepASR/decoder/pybind.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -15,15 +15,25 @@ limitations under the License. */
 #include <pybind11/pybind11.h>
 #include <pybind11/stl.h>
 
-#include "decoder.h"
+#include "post_decode_faster.h"
 
 namespace py = pybind11;
 
-PYBIND11_MODULE(decoder, m) {
- m.doc() = "Decode function for Deep ASR model";
-
- m.def("decode",
- &decode,
- "Decode one input probability matrix "
- "and return the transcription");
+// Python module `post_decode_faster`: exposes the Decoder class with its
+// two public decode() overloads.
+PYBIND11_MODULE(post_decode_faster, m) {
+  // Pointer-to-member types used to select each Decoder::decode overload.
+  using BatchDecodeFn = std::vector<std::string> (Decoder::*)(std::string);
+  using UtteranceDecodeFn = std::string (Decoder::*)(
+      std::string, const std::vector<std::vector<kaldi::BaseFloat>>&);
+
+  m.doc() = "Decoder for Deep ASR model";
+
+  py::class_<Decoder> decoder_class(m, "Decoder");
+  decoder_class.def(py::init<std::string, std::string, std::string>());
+  decoder_class.def("decode",
+                    static_cast<BatchDecodeFn>(&Decoder::decode),
+                    "Decode for the probability matrices in specifier "
+                    "and return the transcriptions.");
+  decoder_class.def("decode",
+                    static_cast<UtteranceDecodeFn>(&Decoder::decode),
+                    "Decode one input probability matrix "
+                    "and return the transcription.");
 }
diff --git a/fluid/DeepASR/decoder/setup.py b/fluid/DeepASR/decoder/setup.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,27 +13,57 @@
 # limitations under the License.
 
 import os
+import glob
 from distutils.core import setup, Extension
 from distutils.sysconfig import get_config_vars
 
-args = ['-std=c++11']
+# Locate the Kaldi checkout; the include and library paths below are built
+# from it. Only a missing variable is translated into the friendlier error.
+try:
+    kaldi_root = os.environ['KALDI_ROOT']
+except KeyError:
+    raise ValueError("Environment variable 'KALDI_ROOT' is not defined. Please "
+                     "install kaldi and export KALDI_ROOT=<kaldi's root dir> .")
+
+# Extra compiler flags passed to the extension: C++11 plus suppression of
+# several warning categories.
+# NOTE(review): the suppressed warnings presumably come from the Kaldi /
+# OpenFst headers -- confirm before removing any of them.
+args = [
+ '-std=c++11', '-Wno-sign-compare', '-Wno-unused-variable',
+ '-Wno-unused-local-typedefs', '-Wno-unused-but-set-variable',
+ '-Wno-deprecated-declarations', '-Wno-unused-function'
+]
 
 # remove warning about -Wstrict-prototypes
 (opt, ) = get_config_vars('OPT')
 os.environ['OPT'] = " ".join(flag for flag in opt.split()
  if flag != '-Wstrict-prototypes')
+# Sets CC to g++ unconditionally, replacing any value already in the
+# environment.
+# NOTE(review): presumably so the C++ sources are built with a C++ driver --
+# confirm whether a user-provided CC should be respected instead.
+os.environ['CC'] = 'g++'
+
+# Libraries the extension is linked against.
+LIBS = [
+    'fst',
+    'kaldi-base',
+    'kaldi-util',
+    'kaldi-matrix',
+    'kaldi-tree',
+    'kaldi-hmm',
+    'kaldi-fstext',
+    'kaldi-decoder',
+    'kaldi-lat',
+]
+
+# Library directories, listed relative to KALDI_ROOT and resolved to
+# absolute paths.
+LIB_DIRS = [
+    os.path.abspath(os.path.join(kaldi_root, rel_dir))
+    for rel_dir in (
+        'tools/openfst/lib',
+        'src/base',
+        'src/matrix',
+        'src/util',
+        'src/tree',
+        'src/hmm',
+        'src/fstext',
+        'src/decoder',
+        'src/lat',
+    )
+]
 
 ext_modules = [
  Extension(
- 'decoder',
- ['pybind.cc', 'decoder.cc'],
- include_dirs=['pybind11/include', '.'],
+ 'post_decode_faster',
+ ['pybind.cc', 'post_decode_faster.cc'],
+ include_dirs=[
+ 'pybind11/include', '.', os.path.join(kaldi_root, 'src'),
+ os.path.join(kaldi_root, 'tools/openfst/src/include')
+ ],
  language='c++',
+ libraries=LIBS,
+ library_dirs=LIB_DIRS,
+ runtime_library_dirs=LIB_DIRS,
  extra_compile_args=args, ),
 ]
 
 setup(
- name='decoder',
+ name='post_decode_faster',
  version='0.0.1',
  author='Paddle',
  author_email='',

diff --git a/fluid/DeepASR/decoder/setup.sh b/fluid/DeepASR/decoder/setup.sh
@@ -1,4 +1,4 @@
-
+set -e
 
 if [ ! -d pybind11 ]; then
  git clone https://github.com/pybind/pybind11.git