kaldi-asr
diff --git a/scripts/rnnlm/choose_features.py b/scripts/rnnlm/choose_features.py
Lines changed: 2 additions & 2 deletions
diff --git a/scripts/rnnlm/get_special_symbol_opts.py b/scripts/rnnlm/get_special_symbol_opts.py
Lines changed: 2 additions & 1 deletion
diff --git a/scripts/rnnlm/get_unigram_probs.py b/scripts/rnnlm/get_unigram_probs.py
Lines changed: 5 additions & 4 deletions
diff --git a/scripts/rnnlm/get_vocab.py b/scripts/rnnlm/get_vocab.py
Lines changed: 1 addition & 1 deletion
diff --git a/scripts/rnnlm/get_word_features.py b/scripts/rnnlm/get_word_features.py
Lines changed: 3 additions & 3 deletions
diff --git a/scripts/rnnlm/prepare_split_data.py b/scripts/rnnlm/prepare_split_data.py
Lines changed: 1 addition & 1 deletion
diff --git a/scripts/rnnlm/show_word_features.py b/scripts/rnnlm/show_word_features.py
Lines changed: 2 additions & 2 deletions
diff --git a/scripts/rnnlm/validate_features.py b/scripts/rnnlm/validate_features.py
Lines changed: 1 addition & 1 deletion
diff --git a/scripts/rnnlm/validate_text_dir.py b/scripts/rnnlm/validate_text_dir.py
Lines changed: 2 additions & 2 deletions
diff --git a/scripts/rnnlm/validate_word_features.py b/scripts/rnnlm/validate_word_features.py
Lines changed: 2 additions & 2 deletions
@@ -86,7 +86,7 @@ def read_vocab(vocab_file):
  vocab = {}
  with open(vocab_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 2
  if fields[0] in vocab:
  sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}"
@@ -115,7 +115,7 @@ def read_unigram_probs(unigram_probs_file):
  unigram_probs = []
  with open(unigram_probs_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 2
  idx = int(fields[0])
  if idx >= len(unigram_probs):
 
@@ -27,7 +27,8 @@
 upper_ids = {}
 input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
 for line in input_stream:
- fields = line.split()
+ fields = line.split(' ')
+ assert(len(fields) == 2)
  sym = fields[0]
  if sym in special_symbols:
  assert sym not in lower_ids
 
@@ -77,7 +77,7 @@ def read_data_weights(weights_file, data_sources):
  with open(weights_file, 'r', encoding="utf-8") as f:
  for line in f:
  try:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 3
  if fields[0] in data_weights:
  raise Exception("duplicated data source({0}) specified in "
@@ -101,7 +101,7 @@ def read_vocab(vocab_file):
  vocab = {}
  with open(vocab_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 2
  if fields[0] in vocab:
  sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}"
@@ -130,8 +130,9 @@ def get_counts(data_sources, data_weights, vocab):
 
  with open(counts_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
- assert len(fields) == 2
+ fields = line.split(' ')
+ if len(fields) != 2: print("Warning, should be 2 cols:", fields, file=sys.stderr);
+ assert(len(fields) == 2)
  word = fields[0]
  count = fields[1]
  if word not in vocab:
 
@@ -28,7 +28,7 @@ def add_counts(word_counts, counts_file):
  with open(counts_file, 'r', encoding="utf-8") as f:
  for line in f:
  line = line.strip()
- word_and_count = line.split()
+ word_and_count = line.split(' ')
  assert len(word_and_count) == 2
  if word_and_count[0] in word_counts:
  word_counts[word_and_count[0]] += int(word_and_count[1])
 
@@ -40,7 +40,7 @@ def read_vocab(vocab_file):
  vocab = {}
  with open(vocab_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 2
  if fields[0] in vocab:
  sys.exit(sys.argv[0] + ": duplicated word({0}) in vocab: {1}"
@@ -61,7 +61,7 @@ def read_unigram_probs(unigram_probs_file):
  unigram_probs = []
  with open(unigram_probs_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 2
  idx = int(fields[0])
  if idx >= len(unigram_probs):
@@ -102,7 +102,7 @@ def read_features(features_file):
 
  with open(features_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split('\t')
  assert(len(fields) in [3, 4, 5])
 
  feat_id = int(fields[0])
 
@@ -66,7 +66,7 @@ def read_data_weights(weights_file, data_sources):
  with open(weights_file, 'r', encoding="utf-8") as f:
  for line in f:
  try:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) == 3
  if fields[0] in data_weights:
  raise Exception("duplicated data source({0}) specified in "
 
@@ -29,7 +29,7 @@ def read_feature_type_and_key(features_file):
 
  with open(features_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert(len(fields) in [2, 3, 4])
 
  feat_id = int(fields[0])
@@ -46,7 +46,7 @@ def read_feature_type_and_key(features_file):
 num_word_feats = 0
 with open(args.word_features_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) % 2 == 1
 
  print(int(fields[0]), end='\t')
 
@@ -30,7 +30,7 @@
  final_feats = {}
  word_feats = {}
  for line in f:
- fields = line.split()
+ fields = line.split('\t')
  assert(len(fields) in [3, 4, 5])
 
  assert idx == int(fields[0])
 
@@ -51,7 +51,7 @@ def check_text_file(text_file):
  lineno += 1
  if args.spot_check == 'true' and lineno > 10:
  break
- words = line.split()
+ words = line.split(' ')
  if len(words) != 0:
  found_nonempty_line = True
  for word in words:
@@ -75,7 +75,7 @@ def check_text_file(text_file):
  other_fields_set = set()
  with open(text_file, 'r', encoding="utf-8") as f:
  for line in f:
- array = line.split()
+ array = line.split(' ')
  if len(array) > 0:
  first_word = array[0]
  if first_word in first_field_set or first_word in other_fields_set:
 
@@ -27,7 +27,7 @@
 max_feat_id = -1
 with open(args.features_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert(len(fields) in [3, 4, 5])
 
  feat_id = int(fields[0])
@@ -51,7 +51,7 @@
 
 with open(args.word_features_file, 'r', encoding="utf-8") as f:
  for line in f:
- fields = line.split()
+ fields = line.split(' ')
  assert len(fields) > 0 and len(fields) % 2 == 1
  word_id = int(fields[0])