22import json
33import sys
44from enum import Enum
5- from math import ceil
6-
7-
class Config:
    """Container for model hyper-parameters and run paths.

    All values are plain attributes; `get_default_config` fills them with
    the project defaults plus the command-line paths/flags.
    """

    @staticmethod
    def get_default_config(args):
        """Build a Config populated with the default hyper-parameters.

        *args* must expose: data_path, test_path, save_path, load_path,
        release, export_code_vectors (e.g. an argparse.Namespace).
        Returns the new Config instance.
        """
        config = Config()
        config.DL_FRAMEWORK = 'keras'  # in {'keras', 'tensorflow'}
        config.NUM_EPOCHS = 20
        config.SAVE_EVERY_EPOCHS = 1
        config.BATCH_SIZE = 1024
        config.TEST_BATCH_SIZE = config.BATCH_SIZE
        config.READING_BATCH_SIZE = 1300 * 4
        config.NUM_BATCHING_THREADS = 2
        config.BATCH_QUEUE_SIZE = 300000
        config.MAX_CONTEXTS = 200
        config.WORDS_VOCAB_SIZE = 1301136
        config.TARGET_VOCAB_SIZE = 261245
        config.PATHS_VOCAB_SIZE = 911417
        config.EMBEDDINGS_SIZE = 128
        config.MAX_TO_KEEP = 10
        config.DROPOUT_KEEP_RATE = 0.75

        config.READER_NUM_PARALLEL_BATCHES = 1  # cpu cores [for tf.contrib.data.map_and_batch()]
        config.SHUFFLE_BUFFER_SIZE = 10000
        config.CSV_BUFFER_SIZE = 100 * 1024 * 1024  # 100 MB

        # Automatically filled, do not edit:
        config.TRAIN_PATH = args.data_path
        config.TEST_PATH = args.test_path
        config.SAVE_PATH = args.save_path
        config.LOAD_PATH = args.load_path
        config.RELEASE = args.release
        config.EXPORT_CODE_VECTORS = args.export_code_vectors
        return config

    def __init__(self):
        self.DL_FRAMEWORK: str = ''  # in {'keras', 'tensorflow'}
        self.NUM_EPOCHS: int = 0
        self.SAVE_EVERY_EPOCHS: int = 0
        self.BATCH_SIZE: int = 0
        self.TEST_BATCH_SIZE: int = 0
        self.READING_BATCH_SIZE: int = 0
        self.NUM_BATCHING_THREADS: int = 0
        self.BATCH_QUEUE_SIZE: int = 0
        self.MAX_CONTEXTS: int = 0
        self.WORDS_VOCAB_SIZE: int = 0
        self.TARGET_VOCAB_SIZE: int = 0
        self.PATHS_VOCAB_SIZE: int = 0
        self.EMBEDDINGS_SIZE: int = 0
        self.MAX_TO_KEEP: int = 0
        self.DROPOUT_KEEP_RATE: float = 0.0

        self.READER_NUM_PARALLEL_BATCHES: int = 0
        self.SHUFFLE_BUFFER_SIZE: int = 0
        self.CSV_BUFFER_SIZE: int = 0

        self.SAVE_PATH: str = ''
        self.LOAD_PATH: str = ''
        self.TRAIN_PATH: str = ''
        self.TEST_PATH: str = ''
        self.RELEASE: bool = False
        self.EXPORT_CODE_VECTORS: bool = False

        # Fix: the steps_per_epoch property reads NUM_EXAMPLES, but it was
        # never initialized anywhere in this class (AttributeError until some
        # external code assigned it). Presumably the data reader fills it in
        # after counting the dataset — TODO confirm against the caller.
        self.NUM_EXAMPLES: int = 0

    @property
    def steps_per_epoch(self) -> int:
        """Number of training batches per epoch: ceil(NUM_EXAMPLES / BATCH_SIZE)."""
        return ceil(self.NUM_EXAMPLES / self.BATCH_SIZE)
5+ import tensorflow as tf
726
737
748class common :
75- noSuchWord = "NoSuchWord"
769
class SpecialDictWords(Enum):
    """Reserved vocabulary entries that occupy the lowest dictionary indices."""

    NoSuchWord = 0  # out-of-vocabulary sentinel

    @classmethod
    def index_to_start_dict_from(cls):
        """First free index after all reserved words (one past the highest value)."""
        highest_reserved = max(member.value for member in cls)
        return highest_reserved + 1
7716
7817 @staticmethod
7918 def normalize_word (word ):
@@ -209,7 +148,7 @@ def split_to_batches(data_lines, batch_size):
@staticmethod
def legal_method_names_checker(name):
    """Truthy iff *name* is a usable prediction target.

    Rejects the out-of-vocabulary sentinel, then requires the name to be
    built only from letters and '|' subtoken separators. Mirrors the
    original short-circuit expression: returns False for the sentinel,
    otherwise the re.match result (a match object or None).
    """
    if name == common.SpecialDictWords.NoSuchWord.name:
        return False
    return re.match('^[a-zA-Z\|]+$', name)
214153 @staticmethod
215154 def filter_impossible_names (top_words ):
@@ -227,7 +166,7 @@ def parse_results(result, unhash_dict, topk=5):
227166 original_name , top_suggestions , top_scores , attention_per_context = list (single_method )
228167 current_method_prediction_results = PredictionResults (original_name )
229168 for i , predicted in enumerate (top_suggestions ):
230- if predicted == common .noSuchWord :
169+ if predicted == common .SpecialDictWords . NoSuchWord . name :
231170 continue
232171 suggestion_subtokens = common .get_subtokens (predicted )
233172 current_method_prediction_results .append_prediction (suggestion_subtokens , top_scores [i ].item ())
0 commit comments