Skip to content

Commit f62a840

Browse files
committed
Updates and bugfixes:
- (Major update) Macro-Precision/Recall/F1 score calculations are added to "ner_scorer.py". - (Major update) The Trainer function call parameter list is shrunk: instead of passing separate train/vali/test iterators, I changed it to a single "dataloader". - (Major update) Since "macro-x" metrics must be calculated over the whole dataset (not per batch), I changed "single_model_ner_evaluator.py". - (Minor update) A detailed logger is added to "ner_scorer.py". - (Major bugfix) LSTM hidden-state initialization is changed for the "bidirectional=True" case.
1 parent 4d95702 commit f62a840

File tree

12 files changed

+141
-61
lines changed

12 files changed

+141
-61
lines changed

.idea/codeStyles/codeStyleConfig.xml

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/config.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,9 +98,9 @@
9898
"lstm": {
9999
"dropout_type": "bernoulli",
100100
"keep_prob": 0.5,
101-
"hidden_dim": 300,
102-
"num_layers": 1,
103-
"bidirectional": false,
101+
"hidden_dim": 100,
102+
"num_layers": 2,
103+
"bidirectional": true,
104104
"bias": true
105105
},
106106
"transformer_google": {
@@ -127,8 +127,8 @@
127127
"partial_adam": 0.25,
128128
"weight_decay": 0,
129129
"momentum": 0.9,
130-
"norm_ratio": 0.25,
131-
"epoch": 20,
130+
"norm_ratio": 5,
131+
"epoch": 10,
132132
"print_every_batch_step": 250,
133133
"save_every_epoch": 1,
134134
"topk": [

crf/CRF.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ def __init__(self, args):
2121
# Matrix of transition parameters. Entry i,j is the score of transitioning *to* i *from* j
2222
self.transition = nn.Parameter(torch.Tensor(self.tag_size, self.tag_size)).to(self.device)
2323

24-
self.transition.data[self.start_id, :] = -10000. # no transition to SOS
25-
self.transition.data[:, self.end_id] = -10000. # no transition from EOS except to PAD
26-
self.transition.data[:, self.pad_id] = -10000. # no transition from PAD except to PAD
27-
self.transition.data[self.pad_id, :] = -10000. # no transition to PAD except from EOS
24+
self.transition.data[self.start_id, :] = -10000. # no transition to start
25+
self.transition.data[:, self.end_id] = -10000. # no transition from end except to pad
26+
self.transition.data[:, self.pad_id] = -10000. # no transition from pad except to pad
27+
self.transition.data[self.pad_id, :] = -10000. # no transition to pad except from end
2828
self.transition.data[self.pad_id, self.end_id] = 0.
2929
self.transition.data[self.pad_id, self.pad_id] = 0.
3030

evaluation/evaluator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ def evaluator_factory(type, device):
1717
test_evaluator = SingleModelEvaluator(device, is_vali=False)
1818
return dev_evaluator, test_evaluator
1919
elif type == "single_model_ner_evaluator":
20+
logger.info("Evaluator type is %s", type)
2021
dev_evaluator = SingleModelNerEvaluator(device, is_vali=True)
2122
test_evaluator = SingleModelNerEvaluator(device, is_vali=False)
2223
return dev_evaluator, test_evaluator

evaluation/single_model_ner_evaluator.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ def __init__(self, device, is_vali):
1515
self.device = device
1616
self.is_vali = is_vali
1717

18-
def evaluate_iter(self, model, input, save_path, scorer):
18+
def evaluate_iter(self, model, input, save_path, scorer, detailed_ner_log=True):
1919
total_loss = 0
20-
total_f1 = 0
20+
macro_f1 = 0
21+
macro_precision = 0
22+
macro_recall = 0
2123
total_token_acc = 0
2224

2325
if not self.is_vali:
@@ -27,6 +29,9 @@ def evaluate_iter(self, model, input, save_path, scorer):
2729
logger.info("Validation mode!")
2830
model.eval()
2931

32+
full_ground_truth_list = list()
33+
full_prediction_list = list()
34+
3035
with torch.no_grad():
3136
for batch in input:
3237
batch_x = batch.sentence.to(self.device)
@@ -37,13 +42,26 @@ def evaluate_iter(self, model, input, save_path, scorer):
3742

3843
pred_scores, predictions = model.decode(batch_x)
3944

40-
token_level_accuracy = scorer.token_level_accuracy(predictions, batch_y)
45+
batch_y = batch_y.permute(1, 0)
46+
47+
scorer.token_level_accuracy(predictions, batch_y)
48+
49+
full_ground_truth_list.extend(batch_y.tolist())
50+
full_prediction_list.extend(predictions)
51+
52+
token_level_accuracy = scorer.token_accuracy
4153

4254
total_token_acc += token_level_accuracy
4355

4456
torch.cuda.empty_cache()
4557

46-
current_f1 = total_f1 / len(input)
58+
scorer.f1_score(full_prediction_list, full_ground_truth_list)
59+
macro_f1 = scorer.avg_macro_f1
60+
macro_precision = scorer.avg_macro_precision
61+
macro_recall = scorer.avg_macro_recall
4762
current_token_acc = total_token_acc / len(input)
4863

49-
return current_f1, current_token_acc
64+
if detailed_ner_log:
65+
scorer.print_detailed_score_log()
66+
67+
return macro_f1, macro_precision, macro_recall, current_token_acc

main.py

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,37 +29,29 @@ def initialize_model_and_trainer(model_properties, training_properties, datasetl
2929
logger.info("Model type is %s", training_properties["learner"])
3030
if training_properties["learner"] == "text_cnn":
3131
model = TextCnn(model_properties).to(device)
32-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
33-
datasetloader.val_iter, datasetloader.test_iter, device)
32+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
3433
elif training_properties["learner"] == "gru":
3534
model = GRU(model_properties).to(device)
36-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
37-
datasetloader.val_iter, datasetloader.test_iter, device)
35+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
3836
elif training_properties["learner"] == "lstm":
3937
model = LSTM(model_properties).to(device)
40-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
41-
datasetloader.val_iter, datasetloader.test_iter, device)
38+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
4239
elif training_properties["learner"] == "char_cnn":
4340
model = CharCNN(model_properties).to(device)
44-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
45-
datasetloader.val_iter, datasetloader.test_iter, device)
41+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
4642
elif training_properties["learner"] == "vdcnn":
4743
model = VDCNN(model_properties).to(device)
48-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
49-
datasetloader.val_iter, datasetloader.test_iter, device)
44+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
5045
elif training_properties["learner"] == "conv_deconv_cnn":
5146
model = ConvDeconvCNN(model_properties)
52-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
53-
datasetloader.val_iter, datasetloader.test_iter, device)
47+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
5448
elif training_properties["learner"] == "transformer_google":
5549
model = TransformerGoogle(model_properties).model.to(device)
56-
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader.train_iter,
57-
datasetloader.val_iter, datasetloader.test_iter, device)
50+
trainer = Trainer.trainer_factory("single_model_trainer", training_properties, datasetloader, device)
5851
elif training_properties["learner"] == "lstmcrf":
5952
assert training_properties["task"] == "ner"
6053
model = LSTMCRF(model_properties).to(device)
61-
trainer = Trainer.trainer_factory("single_model_ner_trainer", training_properties, datasetloader.train_iter,
62-
datasetloader.val_iter, datasetloader.test_iter, device)
54+
trainer = Trainer.trainer_factory("single_model_ner_trainer", training_properties, datasetloader, device)
6355
else:
6456
raise ValueError("Model is not defined! Available learner values are : 'text_cnn', 'char_cnn', 'vdcnn', 'gru', "
6557
"'lstm', 'conv_deconv_cnn' and 'transformer_google'")
@@ -176,6 +168,7 @@ def initialize_model_and_trainer(model_properties, training_properties, datasetl
176168
if category_vocab is not None:
177169
model_properties["common_model_properties"]["num_class"] = len(category_vocab)
178170
if ner_vocab is not None:
171+
model_properties["common_model_properties"]["ner_vocab"] = ner_vocab
179172
model_properties["common_model_properties"]["num_tags"] = len(ner_vocab)
180173
model_properties["common_model_properties"]["start_id"] = ner_vocab.stoi["<start>"]
181174
model_properties["common_model_properties"]["end_id"] = ner_vocab.stoi["<end>"]
@@ -217,4 +210,4 @@ def initialize_model_and_trainer(model_properties, training_properties, datasetl
217210
category_vocab_path=category_vocab_path,
218211
preprocessor=preprocessor.preprocess,
219212
topk=training_properties["topk"])
220-
logger.info("")
213+
logger.info("Done!")

models/LSTM.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,11 @@ def __init__(self, args):
6666

6767
def init_hidden(self, batch_size):
6868
if self.bidirectional is True:
69-
return (Variable(torch.zeros(1, batch_size, self.hidden_dim * 2).to(self.device)),
70-
Variable(torch.zeros(1, batch_size, self.hidden_dim * 2).to(self.device)))
69+
return (Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_dim).to(self.device)),
70+
Variable(torch.zeros(self.num_layers * 2, batch_size, self.hidden_dim).to(self.device)))
7171
else:
72-
return (Variable(torch.zeros(1, batch_size, self.hidden_dim).to(self.device)),
73-
Variable(torch.zeros(1, batch_size, self.hidden_dim).to(self.device)))
72+
return (Variable(torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(self.device)),
73+
Variable(torch.zeros(self.num_layers, batch_size, self.hidden_dim).to(self.device)))
7474

7575
def initialize_embeddings(self):
7676
logger.info("> Embeddings")

scorer/ner_scorer.py

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
1+
import logging.config
12

3+
logging.config.fileConfig(fname='./config/config.logger', disable_existing_loggers=False)
4+
logger = logging.getLogger("NerScorer")
25

36
class NerScorer(object):
4-
def __init__(self):
7+
def __init__(self, ner_vocab):
58
super(NerScorer, self).__init__()
9+
self.ner_vocab = ner_vocab
10+
self.token_accuracy = 0
11+
self.avg_macro_precision = 0
12+
self.avg_macro_recall = 0
13+
self.avg_macro_f1 = 0
14+
self.macro_precision = {}
15+
self.macro_recall = {}
16+
self.macro_f1 = {}
617

7-
@staticmethod
8-
def token_level_accuracy(prediction, ground_truth):
18+
def token_level_accuracy(self, prediction, ground_truth):
919
token_count = 0
1020
matched = 0
1121

@@ -15,4 +25,59 @@ def token_level_accuracy(prediction, ground_truth):
1525
if p == gt:
1626
matched += 1
1727

18-
return matched * 100.0 / token_count
28+
self.token_accuracy = matched * 100.0 / token_count
29+
30+
def __initialize_dict(self):
31+
d = {}
32+
for v in self.ner_vocab.stoi:
33+
d[self.ner_vocab.stoi[v]] = 0
34+
return d
35+
36+
def __add_to_dict(self, d, tag):
37+
if tag in d:
38+
d[tag] += 1
39+
else:
40+
d[tag] = 1
41+
return d
42+
43+
def __calculate_tag_f1(self, f1, tp, fp, fn):
44+
precision = {}
45+
recall = {}
46+
for tag in tp:
47+
precision[tag] = tp[tag] / (tp[tag] + fp[tag] + 1e-16)
48+
recall[tag] = tp[tag] / (tp[tag] + fn[tag] + 1e-16)
49+
f1[tag] = (2 * precision[tag] * recall[tag] / (precision[tag] + recall[tag] + 1e-16)) * 100
50+
self.macro_f1 = f1
51+
self.macro_precision = precision
52+
self.macro_recall = recall
53+
54+
def __calculate_mean_f1(self):
55+
self.avg_macro_f1 = sum(self.macro_f1.values()) / float(len(self.macro_f1))
56+
self.avg_macro_precision = sum(self.macro_precision.values()) / float(len(self.macro_precision))
57+
self.avg_macro_recall = sum(self.macro_recall.values()) / float(len(self.macro_recall))
58+
59+
def f1_score(self, prediction, ground_truth):
60+
true_positives = self.__initialize_dict()
61+
false_positives = self.__initialize_dict()
62+
false_negatives = self.__initialize_dict()
63+
f1 = self.__initialize_dict()
64+
65+
for p_seq, gt_seq in zip(prediction, ground_truth):
66+
for p, gt in zip(p_seq, gt_seq):
67+
if p == gt:
68+
true_positives = self.__add_to_dict(true_positives, gt)
69+
else:
70+
false_negatives = self.__add_to_dict(false_negatives, gt)
71+
false_positives = self.__add_to_dict(false_positives, p)
72+
self.__calculate_tag_f1(f1, true_positives, false_positives, false_negatives)
73+
self.__calculate_mean_f1()
74+
75+
def print_detailed_score_log(self):
76+
logger.info("--------------------")
77+
logger.info("Detailed Tag-Based Score")
78+
for tag in self.macro_f1:
79+
logger.info("Tag: {} - Precision: {:.4f} - Recall: {:.4f} - F1: {:.4f}".format(self.ner_vocab.itos[tag],
80+
self.macro_precision[tag],
81+
self.macro_recall[tag],
82+
self.macro_f1[tag]))
83+
logger.info("--------------------")

training/single_model_ner_trainer.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,23 @@
44

55
import torch
66
import torch.nn as nn
7-
import torch.optim as optim
87

9-
from custom_optimizer import OpenAIAdam, NoamOptimizer, Padam
108
from evaluation.evaluator import Evaluator
119
from models.GRU import GRU
1210
from models.LSTM import LSTMBase
11+
from scorer.ner_scorer import NerScorer
1312
from training.single_model_trainer import SingleModelTrainer
1413
from utils.utils import time_since, save_best_model
15-
from scorer.ner_scorer import NerScorer
1614

1715
logging.config.fileConfig(fname='./config/config.logger', disable_existing_loggers=False)
1816
logger = logging.getLogger("Trainer")
1917

2018

2119
class SingleModelNerTrainer(SingleModelTrainer):
22-
def __init__(self, training_properties, train_iter, dev_iter, test_iter, device):
23-
super(SingleModelNerTrainer, self).__init__(training_properties, train_iter, dev_iter, test_iter, device)
20+
def __init__(self, training_properties, datasetloader, device):
21+
super(SingleModelNerTrainer, self).__init__(training_properties, datasetloader, device)
2422

25-
self.scorer = NerScorer()
23+
self.scorer = NerScorer(datasetloader.ner_vocab)
2624
self.dev_evaluator, self.test_evaluator = Evaluator().evaluator_factory("single_model_ner_evaluator",
2725
self.device)
2826

@@ -57,10 +55,10 @@ def train_iters(self, model, checkpoint=None):
5755
self.print_epoch(start, e, total_loss, train_f1)
5856

5957
if e % self.eval_every == 0:
60-
vali_f1, vali_token_acc = self.dev_evaluator.evaluate_iter(model=model,
61-
input=self.dev_iter,
62-
save_path=self.save_path,
63-
scorer=self.scorer)
58+
vali_f1, vali_precision, vali_recall, vali_token_acc = self.dev_evaluator.evaluate_iter(model=model,
59+
input=self.dev_iter,
60+
save_path=self.save_path,
61+
scorer=self.scorer)
6462
if best_vali_f1 < vali_f1:
6563
best_vali_token_acc = vali_token_acc
6664
best_vali_f1 = vali_f1
@@ -91,10 +89,10 @@ def train_iters(self, model, checkpoint=None):
9189
}, out_path)
9290
old_path = out_path
9391

94-
test_f1, test_token_acc = self.test_evaluator.evaluate_iter(model=model,
95-
input=self.test_iter,
96-
save_path=self.save_path,
97-
scorer=self.scorer)
92+
test_f1, test_precision, test_recall, test_token_acc = self.test_evaluator.evaluate_iter(model=model,
93+
input=self.test_iter,
94+
save_path=self.save_path,
95+
scorer=self.scorer)
9896

9997
self.print_test(test_token_acc, test_f1)
10098

@@ -182,4 +180,3 @@ def print_test(self, test_token_acc, test_f1):
182180
logger.info("Test F1: {:.4f} - "
183181
"Test Token Level Accuracy: {:.4f} - ".format(test_f1,
184182
test_token_acc))
185-

training/single_model_trainer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919

2020
class SingleModelTrainer(object):
21-
def __init__(self, training_properties, train_iter, dev_iter, test_iter, device):
21+
def __init__(self, training_properties, datasetloader, device):
2222
self.task = training_properties["task"]
2323
self.optimizer_type = training_properties["optimizer"]
2424
self.learning_rate = training_properties["learning_rate"]
@@ -36,9 +36,9 @@ def __init__(self, training_properties, train_iter, dev_iter, test_iter, device)
3636
self.amsgrad = training_properties["amsgrad"]
3737
self.partial_adam = training_properties["partial_adam"]
3838

39-
self.train_iter = train_iter
40-
self.dev_iter = dev_iter
41-
self.test_iter = test_iter
39+
self.train_iter = datasetloader.train_iter
40+
self.dev_iter = datasetloader.val_iter
41+
self.test_iter = datasetloader.test_iter
4242

4343
self.device = device
4444

0 commit comments

Comments
 (0)