
Commit a27488d

MRNN complete
1 parent b63bba8 commit a27488d

File tree: 5 files changed (+75, -30 lines)


automatedMTL/src/model/mcrnn_model_gen2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ class model(object):
2222
context_output_activation = "tanh"
2323
task_output_activation = "softmax"
2424
dropout = 0.0
25-
body_lstm_size = 128
26-
context_lstm_size = 128
27-
task_lstm_size = 128
25+
body_lstm_size = 512
26+
context_lstm_size = 512
27+
task_lstm_size = 512
2828
body_n_layer = 1
2929
context_n_layer = 1
3030
task_n_layer = 1
@@ -119,7 +119,7 @@ def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask")
119119
last_task_output = self.last_relevant(task_cell_output, self.length(task_cell_output))
120120
# feed the last output to the fc layer and make prediction
121121
task_fc_out = fcLayer(x=last_task_output, in_shape=self.task_lstm_size, out_shape=self.task_branch_fc, activation=self.fc_activation, dropout=self.dropout, is_train=is_train, scope="fc2")
122-
task_output, task_logits = predictionLayer(x=task_fc_out, y=y_task, in_shape=self.context_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=self.task_output_activation)
122+
task_output, task_logits = predictionLayer(x=task_fc_out, y=y_task, in_shape=self.task_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=self.task_output_activation)
123123
print "Task output shape: ", task_output.get_shape()
124124
task_cost = compute_cost(logit=task_logits, y=y_task, out_type="last_only", max_length=self.max_length, batch_size=self.batch_size, embed_dim=self.n_classes,activation=self.task_output_activation)
125125
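The second hunk corrects a shape mismatch: predictionLayer's in_shape must equal the width of the fc2 output it consumes (task_branch_fc), not context_branch_fc. As intuition, here is a minimal sketch of a dense prediction layer with that constraint, assuming TF 1.x; the repo's actual predictionLayer may differ, and prediction_layer_sketch is an illustrative name, not the repo's function:

```python
import tensorflow as tf

def prediction_layer_sketch(x, in_shape, out_shape, scope="pred"):
    # W must be [in_shape, out_shape]; if in_shape disagrees with
    # x.shape[-1] (here, the fc2 output width task_branch_fc), the
    # matmul below fails at graph-construction time.
    with tf.variable_scope(scope):
        W = tf.get_variable("W", [in_shape, out_shape])
        b = tf.get_variable("b", [out_shape], initializer=tf.zeros_initializer())
        logits = tf.matmul(x, W) + b
        return tf.nn.softmax(logits), logits
```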

automatedMTL/src/model/train_gen.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ def trainModel(M):
3030

3131
# Reformat the data according to the secondary task
3232
# Create class look up table
33-
max_length = reformat_data(data_path, M.secondary_task == "missing word")
34-
class_look_up(data_path)
33+
# max_length = reformat_data(data_path, M.secondary_task == "missing word")
34+
# class_look_up(data_path)
3535

3636
n_classes, word2vec_dic, n_test, n_train, missing_word_dic = get_data(data_path)
3737

@@ -52,10 +52,28 @@ def trainModel(M):
5252
optimizer2 = tf.train.AdamOptimizer(learning_rate=task_lr)
5353

5454
context_cost, task_cost, task_output, context_output = M.buildModel(x, y_context, y_task, is_train, keep_prob)
55+
56+
context_vars = []
57+
task_vars = []
58+
for var in tf.trainable_variables():
59+
if "context" not in var.name: task_vars.append(var)
60+
if "task" not in var.name: context_vars.append(var)
61+
for var in context_vars:
62+
print "Context variable: ", var.name
63+
print ("\n")
64+
for var in task_vars:
65+
print "Task variables: ", var.name
66+
5567
if M.is_multi_task:
5668
train_step1 = optimizer1.minimize(context_cost)
5769
train_step2 = optimizer2.minimize(task_cost)
5870

71+
#if M.is_multi_task:
72+
# context_grads, _ = tf.clip_by_global_norm(tf.gradients(context_cost, context_vars), 10)
73+
# train_step1 = optimizer1.apply_gradients(zip(context_grads, context_vars))
74+
#task_grads, _ = tf.clip_by_global_norm(tf.gradients(task_cost, task_vars), 10)
75+
#train_step2 = optimizer2.apply_gradients(zip(task_grads, task_vars))
76+
5977
accuracy_list = np.zeros((M.n_epoch))
6078
# Start running operations on the graph
6179
sess = tf.Session()
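The new code partitions the trainable variables into a context set and a task set (shared body variables land in both), and the commented-out block sketches a clipped alternative to minimize(): each optimizer would update only its own branch's variables, with gradients rescaled by global norm. A minimal sketch of that pattern, assuming TF 1.x; the helper name clipped_train_step is illustrative, not from the repo, and clip_norm=10 matches the commented-out code:

```python
def clipped_train_step(optimizer, cost, var_list, clip_norm=10.0):
    grads = tf.gradients(cost, var_list)                   # d(cost)/d(var) per variable
    clipped, _ = tf.clip_by_global_norm(grads, clip_norm)  # rescale if global norm > clip_norm
    return optimizer.apply_gradients(zip(clipped, var_list))

# train_step1 = clipped_train_step(optimizer1, context_cost, context_vars)
# train_step2 = clipped_train_step(optimizer2, task_cost, task_vars)
```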
```diff
@@ -72,8 +90,11 @@ def trainModel(M):
         for minibatch in range(n_train_batches):
             encoded_batch, batch_classes, batch_context_encoded, batch_context, batch_identifier, batch_text, batch_length = load_batch(n_classes, word2vec_dic, missing_word_dic, M.feature_length, M.max_length, data_path+"/Train/", 1, train_file, test_file, all_classes, start_idx, M.batch_size, M.secondary_task)
             start_idx += M.batch_size
-
-            feed_dict = {x: encoded_batch, y_context: batch_context_encoded, y_task: batch_classes, is_train:1, keep_prob:0.5, context_lr:(1-epoch*1.0/M.n_epoch)*M.lr, task_lr:epoch*1.0/M.n_epoch*M.lr}
+
+            if M.is_multi_task:
+                feed_dict = {x: encoded_batch, y_context: batch_context_encoded, y_task: batch_classes, is_train:1, keep_prob:0.5, context_lr:(1-epoch*1.0/M.n_epoch)*M.lr, task_lr:epoch*1.0/M.n_epoch*M.lr}
+            else:
+                feed_dict = {x: encoded_batch, y_context: batch_context_encoded, y_task: batch_classes, is_train:1, keep_prob:0.5, context_lr: 0.0, task_lr:M.lr}
             if M.is_multi_task:
                 train_step1.run(feed_dict=feed_dict)
                 context_cost_val, _, _ = sess.run(fetches = [context_cost, task_cost, task_output], feed_dict=feed_dict)
```
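In the multi-task branch the two learning rates anneal against each other: context_lr decays linearly from M.lr toward zero while task_lr grows linearly from zero toward M.lr, shifting effort from the secondary (context) task to the primary task over training; in the single-task branch the context optimizer is simply frozen at zero. A worked sketch of the schedule, where lr = 0.001 and n_epoch = 50 are assumed values for illustration, not taken from the diff:

```python
lr, n_epoch = 0.001, 50                              # assumed, illustrative only
for epoch in (0, 25, 49):
    context_lr = (1 - epoch * 1.0 / n_epoch) * lr    # 0.001 -> 0.0005 -> 0.00002
    task_lr = epoch * 1.0 / n_epoch * lr             # 0.0   -> 0.0005 -> 0.00098
```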

automatedMTL/src/util/hps.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,60 @@
11
from os.path import expanduser
22
import sys
33
import numpy
4+
import os
45
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..","model")))
56
from mcrnn_model_gen2 import model
6-
7+
from train_gen import trainModel as TM
78
#does hyperparameter search over some set of hyperparams.
89

910

10-
LR = [0.001, 0.0005, 0.0001] #3
11-
LR_MOD = [0.0, 0.1, 0.5, 1.0] #4
12-
N_EPOCHS = 30 # 30
13-
N_EXPERIMENTS = 5 # 5
11+
LR = [0.01,0.001,0.0001]
12+
LR_MOD = [1.0] #4
13+
N_EPOCHS = [50] # 30
14+
N_EXPERIMENTS = [10] # 5
1415

1516
#3*4*30*5/60=30 hrs.
1617

17-
for lr in LR:
18-
for lr_mod in LR_MOD:
19-
runExperiment(lr,lr_mod,N_EPOCHS,N_EXPERIMENTS)
2018

19+
def runExperiment(lr, lr_mod,n_epoch,n_experiments,f1):
20+
M= model()
2121

22-
def runExperiment(lr, lr_mod,n_epochs,n_experiments):
23-
M= model()
22+
print M.is_multi_task
23+
if lr_mod == 0.0:
24+
M.is_multi_task = False
25+
else:
26+
M.is_multi_task = True
27+
print M.is_multi_task
2428

25-
print M.lr
26-
M.lr = lr
27-
print M.lr
29+
print M.lr
30+
M.lr = lr
31+
print M.lr
2832

2933
print M.lr_mod
30-
M.lr_mod = lr_mod
31-
print M.lr_mod
34+
M.lr_mod = lr_mod
35+
print M.lr_mod
3236

33-
maxAccList = [];
34-
for i in range(n_experiments):
35-
accuracyVec = run_code()#INSERT CODE TO run for n epochs
36-
maxAcc = numpy.max(accuracyVec)
37-
maxAccList.append(maxAcc)
38-
expVal = numpy.mean(average)
39-
print "lr = " + str(lr) + " lr_mod = " + str(lr_mod) + " avg_acc = " + str(expVal)
37+
print M.n_epoch
38+
M.n_epoch = n_epoch
39+
print M.n_epoch
4040

41+
maxAccList = [];
42+
for i in range(n_experiments):
43+
accuracyVec = TM(M)#INSERT CODE TO run for n epochs
44+
maxAcc = numpy.max(accuracyVec)
45+
maxAccList.append(maxAcc)
46+
expVal = numpy.mean(maxAccList)
47+
string_result = "lr = " + str(lr) + " lr_mod = "+ "self-annealing" + " avg_acc = " + str(expVal)+'\n'
48+
f1.write(string_result)
49+
f1.flush()
50+
print string_result
4151

4252

53+
54+
f1 = open(expanduser('~/tweetnet/logs/hps_log_mrnn_bidir.log'),'w+')
55+
for lr in LR:
56+
for lr_mod in LR_MOD:
57+
for n_epoch in N_EPOCHS:
58+
for n_experiments in N_EXPERIMENTS:
59+
runExperiment(lr,lr_mod,n_epoch,n_experiments,f1)
60+
f1.close()
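The new driver sweeps the full grid with four nested loops, averaging the per-run maximum accuracy over n_experiments repetitions and flushing each result to the log as it completes. The same sweep could be flattened with itertools.product; a sketch under the same grids and runExperiment defined above, purely illustrative:

```python
from itertools import product

# Equivalent flattened sweep; assumes LR, LR_MOD, N_EPOCHS, N_EXPERIMENTS,
# runExperiment, and expanduser from the file above are in scope.
f1 = open(expanduser('~/tweetnet/logs/hps_log_mrnn_bidir.log'), 'w+')
for lr, lr_mod, n_epoch, n_experiments in product(LR, LR_MOD, N_EPOCHS, N_EXPERIMENTS):
    runExperiment(lr, lr_mod, n_epoch, n_experiments, f1)
f1.close()
```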

automatedMTL/src/util/hps_script.sh

Whitespace-only changes.

automatedMTL/src/util/tf_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ def fcLayer(x, in_shape, out_shape, activation, dropout, is_train, scope="fc"):
1717

1818
return out_op
1919

20+
def createGRUCell(batch_size, lstm_size):
21+
gru_cell = tf.contrib.rnn.GRUCell(num_units=lstm_size, activation=tf.tanh)
22+
state=gru_cell.zero_state(batch_size, tf.float32)
23+
24+
return gru_cell, state
25+
2026
def createLSTMCell(batch_size, lstm_size, n_layers, forget_bias):
2127

2228
lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=forget_bias)
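A usage sketch for the new helper, assuming TF 1.x; input_seq and all dimensions below are hypothetical placeholders, not values from the repo:

```python
import tensorflow as tf

# Hypothetical dimensions for illustration only.
batch_size, max_length, feature_length, lstm_size = 32, 20, 300, 512
input_seq = tf.placeholder(tf.float32, [batch_size, max_length, feature_length])

gru_cell, init_state = createGRUCell(batch_size, lstm_size)
outputs, final_state = tf.nn.dynamic_rnn(gru_cell, input_seq,
                                         initial_state=init_state)
# outputs: [batch, time, lstm_size]. Unlike BasicLSTMCell, a GRU's state is
# a single tensor rather than a (c, h) tuple, so zero_state returns one tensor.
```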
