
Commit a27488d

MRNN complete
1 parent b63bba8 commit a27488d

File tree: 5 files changed (+75, -30 lines)


automatedMTL/src/model/mcrnn_model_gen2.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ class model(object):
2222
context_output_activation = "tanh"
2323
task_output_activation = "softmax"
2424
dropout = 0.0
25-
body_lstm_size = 128
26-
context_lstm_size = 128
27-
task_lstm_size = 128
25+
body_lstm_size = 512
26+
context_lstm_size = 512
27+
task_lstm_size = 512
2828
body_n_layer = 1
2929
context_n_layer = 1
3030
task_n_layer = 1
@@ -119,7 +119,7 @@ def buildModel(self, x, y_context, y_task, is_train, dropout, scope="multiTask")
119119
last_task_output = self.last_relevant(task_cell_output, self.length(task_cell_output))
120120
# feed the last output to the fc layer and make prediction
121121
task_fc_out = fcLayer(x=last_task_output, in_shape=self.task_lstm_size, out_shape=self.task_branch_fc, activation=self.fc_activation, dropout=self.dropout, is_train=is_train, scope="fc2")
122-
task_output, task_logits = predictionLayer(x=task_fc_out, y=y_task, in_shape=self.context_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=self.task_output_activation)
122+
task_output, task_logits = predictionLayer(x=task_fc_out, y=y_task, in_shape=self.task_branch_fc, out_shape=y_task.get_shape()[-1].value, activation=self.task_output_activation)
123123
print "Task output shape: ", task_output.get_shape()
124124
task_cost = compute_cost(logit=task_logits, y=y_task, out_type="last_only", max_length=self.max_length, batch_size=self.batch_size, embed_dim=self.n_classes,activation=self.task_output_activation)
125125
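The second hunk corrects a shape mismatch: predictionLayer's in_shape must equal the width of the fc2 output it consumes (task_branch_fc), not context_branch_fc. As intuition, here is a minimal sketch of a dense prediction layer with that constraint, assuming TF 1.x; the repo's actual predictionLayer may differ, and prediction_layer_sketch is an illustrative name, not the repo's function:

```python
import tensorflow as tf

def prediction_layer_sketch(x, in_shape, out_shape, scope="pred"):
    # W must be [in_shape, out_shape]; if in_shape disagrees with
    # x.shape[-1] (here, the fc2 output width task_branch_fc), the
    # matmul below fails at graph-construction time.
    with tf.variable_scope(scope):
        W = tf.get_variable("W", [in_shape, out_shape])
        b = tf.get_variable("b", [out_shape], initializer=tf.zeros_initializer())
        logits = tf.matmul(x, W) + b
        return tf.nn.softmax(logits), logits
```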

automatedMTL/src/model/train_gen.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ def trainModel(M):
3030

3131
# Reformat the data according to the secondary task
3232
# Create class look up table
33-
max_length = reformat_data(data_path, M.secondary_task == "missing word")
34-
class_look_up(data_path)
33+
# max_length = reformat_data(data_path, M.secondary_task == "missing word")
34+
# class_look_up(data_path)
3535

3636
n_classes, word2vec_dic, n_test, n_train, missing_word_dic = get_data(data_path)
3737

@@ -52,10 +52,28 @@ def trainModel(M):
5252
optimizer2 = tf.train.AdamOptimizer(learning_rate=task_lr)
5353

5454
context_cost, task_cost, task_output, context_output = M.buildModel(x, y_context, y_task, is_train, keep_prob)
55+
56+
context_vars = []
57+
task_vars = []
58+
for var in tf.trainable_variables():
59+
if "context" not in var.name: task_vars.append(var)
60+
if "task" not in var.name: context_vars.append(var)
61+
for var in context_vars:
62+
print "Context variable: ", var.name
63+
print ("\n")
64+
for var in task_vars:
65+
print "Task variables: ", var.name
66+
5567
if M.is_multi_task:
5668
train_step1 = optimizer1.minimize(context_cost)
5769
train_step2 = optimizer2.minimize(task_cost)
5870

71+
#if M.is_multi_task:
72+
# context_grads, _ = tf.clip_by_global_norm(tf.gradients(context_cost, context_vars), 10)
73+
# train_step1 = optimizer1.apply_gradients(zip(context_grads, context_vars))
74+
#task_grads, _ = tf.clip_by_global_norm(tf.gradients(task_cost, task_vars), 10)
75+
#train_step2 = optimizer2.apply_gradients(zip(task_grads, task_vars))
76+
5977
accuracy_list = np.zeros((M.n_epoch))
6078
# Start running operations on the graph
6179
sess = tf.Session()
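The new code partitions the trainable variables into a context set and a task set (shared body variables land in both), and the commented-out block sketches a clipped alternative to minimize(): each optimizer would update only its own branch's variables, with gradients rescaled by global norm. A minimal sketch of that pattern, assuming TF 1.x; the helper name clipped_train_step is illustrative, not from the repo, and clip_norm=10 matches the commented-out code:

```python
def clipped_train_step(optimizer, cost, var_list, clip_norm=10.0):
    grads = tf.gradients(cost, var_list)                   # d(cost)/d(var) per variable
    clipped, _ = tf.clip_by_global_norm(grads, clip_norm)  # rescale if global norm > clip_norm
    return optimizer.apply_gradients(zip(clipped, var_list))

# train_step1 = clipped_train_step(optimizer1, context_cost, context_vars)
# train_step2 = clipped_train_step(optimizer2, task_cost, task_vars)
```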
```diff
@@ -72,8 +90,11 @@ def trainModel(M):
         for minibatch in range(n_train_batches):
             encoded_batch, batch_classes, batch_context_encoded, batch_context, batch_identifier, batch_text, batch_length = load_batch(n_classes, word2vec_dic, missing_word_dic, M.feature_length, M.max_length, data_path+"/Train/", 1, train_file, test_file, all_classes, start_idx, M.batch_size, M.secondary_task)
             start_idx += M.batch_size
-
-            feed_dict = {x: encoded_batch, y_context: batch_context_encoded, y_task: batch_classes, is_train:1, keep_prob:0.5, context_lr:(1-epoch*1.0/M.n_epoch)*M.lr, task_lr:epoch*1.0/M.n_epoch*M.lr}
+
+            if M.is_multi_task:
+                feed_dict = {x: encoded_batch, y_context: batch_context_encoded, y_task: batch_classes, is_train:1, keep_prob:0.5, context_lr:(1-epoch*1.0/M.n_epoch)*M.lr, task_lr:epoch*1.0/M.n_epoch*M.lr}
+            else:
+                feed_dict = {x: encoded_batch, y_context: batch_context_encoded, y_task: batch_classes, is_train:1, keep_prob:0.5, context_lr: 0.0, task_lr:M.lr}
             if M.is_multi_task:
                 train_step1.run(feed_dict=feed_dict)
                 context_cost_val, _, _ = sess.run(fetches = [context_cost, task_cost, task_output], feed_dict=feed_dict)
```
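In the multi-task branch the two learning rates anneal against each other: context_lr decays linearly from M.lr toward zero while task_lr grows linearly from zero toward M.lr, shifting effort from the secondary (context) task to the primary task over training; in the single-task branch the context optimizer is simply frozen at zero. A worked sketch of the schedule, where lr = 0.001 and n_epoch = 50 are assumed values for illustration, not taken from the diff:

```python
lr, n_epoch = 0.001, 50                              # assumed, illustrative only
for epoch in (0, 25, 49):
    context_lr = (1 - epoch * 1.0 / n_epoch) * lr    # 0.001 -> 0.0005 -> 0.00002
    task_lr = epoch * 1.0 / n_epoch * lr             # 0.0   -> 0.0005 -> 0.00098
```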

automatedMTL/src/util/hps.py

Lines changed: 40 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,60 @@
11
from os.path import expanduser
22
import sys
33
import numpy
4+
import os
45
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..","model")))
56
from mcrnn_model_gen2 import model
6-
7+
from train_gen import trainModel as TM
78
#does hyperparameter search over some set of hyperparams.
89

910

10-
LR = [0.001, 0.0005, 0.0001] #3
11-
LR_MOD = [0.0, 0.1, 0.5, 1.0] #4
12-
N_EPOCHS = 30 # 30
13-
N_EXPERIMENTS = 5 # 5
11+
LR = [0.01,0.001,0.0001]
12+
LR_MOD = [1.0] #4
13+
N_EPOCHS = [50] # 30
14+
N_EXPERIMENTS = [10] # 5
1415

1516
#3*4*30*5/60=30 hrs.
1617

17-
for lr in LR:
18-
for lr_mod in LR_MOD:
19-
runExperiment(lr,lr_mod,N_EPOCHS,N_EXPERIMENTS)
2018

19+
def runExperiment(lr, lr_mod,n_epoch,n_experiments,f1):
20+
M= model()
2121

22-
def runExperiment(lr, lr_mod,n_epochs,n_experiments):
23-
M= model()
22+
print M.is_multi_task
23+
if lr_mod == 0.0:
24+
M.is_multi_task = False
25+
else:
26+
M.is_multi_task = True
27+
print M.is_multi_task
2428

25-
print M.lr
26-
M.lr = lr
27-
print M.lr
29+
print M.lr
30+
M.lr = lr
31+
print M.lr
2832

2933
print M.lr_mod
30-
M.lr_mod = lr_mod
31-
print M.lr_mod
34+
M.lr_mod = lr_mod
35+
print M.lr_mod
3236

33-
maxAccList = [];
34-
for i in range(n_experiments):
35-
accuracyVec = run_code()#INSERT CODE TO run for n epochs
36-
maxAcc = numpy.max(accuracyVec)
37-
maxAccList.append(maxAcc)
38-
expVal = numpy.mean(average)
39-
print "lr = " + str(lr) + " lr_mod = " + str(lr_mod) + " avg_acc = " + str(expVal)
37+
print M.n_epoch
38+
M.n_epoch = n_epoch
39+
print M.n_epoch
4040

41+
maxAccList = [];
42+
for i in range(n_experiments):
43+
accuracyVec = TM(M)#INSERT CODE TO run for n epochs
44+
maxAcc = numpy.max(accuracyVec)
45+
maxAccList.append(maxAcc)
46+
expVal = numpy.mean(maxAccList)
47+
string_result = "lr = " + str(lr) + " lr_mod = "+ "self-annealing" + " avg_acc = " + str(expVal)+'\n'
48+
f1.write(string_result)
49+
f1.flush()
50+
print string_result
4151

4252

53+
54+
f1 = open(expanduser('~/tweetnet/logs/hps_log_mrnn_bidir.log'),'w+')
55+
for lr in LR:
56+
for lr_mod in LR_MOD:
57+
for n_epoch in N_EPOCHS:
58+
for n_experiments in N_EXPERIMENTS:
59+
runExperiment(lr,lr_mod,n_epoch,n_experiments,f1)
60+
f1.close()
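The new driver sweeps the full grid with four nested loops, averaging the per-run maximum accuracy over n_experiments repetitions and flushing each result to the log as it completes. The same sweep could be flattened with itertools.product; a sketch under the same grids and runExperiment defined above, purely illustrative:

```python
from itertools import product

# Equivalent flattened sweep; assumes LR, LR_MOD, N_EPOCHS, N_EXPERIMENTS,
# runExperiment, and expanduser from the file above are in scope.
f1 = open(expanduser('~/tweetnet/logs/hps_log_mrnn_bidir.log'), 'w+')
for lr, lr_mod, n_epoch, n_experiments in product(LR, LR_MOD, N_EPOCHS, N_EXPERIMENTS):
    runExperiment(lr, lr_mod, n_epoch, n_experiments, f1)
f1.close()
```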

automatedMTL/src/util/hps_script.sh

Whitespace-only changes.

automatedMTL/src/util/tf_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ def fcLayer(x, in_shape, out_shape, activation, dropout, is_train, scope="fc"):
1717

1818
return out_op
1919

20+
def createGRUCell(batch_size, lstm_size):
21+
gru_cell = tf.contrib.rnn.GRUCell(num_units=lstm_size, activation=tf.tanh)
22+
state=gru_cell.zero_state(batch_size, tf.float32)
23+
24+
return gru_cell, state
25+
2026
def createLSTMCell(batch_size, lstm_size, n_layers, forget_bias):
2127

2228
lstm_cell = tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=forget_bias)
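A usage sketch for the new helper, assuming TF 1.x; input_seq and all dimensions below are hypothetical placeholders, not values from the repo:

```python
import tensorflow as tf

# Hypothetical dimensions for illustration only.
batch_size, max_length, feature_length, lstm_size = 32, 20, 300, 512
input_seq = tf.placeholder(tf.float32, [batch_size, max_length, feature_length])

gru_cell, init_state = createGRUCell(batch_size, lstm_size)
outputs, final_state = tf.nn.dynamic_rnn(gru_cell, input_seq,
                                         initial_state=init_state)
# outputs: [batch, time, lstm_size]. Unlike BasicLSTMCell, a GRU's state is
# a single tensor rather than a (c, h) tuple, so zero_state returns one tensor.
```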
