import tensorflow as tf
import numpy as np
import os
import cPickle as pickle
from os.path import expanduser
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "utils")))
from tf_utils import fcLayer, createLSTMCell, applyActivation, predictionLayer
from predContext import predContext, createHtDict

# Model params
# 0 -- shared; 1 -- context; 2 -- task
fc_activation = "tanh"
output_activation = "tanh"
dropout = 0.0
lstm_size_0 = 512
lstm_size_1 = 512
lstm_size_2 = 512
n_layer_0 = 1
n_layer_1 = 1
n_layer_2 = 1
branch1_fc = 512
branch2_fc = 512

# Data params
train_data_path = "~/tweetnet/data/train_data.pkl"
test_data_path = "~/tweetnet/data/test_data.pkl"
batch_size = 128
n_steps = 40
feature_length = 66
context_dim = 300
task_dim = 300

# Hyper-params
lr = 0.0001
n_epoch = 500
topN = 4
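# topN: the test loop below treats a hashtag prediction as correct when
# predContext reports the true tag among its topN candidates.
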
def buildModel(x, y_context, y_task, is_train, scope="multiTask"):

    # Assume the input shape is (batch_size, n_steps, feature_length)

    # Permute batch_size and n_steps
    x = tf.transpose(x, [1, 0, 2])
    print x.get_shape()
    # Reshape to (n_steps*batch_size, feature_length)
    x = tf.reshape(x, [-1, feature_length])
    print x.get_shape()
    # Split to get a list of n_steps tensors of shape (batch_size, feature_length)
    x = tf.split(x, n_steps, 0)
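    # Shape walk-through with the defaults above (batch_size=128, n_steps=40,
    # feature_length=66): (128, 40, 66) -> transpose -> (40, 128, 66)
    # -> reshape -> (5120, 66) -> split -> a list of 40 tensors of (128, 66),
    # i.e. one (batch_size, feature_length) slice per time step.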

    # Create lstm cell for the shared layer
    lstm_cell_0, state_0 = createLSTMCell(batch_size, lstm_size_0, n_layer_0, forget_bias=0.0)
    # Create lstm cell for branch 1
    lstm_cell_1, state_1 = createLSTMCell(batch_size, lstm_size_1, n_layer_1, forget_bias=0.0)
    # Create lstm cell for branch 2
    lstm_cell_2, state_2 = createLSTMCell(batch_size, lstm_size_2, n_layer_2, forget_bias=0.0)
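    # createLSTMCell is a helper from this repo's tf_utils; it is assumed to
    # return an (optionally stacked) LSTM cell together with its zero-filled
    # initial state for the given batch size. Note that lstm_cell_1 and
    # lstm_cell_2 are currently unused, since the per-branch LSTMs below are
    # commented out.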

    combined_cost = tf.constant(0)
    cost1 = tf.constant(0)
    cost2 = tf.constant(0)

    for time_step in range(n_steps):
        with tf.variable_scope("SharedLSTM"):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output_0, state_0) = lstm_cell_0(x[time_step], state_0)

        #with tf.variable_scope("Branch_context"):
        #    if time_step > 0:
        #        tf.get_variable_scope().reuse_variables()
        #    (cell_output_1, state_1) = lstm_cell_1(cell_output_0, state_1)
        with tf.variable_scope("Branch_context_fc"):
            if time_step == n_steps - 1:
                fc_out1 = fcLayer(x=cell_output_0, in_shape=lstm_size_0, out_shape=branch1_fc, activation=fc_activation, dropout=dropout, is_train=is_train, scope="fc1")
                cost1, output1 = predictionLayer(x=fc_out1, y=y_context, in_shape=branch1_fc, out_shape=y_context.get_shape()[-1].value, activation=output_activation)
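                # predictionLayer (from tf_utils) is assumed to project fc_out1
                # to the target dimension, apply output_activation, and return
                # a (cost, prediction) pair measured against the embedding target.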

        #with tf.variable_scope("Branch_task"):
        #    if time_step > 0:
        #        tf.get_variable_scope().reuse_variables()
        #    (cell_output_2, state_2) = lstm_cell_2(cell_output_0, state_2)
        with tf.variable_scope("Branch_task_fc"):
            if time_step == n_steps - 1:
                fc_out2 = fcLayer(x=cell_output_0, in_shape=lstm_size_0, out_shape=branch2_fc, activation=fc_activation, dropout=dropout, is_train=is_train, scope="fc2")
                cost2, output2 = predictionLayer(x=fc_out2, y=y_task, in_shape=branch2_fc, out_shape=y_task.get_shape()[-1].value, activation=output_activation)

    combined_cost = cost1 + cost2

    return combined_cost, cost1, cost2, output2, output1
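

# Architecture note: a single shared LSTM is unrolled for n_steps; at the final
# step its output feeds two fully connected heads, one regressing the context
# (missing-word) embedding and one the task (hashtag) embedding, each with its
# own cost. The per-branch LSTMs survive only as commented-out code above.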


def trainModel(train_path=train_data_path, test_path=test_data_path):

    # Load data as np arrays
    train_data = pickle.load(open(expanduser(train_path)))
    trainX, trainY_task, trainY_context = train_data[0], train_data[1], train_data[2]

    test_data = pickle.load(open(expanduser(test_path)))
    testX, testY_task, testY_context = test_data[0], test_data[1], test_data[2]

    htDic, testTweets, testHashtags, testMw, testTweetSequence, testHashtagSequence, testMwSequence, testStartIdx = prepareForTest()

    # Placeholders for X and Y
    x = tf.placeholder(tf.float32, shape=(batch_size, n_steps, feature_length))
    y_context = tf.placeholder(tf.float32, shape=(batch_size, context_dim))
    y_task = tf.placeholder(tf.float32, shape=(batch_size, task_dim))

    # Set up training variables
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    is_train = tf.placeholder(tf.int32)
    # Fixed-size placeholders mean every batch must be exactly batch_size,
    # so any remainder examples beyond the last full batch are dropped.
    n_batches = len(trainX) // batch_size

    # Build the model
    total_cost, cost1, cost2, output2, output1 = buildModel(x, y_context, y_task, is_train)

    # Minimize each branch's loss separately
    train_step1 = optimizer.minimize(cost1)
    train_step2 = optimizer.minimize(cost2)
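    # The two heads are trained with alternating updates: each minibatch takes
    # one gradient step on the task (hashtag) loss, then one on the context
    # (missing-word) loss. Both steps also update the shared LSTM weights,
    # since Optimizer.minimize differentiates its cost with respect to all
    # trainable variables by default.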

    # Start running operations on the graph
    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    trainables = tf.trainable_variables()
    for var in trainables:
        print var.name

    with sess.as_default():
        for epoch in range(n_epoch):
            taskCost = 0
            contextCost = 0
            epochTask = 0
            epochContext = 0
            for batch in range(n_batches):
                startIdx = batch * batch_size
                train_x = trainX[startIdx : startIdx + batch_size, :, :]
                train_y_context = trainY_context[startIdx : startIdx + batch_size, :]
                train_y_task = trainY_task[startIdx : startIdx + batch_size, :]

                feed_dict = {x: train_x, y_context: train_y_context, y_task: train_y_task, is_train: 1}

                # One gradient step on the task (hashtag) loss ...
                train_step2.run(feed_dict=feed_dict)
                combined_cost, _, cost_task, taskOutput = sess.run(fetches=[total_cost, cost1, cost2, output2], feed_dict=feed_dict)
                taskCost += cost_task

                # ... then one on the context (missing-word) loss
                train_step1.run(feed_dict=feed_dict)
                combined_cost, cost_context, _, taskOutput = sess.run(fetches=[total_cost, cost1, cost2, output2], feed_dict=feed_dict)
                contextCost += cost_context
                epochTask += cost_task
                epochContext += cost_context

                # Report running averages every 100 minibatches
                if batch != 0 and batch % 100 == 0:
                    print "Minibatch ", batch, " Missing Word: ", contextCost / 100, " Hashtag: ", taskCost / 100
                    contextCost = 0
                    taskCost = 0
            print "Epoch ", epoch, " Missing Word: ", epochContext / n_batches, " Hashtag: ", epochTask / n_batches

            # At the end of each epoch, run a forward pass over all testing data
            #tweetStartIdx = 0
            tweetCnt = 0
            correctCnt = 0
            n_test_batches = len(testTweetSequence) // batch_size

            for batch in range(n_test_batches):
                startIdx = batch * batch_size
                test_x = testX[startIdx : startIdx + batch_size, :, :]
                test_y_context = testY_context[startIdx : startIdx + batch_size, :]
                test_y_task = testY_task[startIdx : startIdx + batch_size, :]

                # Feed is_train=0 so any train-only behavior (e.g. dropout) is off
                feed_dict = {x: test_x, y_context: test_y_context, y_task: test_y_task, is_train: 0}

                combined_cost, cost_context, cost_task, taskOutput = sess.run(fetches=[total_cost, cost1, cost2, output2], feed_dict=feed_dict)
                print taskOutput.shape
                for i in range(batch_size):
                    # chr(3) (ETX) marks the end of a tweet sequence; only score
                    # the prediction made at the final character of each tweet.
                    if testTweetSequence[startIdx + i][-1] == chr(3):
                        topNht, isCorrect, topNdist = predContext(htDic, np.reshape(taskOutput[i, :], [1, task_dim]), topN, testHashtags[tweetCnt])
                        #tweetStartIdx = testIdx + 1
                        if isCorrect: correctCnt += 1

                        print "Tweet: ", testTweets[tweetCnt]
                        print "True label is: ", testHashtags[tweetCnt]
                        print "Predicted labels are: ", topNht

                        tweetCnt += 1

            accuracy = correctCnt * 1.0 / len(testTweets)
            print "Testing accuracy is: ", accuracy


def prepareForTest(dataset_path="~/tweetnet/data/text_data.pkl"):

    # Load the test tweets along with their hashtag / missing-word annotations
    # and the per-character sequences used to align predictions with tweets
    text_data = pickle.load(open(expanduser(dataset_path)))
    testTweets, testHashtags, testMw, testTweetSequence, testHashtagSequence, testMwSequence, testStartIdx = text_data[0], text_data[1], text_data[2], text_data[3], text_data[4], text_data[5], text_data[6]

    # Map each hashtag to its word2vec embedding
    dictionary = pickle.load(open(expanduser("~/tweetnet/data/word2vec_dict.pkl")))
    htDic = createHtDict(dictionary, testHashtags)
    return htDic, testTweets, testHashtags, testMw, testTweetSequence, testHashtagSequence, testMwSequence, testStartIdx
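
# Expected pickle layouts (assumed from the unpacking above, not verified):
#   train_data.pkl / test_data.pkl: [X of shape (N, n_steps, feature_length),
#       Y_task of shape (N, task_dim), Y_context of shape (N, context_dim)]
#   text_data.pkl: [tweets, hashtags, missing words, tweet/hashtag/missing-word
#       character sequences, start indices]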


if __name__ == "__main__":
    trainModel(train_path=train_data_path, test_path=test_data_path)