
Commit f9c20cf

committed
finished verifying mtl code... kinda weird it gets up to 74% and rising performance
1 parent 9d6d62b commit f9c20cf

File tree

10 files changed: +271 −86 lines changed


src/models/cascKeras.py

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+from keras.layers import Input, Dense, Embedding, LSTM, merge
+from keras.models import Model
+
+# this returns a tensor
+contextX, contexty, taskX, taskY = loadData()
+
+text_input = Input(shape=(100,), dtype='float32', name='text_input')
+
+lstm_body = LSTM(32, return_sequences=True)(text_input)
+
+lstm_context = LSTM(32)(lstm_body)
+fc_context = Dense(256)(lstm_context)
+out_context = Dense(300)(fc_context)
+
+lstm_task = LSTM(32)(lstm_body)
+fc_task = Dense(256)(lstm_task)
+fc_out = Dense(300)(fc_task)

src/models/mtlKeras.py

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+from keras.layers import Input, Dense, Embedding, LSTM, merge
+from keras.models import Model
+
+# this returns a tensor
+contextX, contexty, taskX, taskY = loadData()
+
+text_input = Input(shape=(100,), dtype='float32', name='text_input')
+
+lstm_body = LSTM(32, return_sequences=True)(text_input)
+
+lstm_context = LSTM(32)(lstm_body)
+fc_context = Dense(256)(lstm_context)
+out_context = Dense(300)(fc_context)
+
+lstm_task = LSTM(32)(lstm_body)
+fc_task = Dense(256)(lstm_task)
+fc_out = Dense(300)(fc_task)
+
+
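cascKeras.py and mtlKeras.py above are identical starter sketches: they define the shared-body/two-branch tensors but stop before assembling or compiling a Model, and loadData() is not defined anywhere in this commit. A minimal sketch of the missing assembly step, assuming a 3D input of 100 timesteps with 66 features (Keras LSTM layers require a sequence input, and feature_length is 66 elsewhere in the repo) and Keras 2 functional-API names (Keras 1 spells it Model(input=..., output=...)):

from keras.layers import Input, Dense, LSTM
from keras.models import Model

# Assumed input: 100 timesteps of 66-dim character features
text_input = Input(shape=(100, 66), dtype='float32', name='text_input')

# Shared body; return_sequences=True lets the two branch LSTMs stack on it
lstm_body = LSTM(32, return_sequences=True)(text_input)

# Context branch (missing-word embedding) and task branch (hashtag embedding)
out_context = Dense(300, name='context')(Dense(256)(LSTM(32)(lstm_body)))
out_task = Dense(300, name='task')(Dense(256)(LSTM(32)(lstm_body)))

model = Model(inputs=text_input, outputs=[out_context, out_task])
model.compile(optimizer='adam', loss='mean_squared_error')
# model.fit(X, [contexty, taskY], ...) once loadData() exists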

src/models/multitasking.py

Lines changed: 160 additions & 44 deletions
@@ -1,25 +1,61 @@
 import tensorflow as tf
-from tensorflow.models.rnn import rnn
-from tensorflow.models.rnn.rnn_cell import BasicLSTMCell, LSTMCell
 import numpy as np
+import os
+import cPickle as pickle
+from os.path import expanduser
+import sys
 
-def buildModel(x, y_context, y_task, is_train, scope="lstmLayer"):
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "utils")))
+from tf_utils import fcLayer, createLSTMCell, applyActivation, predictionLayer
+from predContext import predContext, createHtDict
+
+# Model params
+# 0 -- shared; 1 -- context; 2 -- task
+fc_activation = "tanh"
+output_activation = "tanh"
+dropout = 0.0
+lstm_size_0 = 512
+lstm_size_1 = 512
+lstm_size_2 = 512
+n_layer_0 = 1
+n_layer_1 = 1
+n_layer_2 = 1
+branch1_fc = 512
+branch2_fc = 512
+
+# Data params
+train_data_path = "~/tweetnet/data/train_data.pkl"
+test_data_path = "~/tweetnet/data/test_data.pkl"
+batch_size = 128
+n_steps = 40
+feature_length = 66
+context_dim = 300
+task_dim = 300
+
+# Hyper-params
+lr = 0.0001
+n_epoch = 500
+topN = 4
+
+def buildModel(x, y_context, y_task, is_train, scope="multiTask"):
 
     # Assume the input shape is (batch_size, n_steps, feature_length)
 
     # Permuting batch_size and n_steps
     x = tf.transpose(x, [1, 0, 2])
+    print x.get_shape()
     # Reshaping to (n_steps*batch_size, feature_length)
     x = tf.reshape(x, [-1, feature_length])
     # Split to get a list of "n_steps" tensors of shape (batch_size, feature_length)
+    print x.get_shape()
     x = tf.split(x, n_steps, 0)
 
     # Create lstm cell for the shared layer
-    lstm_cell_0, state_0 = createLSTMCell(batch_size, lstm_size_0, n_layer_0, forget_bias)
+    lstm_cell_0, state_0 = createLSTMCell(batch_size, lstm_size_0, n_layer_0, forget_bias=0.0)
     # Create lstm cell for branch 1
-    lstm_cell_1, state_1 = createLSTMCell(batch_size, lstm_size_1, n_layer_1, forget_bias)
+    lstm_cell_1, state_1 = createLSTMCell(batch_size, lstm_size_1, n_layer_1, forget_bias=0.0)
     # Create lstm cells for branch 2
-    lstm_cell_2, state_2 = createLSTMCell(batch_size, lstm_size_2, n_layer_2, forget_bias)
+    lstm_cell_2, state_2 = createLSTMCell(batch_size, lstm_size_2, n_layer_2, forget_bias=0.0)
 
     combined_cost = tf.constant(0)
     cost1 = tf.constant(0)
@@ -29,64 +65,144 @@ def buildModel(x, y_context, y_task, is_train, scope="lstmLayer"):
         with tf.variable_scope("SharedLSTM"):
             if time_step > 0:
                 tf.get_variable_scope().reuse_variables()
-            (cell_output_0, state_0) = lstm_cell_0(x[i], state_0)
+            (cell_output_0, state_0) = lstm_cell_0(x[time_step], state_0)
 
-        with tf.variable_scope("Branch_context"):
-            if time_step > 0:
-                tf.get_variable_scope().reuse_variables()
-            (cell_output_1, state_1) = lstm_cell_1(cell_output_0, state_1)
-            if time_step == n_step - 1:
-                fc_out1 = fcLayer(x=cell_output_1, in_shape=lstm_size_1, out_shape=branch1_hidden, activation=fc_activation, dropout=dropout, is_train, scope="fc1")
-                cost1 = predictionLayer(x=fc_out1, y=y_context, in_shape=lstm_size_1, out_shape=y_context.get_shape[-1].value, activation=output_activation)
+        #with tf.variable_scope("Branch_context"):
+        #    if time_step > 0:
+        #        tf.get_variable_scope().reuse_variables()
+        #    (cell_output_1, state_1) = lstm_cell_1(cell_output_0, state_1)
+        with tf.variable_scope("Branch_context_fc"):
+            if time_step == n_steps - 1:
+                fc_out1 = fcLayer(x=cell_output_0, in_shape=lstm_size_0, out_shape=branch1_fc, activation=fc_activation, dropout=dropout, is_train=is_train, scope="fc1")
+                cost1, output1 = predictionLayer(x=fc_out1, y=y_context, in_shape=branch1_fc, out_shape=y_context.get_shape()[-1].value, activation=output_activation)
 
-        with tf.variable_scope("Branch_task"):
-            if time_step > 0:
-                tf.get_variable_scope().reuse_variables()
-            (cell_output_2, state_2) = lstm_cell_2(cell_output_0, state_2)
-            if time_step == n_step - 1:
-                fc_out2 = fcLayer(x=cell_output_2, in_shape=lstm_size_2, out_shape=branch2_hidden, activation=fc_activation, dropout=dropout, is_train, scope="fc2")
-                cost2 = predictionLayer(x=fc_out2, y=y_task, in_shape=lstm_size_2, out_shape=y_task.get_shape[-1].value, activation=output_activation)
+        #with tf.variable_scope("Branch_task"):
+        #    if time_step > 0:
+        #        tf.get_variable_scope().reuse_variables()
+        #    (cell_output_2, state_2) = lstm_cell_2(cell_output_0, state_2)
+        with tf.variable_scope("Branch_task_fc"):
+            if time_step == n_steps - 1:
+                fc_out2 = fcLayer(x=cell_output_0, in_shape=lstm_size_0, out_shape=branch2_fc, activation=fc_activation, dropout=dropout, is_train=is_train, scope="fc2")
+                cost2, output2 = predictionLayer(x=fc_out2, y=y_task, in_shape=branch2_fc, out_shape=y_task.get_shape()[-1].value, activation=output_activation)
 
     combined_cost = cost1 + cost2
-    return combined_cost, cost1, cost2
+
+    return combined_cost, cost1, cost2, output2, output1
+
+
+
+def trainModel(train_path=train_data_path, test_path=test_data_path):
+
+    # Load data as np arrays
+    train_data = pickle.load(open(expanduser(train_path)))
+    trainX, trainY_task, trainY_context = train_data[0], train_data[1], train_data[2]
+
+    test_data = pickle.load(open(expanduser(test_path)))
+    testX, testY_task, testY_context = test_data[0], test_data[1], test_data[2]
+
+    htDic, testTweets, testHashtags, testMw, testTweetSequence, testHashtagSequence, testMwSequence, testStartIdx = prepareForTest()
 
-def trainModel(dataset_path = dataset_path):
 
+    # Placeholders for X and Y
     x = tf.placeholder(tf.float32, shape=(batch_size, n_steps, feature_length))
     y_context = tf.placeholder(tf.float32, shape=(batch_size, context_dim))
     y_task = tf.placeholder(tf.float32, shape=(batch_size, task_dim))
 
+    # Setting up training variables
     optimizer = tf.train.AdamOptimizer(learning_rate=lr)
     is_train = tf.placeholder(tf.int32)
-
-    total_cost, cost1, cost2 = buildModel(x, y, is_train)
-    train_step = optimizer.minimize(total_cost)
+    n_batches = np.ceil(len(trainX) / batch_size).astype(int)
+
+    # Build model and apply optimizer
+    total_cost, cost1, cost2, output2, output1 = buildModel(x, y_context, y_task, is_train)
+
+    # Minimize losses
+    train_step1 = optimizer.minimize(cost1)
+    train_step2 = optimizer.minimize(cost2)
 
     # Start running operations on the graph
    sess = tf.Session()
     sess.run(tf.initialize_all_variables())
+
+    trainables = tf.trainable_variables()
+    for var in trainables:
+        print var.name
+
     with sess.as_default():
         for epoch in range(n_epoch):
+            taskCost = 0
+            contextCost = 0
+            epochTask = 0
+            epochContext = 0
+            for batch in range(n_batches):
+                startIdx = batch*batch_size
+                train_x = trainX[startIdx : startIdx+batch_size, :, :]
+                train_y_context = trainY_context[startIdx : startIdx+batch_size, :]
+                train_y_task = trainY_task[startIdx : startIdx+batch_size, :]
+
+                feed_dict = {x: train_x, y_context: train_y_context, y_task: train_y_task, is_train: 1}
+
+                train_step2.run(feed_dict=feed_dict)
+                combined_cost, _, cost_task, taskOutput = sess.run(fetches=[total_cost, cost1, cost2, output2], feed_dict=feed_dict)
+                taskCost += cost_task
 
+                train_step1.run(feed_dict=feed_dict)
+                combined_cost, cost_context, _, taskOutput = sess.run(fetches=[total_cost, cost1, cost2, output2], feed_dict=feed_dict)
+                contextCost += cost_context
+                epochTask += cost_task
+                epochContext += cost_context
+
+                if batch != 0 and batch % 100 == 0:
+                    print "Minibatch ", batch, " Missing Word: ", contextCost / 100, " Hashtag: ", taskCost / 100
+                    contextCost = 0
+                    taskCost = 0
+            print "Epoch ", epoch, "Missing Word: ", epochContext / n_batches, " Hashtag: ", epochTask / n_batches
 
 
-# Model params
-fc_activation = "relu"
-output_activation = "tanh"
-dropout = 0.9
-lstm_size_0 = 512
-lstm_size_1 = 512
-lstm_size_2 = 512
-branch1_fc = 512
-branch2_fc = 512
+            # At the end of each epoch, run a forward pass of all testing data
 
-# Data params
-batch_size = 128
-n_step = 40
-feature_length = 66
-context_dim = 300
-task_dim = 300
+            #tweetStartIdx = 0
+            tweetCnt = 0
+            correctCnt = 0
+            n_test_batches = np.ceil(len(testTweetSequence) / batch_size).astype(int)
 
-# Hyper- params
-lr = 0.001
-n_epoch = 500
+            for batch in range(n_test_batches):
+                startIdx = batch*batch_size
+                test_x = testX[startIdx : startIdx+batch_size, :, :]
+                test_y_context = testY_context[startIdx : startIdx+batch_size, :]
+                test_y_task = testY_task[startIdx : startIdx+batch_size, :]
+
+                feed_dict = {x: test_x, y_context: test_y_context, y_task: test_y_task, is_train: 0}
+
+                combined_cost, cost_context, cost_task, taskOutput = sess.run(fetches=[total_cost, cost1, cost2, output2], feed_dict=feed_dict)
+                print taskOutput.shape
+                for i in range(batch_size):
+                    if testTweetSequence[startIdx+i][-1] == chr(3):
+                        topNht, isCorrect, topNdist = predContext(htDic, np.reshape(taskOutput[i, :], [1, task_dim]), topN, testHashtags[tweetCnt])
+                        #tweetStartIdx = testIdx + 1
+                        if isCorrect: correctCnt += 1
+
+                        print "Tweet: ", testTweets[tweetCnt]
+                        print "True label is: ", testHashtags[tweetCnt]
+                        print "Predicted labels are: ", topNht
+
+                        tweetCnt += 1
+
+            accuracy = correctCnt * 1.0 / len(testTweets)
+            print "Testing accuracy is: ", accuracy
+
+
+
+def prepareForTest(dataset_path="~/tweetnet/data/text_data.pkl"):
+
+    text_data = pickle.load(open(expanduser(dataset_path)))
+    testTweets, testHashtags, testMw, testTweetSequence, testHashtagSequence, testMwSequence, testStartIdx = text_data[0], text_data[1], text_data[2], text_data[3], text_data[4], text_data[5], text_data[6]
+
+    dictionary = pickle.load(open(expanduser("~/tweetnet/data/word2vec_dict.pkl")))
+    htDic = createHtDict(dictionary, testHashtags)
+    return htDic, testTweets, testHashtags, testMw, testTweetSequence, testHashtagSequence, testMwSequence, testStartIdx
+
+
+
+
+trainModel(train_path=train_data_path, test_path=test_data_path)
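tf_utils and predContext are imported at the top of multitasking.py but are not part of this commit, so their call sites above are the only specification. A minimal sketch of what the three tf_utils helpers might look like, inferred purely from those call sites (TF 1.x-era API; the actual implementations in the repo may differ):

import tensorflow as tf

def createLSTMCell(batch_size, lstm_size, n_layers, forget_bias):
    # Stacked LSTM cell plus a zero initial state, matching the
    # (cell, state) pair unpacked in buildModel
    cells = [tf.contrib.rnn.BasicLSTMCell(lstm_size, forget_bias=forget_bias)
             for _ in range(n_layers)]
    cell = tf.contrib.rnn.MultiRNNCell(cells)
    state = cell.zero_state(batch_size, tf.float32)
    return cell, state

def applyActivation(x, activation):
    # String-keyed activation lookup used by the layers below
    return {"tanh": tf.tanh, "relu": tf.nn.relu}[activation](x)

def fcLayer(x, in_shape, out_shape, activation, dropout, is_train, scope):
    # Fully connected layer with optional dropout gated by the is_train placeholder
    with tf.variable_scope(scope):
        W = tf.get_variable("W", [in_shape, out_shape])
        b = tf.get_variable("b", [out_shape], initializer=tf.zeros_initializer())
        h = applyActivation(tf.matmul(x, W) + b, activation)
        if dropout > 0.0:
            keep_prob = tf.cond(tf.equal(is_train, 1),
                                lambda: tf.constant(1.0 - dropout),
                                lambda: tf.constant(1.0))
            h = tf.nn.dropout(h, keep_prob)
        return h

def predictionLayer(x, y, in_shape, out_shape, activation):
    # Linear projection to the target dimension, activation, then MSE cost;
    # returns the (cost, output) pair unpacked in buildModel
    W = tf.get_variable("pred_W", [in_shape, out_shape])
    b = tf.get_variable("pred_b", [out_shape], initializer=tf.zeros_initializer())
    output = applyActivation(tf.matmul(x, W) + b, activation)
    cost = tf.reduce_mean(tf.square(output - y))
    return cost, output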

src/models/textToContext.py

Lines changed: 11 additions & 9 deletions
@@ -24,7 +24,7 @@
 from keras.layers.wrappers import Bidirectional
 from keras.optimizers import RMSprop
 from keras.optimizers import Adadelta
-from keras.optimizers import Adagrad
+from keras.optimizers import Adam
 from keras.layers import Dropout
 from keras.layers import BatchNormalization
 from tweetGenerator_lstm import generateText
@@ -36,7 +36,7 @@
 #get the top N prediction of hashtags
 topN = 4
 #sequenceLength: sequence length (k in BPTTk)
-sequenceLength = 30
+sequenceLength = 40
 #Number of symbols
 vocabLen = 66
 #train test split
@@ -92,18 +92,20 @@
 
 model.add(LSTM(numHiddenFirst, input_shape=(sequenceLength, inputSize)))
 
-model.add(BatchNormalization())
+#model.add(BatchNormalization())
 
 model.add(Dense(numHiddenFirst))
-model.add(PReLU())
-model.add(BatchNormalization())
+model.add(Activation('relu'))
+#model.add(PReLU())
+#model.add(BatchNormalization())
 
 model.add(Dense(outputSize))
-model.add(PReLU())
-model.add(BatchNormalization())
-
-optimizer = RMSprop(lr=0.005)
+model.add(Activation('tanh'))
+#model.add(PReLU())
+#model.add(BatchNormalization())
 
+#optimizer = RMSprop(lr=0.005)
+optimizer = Adam(lr=0.0001)
 model.compile(loss='mean_squared_error', optimizer=optimizer)
 print("Finished building model.")
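For reference, the model block after this commit reads as below, consolidated from the three hunks. numHiddenFirst, inputSize, and outputSize are defined outside the shown hunks, so the values here are assumptions, and Activation is likewise assumed to be imported:

from keras.models import Sequential
from keras.layers import Dense, LSTM, Activation
from keras.optimizers import Adam

sequenceLength = 40   # k in BPTT(k), raised from 30 by this commit
inputSize = 66        # assumed equal to vocabLen
numHiddenFirst = 512  # assumed
outputSize = 300      # assumed word2vec dimension, as in multitasking.py

model = Sequential()
model.add(LSTM(numHiddenFirst, input_shape=(sequenceLength, inputSize)))
model.add(Dense(numHiddenFirst))
model.add(Activation('relu'))
model.add(Dense(outputSize))
model.add(Activation('tanh'))

optimizer = Adam(lr=0.0001)
model.compile(loss='mean_squared_error', optimizer=optimizer)
print("Finished building model.")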

src/utils/checkTrainTestDup.py

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+import cPickle as pickle
+import numpy as np
+import os
+from os.path import expanduser
+
+test_data = pickle.load(open(expanduser("~/tweetnet/data/test_data.pkl")))
+train_data = pickle.load(open(expanduser("~/tweetnet/data/train_data.pkl")))
+
+testX = test_data[0]
+trainX = train_data[0]
+
+idx = len(testX)*np.random.rand(2000)
+
+n = 0
+cnt = 0
+for i in idx:
+    print n
+    n += 1
+    test_x = testX[int(i), :, :]
+    for j in range(len(trainX)):
+        if np.array_equal(test_x, trainX[j, :, :]):
+            cnt += 1
+            print "Dup"
+            break
+print cnt
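The scan above is quadratic: each of the 2000 sampled test examples is compared against every training example. A sketch of an equivalent check that hashes the training set once, under the assumption that exact byte-level equality is the right notion of duplicate (same as np.array_equal on same-shape, same-dtype arrays):

import cPickle as pickle
import numpy as np
from os.path import expanduser

test_data = pickle.load(open(expanduser("~/tweetnet/data/test_data.pkl")))
train_data = pickle.load(open(expanduser("~/tweetnet/data/train_data.pkl")))
testX, trainX = test_data[0], train_data[0]

# Hash every training example once; each membership test is then O(1)
train_keys = set(trainX[j].tobytes() for j in range(len(trainX)))

idx = np.random.randint(len(testX), size=2000)
cnt = sum(1 for i in idx if testX[i].tobytes() in train_keys)
print cnt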
