Skip to content

Commit ba8904e

Browse files
committed
update diffnet
1 parent 66535bf commit ba8904e

File tree

10 files changed

+869
-8
lines changed

10 files changed

+869
-8
lines changed

diffnet/class/DataModule.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
'''
2+
author: Peijie Sun
3+
e-mail: sun.hfut@gmail.com
4+
released date: 04/18/2019
5+
'''
6+
7+
from collections import defaultdict
8+
import numpy as np
9+
from time import time
10+
import random
11+
12+
class DataModule():
    """In-memory view of one tab-separated (user, item) interaction file.

    The instance reads ``filename``, keeps the observed pairs, samples random
    negative items and assembles the numpy arrays that are published through
    ``data_dict``.

    Settings are read from ``conf``: ``num_items``, ``num_negatives``,
    ``num_evaluate``, ``training_batch_size``, ``evaluate_batch_size`` and
    ``links_filename``.

    Note: the training and evaluation batch iterators share the single cursor
    ``self.index``.
    """

    def __init__(self, conf, filename):
        self.conf = conf
        self.data_dict = {}
        # 1 while a pass is in progress; getTrainRankingBatch sets it to 0
        # when it hands out the last batch of a pass.
        self.terminal_flag = 1
        self.filename = filename
        # Cursor into total_user_list used by the batch iterators.
        self.index = 0

    ####################################### Initialize Procedures #######################################
    def prepareModelSupplement(self, model):
        """Build the sparse-matrix inputs listed in ``model.supply_set``.

        The consumed-items matrix is derived from ``self.positive_data``, so
        ``arrangePositiveData`` must have run before this is called with
        'CONSUMED_ITEMS_SPARSE_MATRIX' requested. The social matrix is read
        from ``conf.links_filename``.

        Returns:
            dict mapping the ``*_INDICES_INPUT`` / ``*_VALUES_INPUT`` keys to
            the corresponding numpy arrays (only for the requested matrices).
        """
        data_dict = {}
        if 'CONSUMED_ITEMS_SPARSE_MATRIX' in model.supply_set:
            self.generateConsumedItemsSparseMatrix()
            data_dict['CONSUMED_ITEMS_INDICES_INPUT'] = self.consumed_items_indices_list
            data_dict['CONSUMED_ITEMS_VALUES_INPUT'] = self.consumed_items_values_list
        if 'SOCIAL_NEIGHBORS_SPARSE_MATRIX' in model.supply_set:
            self.readSocialNeighbors()
            self.generateSocialNeighborsSparseMatrix()
            data_dict['SOCIAL_NEIGHBORS_INDICES_INPUT'] = self.social_neighbors_indices_list
            data_dict['SOCIAL_NEIGHBORS_VALUES_INPUT'] = self.social_neighbors_values_list
        return data_dict

    def initializeRankingTrain(self):
        """Load the file, group positives per user and sample negatives."""
        self.readData()
        self.arrangePositiveData()
        self.generateTrainNegative()

    def initializeRankingVT(self):
        """Same preparation steps as ``initializeRankingTrain``."""
        self.readData()
        self.arrangePositiveData()
        self.generateTrainNegative()

    def initalizeRankingEva(self):
        """Load the file and prepare the positive/negative evaluation data.

        The method name keeps its historical spelling because callers use it.
        """
        self.readData()
        self.getEvaPositiveBatch()
        self.generateEvaNegative()

    def linkedMap(self):
        """Publish the current user/item/label arrays through ``data_dict``."""
        self.data_dict['USER_LIST'] = self.user_list
        self.data_dict['ITEM_LIST'] = self.item_list
        self.data_dict['LABEL_LIST'] = self.labels_list

    def linkedRankingEvaMap(self):
        """Publish the current evaluation user/item arrays through ``data_dict``."""
        self.data_dict['EVA_USER_LIST'] = self.eva_user_list
        self.data_dict['EVA_ITEM_LIST'] = self.eva_item_list

    ############################################## Ranking ##############################################
    def readData(self):
        """Parse ``self.filename`` into ``hash_data`` and ``total_user_list``.

        Each non-blank line must start with two tab-separated integers
        (user id, item id); any further columns are ignored.
        """
        total_user_list = set()
        hash_data = defaultdict(int)
        # The context manager guarantees the handle is closed even when a
        # malformed line makes int() raise.
        with open(self.filename) as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                arr = line.split("\t")
                user, item = int(arr[0]), int(arr[1])
                hash_data[(user, item)] = 1
                total_user_list.add(user)
        self.total_user_list = list(total_user_list)
        self.hash_data = hash_data

    def arrangePositiveData(self):
        """Group the observed items by user and count the observed pairs."""
        positive_data = defaultdict(set)
        hash_data = self.hash_data
        for (u, i) in hash_data:
            positive_data[u].add(i)
        self.positive_data = positive_data
        # The keys of hash_data are already unique (user, item) pairs.
        self.total_data = len(hash_data)

    def generateTrainNegative(self):
        """Sample negatives for the train/val/test sets.

        For every observed pair, ``conf.num_negatives`` items that the user
        has not interacted with are drawn uniformly from
        ``range(conf.num_items)`` and stored in a per-user set (so repeated
        draws collapse). Also resets ``terminal_flag`` to 1.

        NOTE(review): the rejection loop never terminates for a user who has
        interacted with every item id in ``range(num_items)``.
        """
        num_items = self.conf.num_items
        num_negatives = self.conf.num_negatives
        negative_data = defaultdict(set)
        hash_data = self.hash_data
        for (u, _item) in hash_data:
            for _ in range(num_negatives):
                j = np.random.randint(num_items)
                while (u, j) in hash_data:
                    j = np.random.randint(num_items)
                negative_data[u].add(j)
        self.negative_data = negative_data
        self.terminal_flag = 1

    def _assembleLabeledArrays(self, users):
        """Store column arrays of positives (label 1) then negatives (label 0) per user."""
        positive_data = self.positive_data
        negative_data = self.negative_data
        user_list, item_list, labels_list = [], [], []
        for u in users:
            for items, label in ((positive_data[u], 1), (negative_data[u], 0)):
                user_list.extend([u] * len(items))
                item_list.extend(items)
                labels_list.extend([label] * len(items))
        self.user_list = np.reshape(user_list, [-1, 1])
        self.item_list = np.reshape(item_list, [-1, 1])
        self.labels_list = np.reshape(labels_list, [-1, 1])

    def _nextUserBatch(self, batch_size):
        """Advance ``self.index``; return (users of the batch, whether it ends the pass)."""
        users = self.total_user_list
        start = self.index
        if start + batch_size < len(users):
            self.index = start + batch_size
            return users[start:start + batch_size], False
        # Last (possibly short) batch: rewind the cursor for the next pass.
        self.index = 0
        return users[start:], True

    def getVTRankingOneBatch(self):
        """Assemble all users into one batch (val/test section, loss computation)."""
        self._assembleLabeledArrays(self.total_user_list)

    def getTrainRankingBatch(self):
        """Assemble the next ``conf.training_batch_size`` users for training.

        Sets ``terminal_flag`` to 0 once the last batch of the pass has been
        assembled.
        """
        target_user_list, is_last = self._nextUserBatch(self.conf.training_batch_size)
        if is_last:
            self.terminal_flag = 0
        self._assembleLabeledArrays(target_user_list)

    def getEvaPositiveBatch(self):
        """Build the positive arrays for the ranking evaluation section.

        ``eva_index_dict`` maps each user to the row positions of that user's
        pairs inside ``eva_user_list`` / ``eva_item_list``.
        """
        user_list = []
        item_list = []
        index_dict = defaultdict(list)
        for index, (u, i) in enumerate(self.hash_data):
            user_list.append(u)
            item_list.append(i)
            index_dict[u].append(index)
        self.eva_user_list = np.reshape(user_list, [-1, 1])
        self.eva_item_list = np.reshape(item_list, [-1, 1])
        self.eva_index_dict = index_dict

    def generateEvaNegative(self):
        """Draw ``conf.num_evaluate`` unobserved items per user for evaluation.

        Unlike the training negatives these are kept in a list, so every user
        ends up with exactly ``num_evaluate`` entries (duplicates possible).
        """
        hash_data = self.hash_data
        num_evaluate = self.conf.num_evaluate
        num_items = self.conf.num_items
        eva_negative_data = defaultdict(list)
        for u in self.total_user_list:
            for _ in range(num_evaluate):
                j = np.random.randint(num_items)
                while (u, j) in hash_data:
                    j = np.random.randint(num_items)
                eva_negative_data[u].append(j)
        self.eva_negative_data = eva_negative_data

    def getEvaRankingBatch(self):
        """Assemble the evaluation negatives of the next batch of users.

        Returns:
            (batch_user_list, terminal_flag) where ``terminal_flag`` is 0 when
            this was the last batch of the pass and 1 otherwise. The instance
            attribute ``self.terminal_flag`` is not modified here.
        """
        num_evaluate = self.conf.num_evaluate
        eva_negative_data = self.eva_negative_data
        batch_user_list, is_last = self._nextUserBatch(self.conf.evaluate_batch_size)
        terminal_flag = 0 if is_last else 1
        user_list = []
        item_list = []
        for u in batch_user_list:
            user_list.extend([u] * num_evaluate)
            item_list.extend(eva_negative_data[u])
        self.eva_user_list = np.reshape(user_list, [-1, 1])
        self.eva_item_list = np.reshape(item_list, [-1, 1])
        return batch_user_list, terminal_flag

    ############################### Supplement for Sparse Computation ###############################
    def readSocialNeighbors(self, friends_flag=1):
        """Read the tab-separated links file ``conf.links_filename``.

        Args:
            friends_flag: when 1, every link is also stored in the reverse
                direction (u2 -> u1).
        """
        social_neighbors = defaultdict(set)
        with open(self.conf.links_filename) as links_file:
            for line in links_file:
                if not line.strip():
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                tmp = line.split('\t')
                u1, u2 = int(tmp[0]), int(tmp[1])
                social_neighbors[u1].add(u2)
                if friends_flag == 1:
                    social_neighbors[u2].add(u1)
        self.social_neighbors = social_neighbors

    @staticmethod
    def _rowNormalizedEntries(adjacency):
        """Return ([row, col] pairs, 1/row-degree values) in sorted row/col order."""
        indices, values = [], []
        for row in sorted(adjacency):
            columns = sorted(adjacency[row])
            for col in columns:
                indices.append([row, col])
                values.append(1.0 / len(columns))
        return indices, values

    def generateSocialNeighborsSparseMatrix(self):
        """Generate the social-neighbors sparse matrix indices and values."""
        indices, values = self._rowNormalizedEntries(self.social_neighbors)
        self.social_neighbors_indices_list = np.array(indices).astype(np.int64)
        self.social_neighbors_values_list = np.array(values).astype(np.float32)

    def generateConsumedItemsSparseMatrix(self):
        """Generate the consumed-items sparse matrix indices and values."""
        indices, values = self._rowNormalizedEntries(self.positive_data)
        self.consumed_items_indices_list = np.array(indices).astype(np.int64)
        self.consumed_items_values_list = np.array(values).astype(np.float32)

diffnet/class/DataUtil.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
'''
2+
author: Peijie Sun
3+
e-mail: sun.hfut@gmail.com
4+
released date: 04/18/2019
5+
'''
6+
7+
import os
8+
from time import time
9+
from DataModule import DataModule
10+
11+
class DataUtil():
    """Factory that creates the DataModule handles for every data split."""

    def __init__(self, conf):
        # conf must expose data_dir and data_name (read when handles are created).
        self.conf = conf

    def initializeRankingHandle(self):
        """Create the train/val/test handles, then the evaluation handles."""
        self.createTrainHandle()
        self.createEvaluateHandle()

    def createTrainHandle(self):
        """Attach ``train``, ``val`` and ``test`` DataModule handles."""
        conf = self.conf
        location = (conf.data_dir, conf.data_name)

        self.train = DataModule(conf, "%s/%s.train.rating" % location)
        self.val = DataModule(conf, "%s/%s.val.rating" % location)
        self.test = DataModule(conf, "%s/%s.test.rating" % location)

    def createEvaluateHandle(self):
        """Attach ``val_eva`` and ``test_eva`` DataModule handles."""
        conf = self.conf
        location = (conf.data_dir, conf.data_name)

        self.val_eva = DataModule(conf, "%s/%s.val.rating" % location)
        self.test_eva = DataModule(conf, "%s/%s.test.rating" % location)

diffnet/class/Evaluate.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
'''
2+
author: Peijie Sun
3+
e-mail: sun.hfut@gmail.com
4+
released date: 04/18/2019
5+
'''
6+
7+
import math
8+
import numpy as np
9+
10+
class Evaluate():
    """Compute top-K hit ratio (HR) and NDCG for ranking predictions."""

    def __init__(self, conf):
        self.conf = conf

    def getIdcg(self, length):
        """Return the ideal DCG of ``length`` hits placed at ranks 0..length-1."""
        idcg = 0.0
        for i in range(length):
            idcg = idcg + math.log(2) / math.log(i + 2)
        return idcg

    def getDcg(self, value):
        """Return the DCG gain of a hit at zero-based rank ``value``."""
        dcg = math.log(2) / math.log(value + 2)
        return dcg

    def getHr(self, value):
        """Return the hit credit for one hit; the rank ``value`` is not used."""
        hit = 1.0
        return hit

    def evaluateRankingPerformance(self, evaluate_index_dict, evaluate_real_rating_matrix, \
            evaluate_predict_rating_matrix, topK, num_procs, exp_flag=0, sp_name=None, result_file=None):
        """Return the mean HR and mean NDCG over all users.

        Args:
            evaluate_index_dict: maps each user to the row positions of that
                user's positive items in ``evaluate_real_rating_matrix``.
            evaluate_real_rating_matrix: predicted scores of the positive
                pairs; indexed with the positions above.
            evaluate_predict_rating_matrix: indexed by user; yields the
                predicted scores of that user's sampled negatives.
            topK: length of the ranking that is inspected.
            num_procs: number of consecutive slices the user list is split
                into (the slices are processed sequentially here).
            exp_flag, sp_name, result_file: accepted for interface
                compatibility; not used by this method.

        Returns:
            (mean HR, mean NDCG) as computed by ``np.mean``.
        """
        user_list = list(evaluate_index_dict.keys())
        total_users = len(user_list)
        # Floor division: a float batch size cannot be used as a slice bound
        # under Python 3.
        batch_size = total_users // num_procs

        hr_list, ndcg_list = [], []
        for proc in range(num_procs):
            start = proc * batch_size
            # The last slice absorbs the remainder so that no user is dropped
            # when total_users is not a multiple of num_procs.
            end = total_users if proc == num_procs - 1 else start + batch_size
            batch_user_list = user_list[start:end]
            tmp_hr_list, tmp_ndcg_list = self.getHrNdcgProc(evaluate_index_dict, evaluate_real_rating_matrix, \
                evaluate_predict_rating_matrix, topK, batch_user_list)
            hr_list.extend(tmp_hr_list)
            ndcg_list.extend(tmp_ndcg_list)
        return np.mean(hr_list), np.mean(ndcg_list)

    def getHrNdcgProc(self,
        evaluate_index_dict,
        evaluate_real_rating_matrix,
        evaluate_predict_rating_matrix,
        topK,
        user_list):
        """Return the per-user HR and NDCG lists for ``user_list``.

        For each user the scores of the positive items are placed in front of
        the scores of the negatives, everything is sorted by descending score,
        and a position in the top ``topK`` counts as a hit when it refers to
        one of the positives. HR is hits / min(#positives, topK); NDCG is the
        DCG of the hits divided by the ideal DCG of min(#positives, topK)
        hits. Users without any positive index are skipped.
        """
        tmp_hr_list, tmp_ndcg_list = [], []

        for u in user_list:
            real_item_index_list = evaluate_index_dict[u]
            if len(real_item_index_list) == 0:
                # Nothing to rank for this user; both metrics are undefined.
                continue
            real_item_rating_list = list(np.concatenate(evaluate_real_rating_matrix[real_item_index_list]))
            positive_length = len(real_item_rating_list)
            target_length = min(positive_length, topK)

            predict_rating_list = evaluate_predict_rating_matrix[u]
            real_item_rating_list.extend(predict_rating_list)
            # Positions sorted by descending score; positions below
            # positive_length belong to the positive items.
            sort_index = np.argsort(real_item_rating_list)
            sort_index = sort_index[::-1]

            user_hr_list = []
            user_ndcg_list = []
            # Never look past the available candidates when topK exceeds them.
            for idx in range(min(topK, len(sort_index))):
                ranking = sort_index[idx]
                if ranking < positive_length:
                    user_hr_list.append(self.getHr(idx))
                    user_ndcg_list.append(self.getDcg(idx))

            idcg = self.getIdcg(target_length)

            tmp_hr = np.sum(user_hr_list) / target_length
            tmp_ndcg = np.sum(user_ndcg_list) / idcg
            tmp_hr_list.append(tmp_hr)
            tmp_ndcg_list.append(tmp_ndcg)

        return tmp_hr_list, tmp_ndcg_list

0 commit comments

Comments
 (0)