Skip to content

Commit 5c30f64

Browse files
authored
Merge pull request #16 from lijunweiyhn/master
DiffNet++ pull request
2 parents 3fe67ff + c039510 commit 5c30f64

18 files changed

+1824
-0
lines changed

.DS_Store

8 KB
Binary file not shown.

Diffnet++/.DS_Store

14 KB
Binary file not shown.

Diffnet++/.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python: 2.7

Diffnet++/class/DataModule.py

Lines changed: 466 additions & 0 deletions
Large diffs are not rendered by default.

Diffnet++/class/DataUtil.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import os
2+
from time import time
3+
from DataModule import DataModule
4+
5+
class DataUtil():
    """Build the DataModule handles for the train / val / test rating files.

    File names are derived from ``conf.data_dir`` and ``conf.data_name``.
    """

    def __init__(self, conf):
        # Only the configuration object is stored; nothing is loaded here.
        self.conf = conf

    def initializeRankingHandle(self):
        """Create the training handles, then the evaluation handles."""
        self.createTrainHandle()
        self.createEvaluateHandle()

    def createTrainHandle(self):
        """Attach ``self.train``, ``self.val`` and ``self.test`` DataModules."""
        conf = self.conf
        root = conf.data_dir
        # All three paths are formatted before any DataModule is constructed.
        train_path = "%s/%s.train.rating" % (root, conf.data_name)
        val_path = "%s/%s.val.rating" % (root, conf.data_name)
        test_path = "%s/%s.test.rating" % (root, conf.data_name)

        self.train = DataModule(conf, train_path)
        self.val = DataModule(conf, val_path)
        self.test = DataModule(conf, test_path)

    def createEvaluateHandle(self):
        """Attach ``self.val_eva`` and ``self.test_eva`` DataModules.

        These wrap the same val / test files as ``createTrainHandle`` but
        are separate DataModule instances.
        """
        conf = self.conf
        root = conf.data_dir
        val_path = "%s/%s.val.rating" % (root, conf.data_name)
        test_path = "%s/%s.test.rating" % (root, conf.data_name)

        self.val_eva = DataModule(conf, val_path)
        self.test_eva = DataModule(conf, test_path)

Diffnet++/class/Evaluate.py

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
import math
2+
import numpy as np
3+
4+
5+
class Evaluate():
    """Compute per-user top-K hit ratio (HR) and NDCG and aggregate them.

    The configuration object passed to the constructor is stored, but none
    of the methods in this class read it.
    """

    # Sparsity bucket keys, in the order their (hr, ndcg) pairs are returned.
    _SPARSITY_BUCKETS = ('0-4', '4-8', '8-16', '16-32', '32-64', '64-')

    def __init__(self, conf):
        self.conf = conf

    def getIdcg(self, length):
        """Return the ideal DCG for ``length`` hits at ranks 0..length-1.

        This is the sum of ``log(2) / log(i + 2)`` for ``i`` in
        ``range(length)``; it is ``0.0`` when ``length`` is 0.
        """
        idcg = 0.0
        for i in range(length):
            idcg = idcg + math.log(2) / math.log(i + 2)
        return idcg

    def getDcg(self, value):
        """Return the DCG gain ``log(2) / log(value + 2)`` of a hit at 0-based rank ``value``."""
        dcg = math.log(2) / math.log(value + 2)
        return dcg

    def getHr(self, value):
        """Return the hit-ratio contribution of one hit.

        Every hit counts as ``1.0``; ``value`` is accepted for symmetry with
        ``getDcg`` and is ignored.
        """
        hit = 1.0
        return hit

    def evaluateRankingPerformance(self, evaluate_index_dict, evaluate_real_rating_matrix,
                                   evaluate_predict_rating_matrix, topK, num_procs,
                                   exp_flag=0, sp_name=None, result_file=None):
        """Return ``(mean HR@topK, mean NDCG@topK)`` over all users.

        Args:
            evaluate_index_dict: mapping user -> list of row indices into
                ``evaluate_real_rating_matrix``.
            evaluate_real_rating_matrix: numpy array indexed with those row
                lists and flattened with ``np.concatenate`` (presumably an
                (n, 1) array of scores for the held-out items -- confirm
                against the caller).
            evaluate_predict_rating_matrix: indexable by user; each entry is
                an iterable of scores for the competing candidates.
            topK: ranking cut-off.
            num_procs: number of batches the users are split into. The
                batches are processed sequentially in this method.
            exp_flag, sp_name, result_file: accepted but unused.

        Raises:
            ZeroDivisionError: if ``num_procs`` is 0.
        """
        user_list = list(evaluate_index_dict.keys())

        hr_list, ndcg_list = [], []
        for batch_user_list in self._split_batches(user_list, num_procs):
            tmp_hr_list, tmp_ndcg_list = self.getHrNdcgProc(
                evaluate_index_dict, evaluate_real_rating_matrix,
                evaluate_predict_rating_matrix, topK, batch_user_list)
            hr_list.extend(tmp_hr_list)
            ndcg_list.extend(tmp_ndcg_list)
        return np.mean(hr_list), np.mean(ndcg_list)

    def getHrNdcgProc(self,
                      evaluate_index_dict,
                      evaluate_real_rating_matrix,
                      evaluate_predict_rating_matrix,
                      topK,
                      user_list):
        """Return ``(hr_list, ndcg_list)`` with one entry per user in ``user_list``."""
        tmp_hr_list, tmp_ndcg_list = [], []

        for u in user_list:
            tmp_hr, tmp_ndcg = self._user_hr_ndcg(
                u, evaluate_index_dict, evaluate_real_rating_matrix,
                evaluate_predict_rating_matrix, topK)
            tmp_hr_list.append(tmp_hr)
            tmp_ndcg_list.append(tmp_ndcg)

        return tmp_hr_list, tmp_ndcg_list

    def evaluateRankingPerformance_sparsity(self, evaluate_index_dict, social_sparsity_dict,
                                            interest_sparsity_dict, evaluate_real_rating_matrix,
                                            evaluate_predict_rating_matrix, topK, num_procs,
                                            exp_flag=0, sp_name=None, result_file=None):
        """Return HR / NDCG per sparsity bucket as a flat 24-tuple.

        Layout: for the social buckets '0-4', '4-8', '8-16', '16-32',
        '32-64', '64-' in that order an ``(hr, ndcg)`` pair, followed by the
        same twelve values for the interest buckets. Each value is the sum
        of the per-user metric over the evaluated users assigned to the
        bucket, divided by ``len(sparsity_dict[bucket])``. An empty bucket
        therefore yields ``nan``.

        ``exp_flag``, ``sp_name`` and ``result_file`` are accepted but unused.

        Raises:
            ZeroDivisionError: if ``num_procs`` is 0.
            KeyError: if a bucket key is missing from a sparsity dict.
        """
        user_list = list(evaluate_index_dict.keys())
        slots = 2 * len(self._SPARSITY_BUCKETS)

        # One accumulator per returned list, in the same order the
        # per-batch helpers return them: hr_0_4, ndcg_0_4, hr_4_8, ...
        social_lists = [[] for _ in range(slots)]
        interest_lists = [[] for _ in range(slots)]

        for batch_user_list in self._split_batches(user_list, num_procs):
            social_tmp = self.getHrNdcgProc_social_sparsity(
                evaluate_index_dict, social_sparsity_dict, interest_sparsity_dict,
                evaluate_real_rating_matrix, evaluate_predict_rating_matrix,
                topK, batch_user_list)
            for acc, part in zip(social_lists, social_tmp):
                acc.extend(part)

            interest_tmp = self.getHrNdcgProc_interest_sparsity(
                evaluate_index_dict, social_sparsity_dict, interest_sparsity_dict,
                evaluate_real_rating_matrix, evaluate_predict_rating_matrix,
                topK, batch_user_list)
            for acc, part in zip(interest_lists, interest_tmp):
                acc.extend(part)

        results = []
        for sparsity_dict, metric_lists in ((social_sparsity_dict, social_lists),
                                            (interest_sparsity_dict, interest_lists)):
            for pos, bucket in enumerate(self._SPARSITY_BUCKETS):
                bucket_size = len(sparsity_dict[bucket])
                results.append(np.sum(metric_lists[2 * pos]) / bucket_size)
                results.append(np.sum(metric_lists[2 * pos + 1]) / bucket_size)
        return tuple(results)

    def getHrNdcgProc_social_sparsity(self,
                                      evaluate_index_dict,
                                      social_sparsity_dict,
                                      interest_sparsity_dict,
                                      evaluate_real_rating_matrix,
                                      evaluate_predict_rating_matrix,
                                      topK,
                                      user_list):
        """Return per-user HR / NDCG lists grouped by social sparsity bucket.

        The result is a 12-tuple ``(hr_0_4, ndcg_0_4, hr_4_8, ndcg_4_8, ...,
        hr_64, ndcg_64)``. ``interest_sparsity_dict`` is unused here.
        """
        return self._bucket_user_metrics(
            social_sparsity_dict, evaluate_index_dict, evaluate_real_rating_matrix,
            evaluate_predict_rating_matrix, topK, user_list)

    def getHrNdcgProc_interest_sparsity(self,
                                        evaluate_index_dict,
                                        social_sparsity_dict,
                                        interest_sparsity_dict,
                                        evaluate_real_rating_matrix,
                                        evaluate_predict_rating_matrix,
                                        topK,
                                        user_list):
        """Return per-user HR / NDCG lists grouped by interest sparsity bucket.

        The result is a 12-tuple ``(hr_0_4, ndcg_0_4, hr_4_8, ndcg_4_8, ...,
        hr_64, ndcg_64)``. ``social_sparsity_dict`` is unused here.
        """
        return self._bucket_user_metrics(
            interest_sparsity_dict, evaluate_index_dict, evaluate_real_rating_matrix,
            evaluate_predict_rating_matrix, topK, user_list)

    @staticmethod
    def _split_batches(user_list, num_procs):
        """Split ``user_list`` into ``num_procs`` consecutive slices covering every user."""
        total = len(user_list)
        # Floor division keeps slice bounds integral under Python 3 as well.
        batch_size = total // num_procs
        batches = []
        for i in range(num_procs):
            start = i * batch_size
            # The last batch absorbs the remainder so that no user is
            # silently dropped when total is not a multiple of num_procs.
            stop = total if i == num_procs - 1 else start + batch_size
            batches.append(user_list[start:stop])
        return batches

    def _user_hr_ndcg(self, u, evaluate_index_dict, evaluate_real_rating_matrix,
                      evaluate_predict_rating_matrix, topK):
        """Return ``(hr, ndcg)`` at ``topK`` for the single user ``u``."""
        real_item_index_list = evaluate_index_dict[u]
        # Scores of the user's held-out items come first, so any candidate
        # index below positive_length denotes one of those items.
        candidate_scores = list(np.concatenate(evaluate_real_rating_matrix[real_item_index_list]))
        positive_length = len(candidate_scores)
        target_length = min(positive_length, topK)

        candidate_scores.extend(evaluate_predict_rating_matrix[u])
        # Candidate indices ordered by descending score.
        sort_index = np.argsort(candidate_scores)[::-1]

        user_hr_list = []
        user_ndcg_list = []
        # Bound by the candidate count: a user may have fewer than topK candidates.
        for idx in range(min(topK, len(sort_index))):
            if sort_index[idx] < positive_length:
                user_hr_list.append(self.getHr(idx))
                user_ndcg_list.append(self.getDcg(idx))

        idcg = self.getIdcg(target_length)
        tmp_hr = np.sum(user_hr_list) / target_length
        tmp_ndcg = np.sum(user_ndcg_list) / idcg
        return tmp_hr, tmp_ndcg

    def _bucket_user_metrics(self, sparsity_dict, evaluate_index_dict,
                             evaluate_real_rating_matrix, evaluate_predict_rating_matrix,
                             topK, user_list):
        """Group each user's (hr, ndcg) by the first sparsity bucket containing the user."""
        hr_lists = dict((bucket, []) for bucket in self._SPARSITY_BUCKETS)
        ndcg_lists = dict((bucket, []) for bucket in self._SPARSITY_BUCKETS)
        # Membership is tested from '64-' down to '0-4'; a user listed in
        # several buckets is counted only in the first match. Users found in
        # no bucket are skipped.
        lookup_order = self._SPARSITY_BUCKETS[::-1]

        for u in user_list:
            tmp_hr, tmp_ndcg = self._user_hr_ndcg(
                u, evaluate_index_dict, evaluate_real_rating_matrix,
                evaluate_predict_rating_matrix, topK)
            for bucket in lookup_order:
                if u in sparsity_dict[bucket]:
                    hr_lists[bucket].append(tmp_hr)
                    ndcg_lists[bucket].append(tmp_ndcg)
                    break

        grouped = []
        for bucket in self._SPARSITY_BUCKETS:
            grouped.append(hr_lists[bucket])
            grouped.append(ndcg_lists[bucket])
        return tuple(grouped)
314+
315+
316+
317+
318+
319+
320+
321+
322+
323+

Diffnet++/class/Logging.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import os, shutil
2+
import ConfigParser as cp
3+
4+
class Logging():
    """Echo log messages to stdout and append them to a log file."""

    def __init__(self, filename):
        # Path of the file that record() appends to.
        self.filename = filename

    def record(self, str_log):
        """Print ``str_log`` and append it, terminated by CRLF, to the log file."""
        target = self.filename
        print(str_log)
        with open(target, 'a') as handle:
            line = "%s\r\n" % str_log
            handle.write(line)
            handle.flush()

0 commit comments

Comments
 (0)