Skip to content

Commit df36de4

Browse files
committed
Fix problem
2 parents: 8f681c5 + 5efd43e; commit: df36de4

File tree

5 files changed, with 33 additions (+33) and 19 deletions (-19).

MLGkernel/data_utils.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def read_graphfile(datadir, dataname, max_nodes=None):
2929
with open(filename_nodes) as f:
3030
for line in f:
3131
line=line.strip("\n")
32-
node_labels+=[int(line) - 1]
32+
node_labels+=[int(line)]
3333
num_unique_node_labels = max(node_labels) + 1
3434
except IOError:
3535
print('No node labels')
@@ -110,7 +110,9 @@ def read_graphfile(datadir, dataname, max_nodes=None):
110110
graphs.append(nx.relabel_nodes(G, mapping))
111111

112112

113-
# np.random.shuffle(graphs)
113+
np.random.shuffle(graphs)
114+
#idx = np.random.RandomState(seed=2).permutation(len(graphs))
115+
#graphs = [graphs[i] for i in idx]
114116

115117
return graphs
116118

MLGkernel/evaluate_embedding.py

Lines changed: 7 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
import numpy as np
33
import pandas as pd
44
import os
5+
import sys
56

67
from sklearn.model_selection import cross_val_score
78
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
@@ -16,14 +17,14 @@ def evaluate_embedding(embeddings, labels):
1617

1718
labels = preprocessing.LabelEncoder().fit_transform(labels)
1819
x, y = np.array(embeddings), np.array(labels)
20+
print(x.shape, y.shape)
1921

2022
kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
2123
accuracies = []
2224
for train_index, test_index in kf.split(x, y):
2325

2426
x_train, x_test = x[train_index], x[test_index]
2527
y_train, y_test = y[train_index], y[test_index]
26-
# x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)
2728
search=True
2829
if search:
2930
params = {'C':[0.001, 0.01,0.1,1,10,100,1000]}
@@ -40,11 +41,10 @@ def evaluate_embedding(embeddings, labels):
4041

4142
x_train, x_test = x[train_index], x[test_index]
4243
y_train, y_test = y[train_index], y[test_index]
43-
# x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)
4444
search=True
4545
if search:
46-
classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0)
4746
params = {'C':[0.001, 0.01,0.1,1,10,100,1000]}
47+
classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0)
4848
else:
4949
classifier = LinearSVC(C=10)
5050
classifier.fit(x_train, y_train)
@@ -69,16 +69,12 @@ def evaluate_embedding(embeddings, labels):
6969
if __name__ == '__main__':
7070
# x, y = get_mutag()
7171
emb = []
72-
with open('data/results/output.txt', 'r') as f:
72+
with open('data/results/{}_output.txt'.format(sys.argv[1]), 'r') as f:
7373
for line in f:
7474
emb.append(list(map(float, [x for x in line.strip().split()])))
7575

76-
import sys
77-
graphs = read_graphfile('../data', sys.argv[1])
78-
y = [graph.graph['label'] for graph in graphs]
76+
with open('../data/{}_label.txt'.format(sys.argv[1]), 'r') as f:
77+
y = f.readlines()
78+
y = [int(x.strip()) for x in y]
7979

8080
evaluate_embedding(emb, y)
81-
# import sys
82-
# preprocess(sys.argv[1])
83-
84-

MLGkernel/preprocess.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
def preprocess(DS):
66
graphs = read_graphfile('../data', DS)
7+
lab = open('../data/{}_label.txt'.format(DS), 'w')
78
f = open('../data/{}.txt'.format(DS), 'w')
89
nl = open('../data/{}_nodelabels.txt'.format(DS), 'w')
910
f.write('{}\n'.format(len(graphs)))
@@ -13,20 +14,23 @@ def preprocess(DS):
1314
num_nodes = g.number_of_nodes()
1415
f.write('{}\n'.format(num_nodes))
1516
nl.write('{}\n'.format(num_nodes))
17+
lab.write('{}\n'.format(g.graph['label']))
1618

1719
A = np.array(nx.adjacency_matrix(g).todense())
1820
assert A.shape == (num_nodes, num_nodes)
1921
for u in g.nodes():
2022

2123
f.write(' '.join(list(map(str, list(A[int(u)])))) + '\n')
22-
nl.write('{}\n'.format(np.argmax(g.node[int(u)]['label'])))
24+
try:
25+
nl.write('{}\n'.format(np.argmax(g.node[int(u)]['label'])+1))
26+
except:
27+
pass
2328
f.close()
2429
nl.close()
30+
lab.close()
2531

2632
if __name__ == '__main__':
2733
# x, y = get_mutag()
2834
# evaluate_embedding(x, y)
2935
import sys
3036
preprocess(sys.argv[1])
31-
32-

MLGkernel/test.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,16 @@
11
import numpy as np
22
from glob import glob
3-
files = glob('log*')
3+
import sys
4+
files = glob(sys.argv[1])
45
num=288
56
scores = [[] for i in range(num)]
67
for f in files:
78
with open(f, 'r') as f:
89
for idx, line in enumerate(f):
9-
scores[idx].append(float(line.strip().split()[-1]))
10+
scores[idx%288].append(float(line.strip().split()[-1]))
1011

1112

1213
res = [np.mean(scores[idx]) for idx in range(num)]
1314
amax = np.argmax(res)
14-
print(amax, np.mean(scores[amax]), np.std(scores[amax]))
15+
print(amax, np.mean(scores[amax]), np.std(scores[amax]), len(scores[amax]))
1516

MLGkernel/tmp.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,11 @@
1+
f = open('../data/MUTAG_nodelabels.txt', 'r')
2+
print(f.readline())
3+
node_labels = []
4+
for i in range(188):
5+
num = int(f.readline()[:-1])
6+
for j in range(num):
7+
node_labels.append(int(f.readline()[:-1]))
8+
9+
for i in range(8):
10+
print(node_labels.count(i))
11+

0 commit comments

Comments
 (0)