sunfanyunn
diff --git a/MLGkernel/data_utils.py b/MLGkernel/data_utils.py
Lines changed: 4 additions & 2 deletions
diff --git a/MLGkernel/evaluate_embedding.py b/MLGkernel/evaluate_embedding.py
Lines changed: 7 additions & 11 deletions
diff --git a/MLGkernel/preprocess.py b/MLGkernel/preprocess.py
Lines changed: 7 additions & 3 deletions
diff --git a/MLGkernel/test.py b/MLGkernel/test.py
Lines changed: 4 additions & 3 deletions
diff --git a/MLGkernel/tmp.py b/MLGkernel/tmp.py
Lines changed: 11 additions & 0 deletions
@@ -29,7 +29,7 @@ def read_graphfile(datadir, dataname, max_nodes=None):
  with open(filename_nodes) as f:
  for line in f:
  line=line.strip("\n")
- node_labels+=[int(line) - 1]
+ node_labels+=[int(line)]
  num_unique_node_labels = max(node_labels) + 1
  except IOError:
  print('No node labels')
@@ -110,7 +110,9 @@ def read_graphfile(datadir, dataname, max_nodes=None):
  graphs.append(nx.relabel_nodes(G, mapping))
 
 
- # np.random.shuffle(graphs)
+ np.random.shuffle(graphs)
+ #idx = np.random.RandomState(seed=2).permutation(len(graphs))
+ #graphs = [graphs[i] for i in idx]
 
  return graphs
 
@@ -2,6 +2,7 @@
 import numpy as np
 import pandas as pd
 import os
+import sys
 
 from sklearn.model_selection import cross_val_score
 from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
@@ -16,14 +17,14 @@ def evaluate_embedding(embeddings, labels):
 
  labels = preprocessing.LabelEncoder().fit_transform(labels)
  x, y = np.array(embeddings), np.array(labels)
+ print(x.shape, y.shape)
 
  kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
  accuracies = []
  for train_index, test_index in kf.split(x, y):
 
  x_train, x_test = x[train_index], x[test_index]
  y_train, y_test = y[train_index], y[test_index]
- # x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)
  search=True
  if search:
  params = {'C':[0.001, 0.01,0.1,1,10,100,1000]}
@@ -40,11 +41,10 @@ def evaluate_embedding(embeddings, labels):
 
  x_train, x_test = x[train_index], x[test_index]
  y_train, y_test = y[train_index], y[test_index]
- # x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1)
  search=True
  if search:
- classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0)
  params = {'C':[0.001, 0.01,0.1,1,10,100,1000]}
+ classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0)
  else:
  classifier = LinearSVC(C=10)
  classifier.fit(x_train, y_train)
@@ -69,16 +69,12 @@ def evaluate_embedding(embeddings, labels):
 if __name__ == '__main__':
  # x, y = get_mutag()
  emb = []
- with open('data/results/output.txt', 'r') as f:
+ with open('data/results/{}_output.txt'.format(sys.argv[1]), 'r') as f:
  for line in f:
  emb.append(list(map(float, [x for x in line.strip().split()])))
 
- import sys
- graphs = read_graphfile('../data', sys.argv[1]) 
- y = [graph.graph['label'] for graph in graphs]
+ with open('../data/{}_label.txt'.format(sys.argv[1]), 'r') as f:
+  y = f.readlines()
+ y = [int(x.strip()) for x in y]
 
  evaluate_embedding(emb, y)
- # import sys
- # preprocess(sys.argv[1])
- 
-
@@ -4,6 +4,7 @@
 
 def preprocess(DS):
  graphs = read_graphfile('../data', DS)
+ lab = open('../data/{}_label.txt'.format(DS), 'w')
  f = open('../data/{}.txt'.format(DS), 'w')
  nl = open('../data/{}_nodelabels.txt'.format(DS), 'w')
  f.write('{}\n'.format(len(graphs)))
@@ -13,20 +14,23 @@ def preprocess(DS):
  num_nodes = g.number_of_nodes()
  f.write('{}\n'.format(num_nodes))
  nl.write('{}\n'.format(num_nodes))
+ lab.write('{}\n'.format(g.graph['label']))
 
  A = np.array(nx.adjacency_matrix(g).todense())
  assert A.shape == (num_nodes, num_nodes)
  for u in g.nodes():
 
  f.write(' '.join(list(map(str, list(A[int(u)])))) + '\n')
- nl.write('{}\n'.format(np.argmax(g.node[int(u)]['label'])))
+ try:
+ nl.write('{}\n'.format(np.argmax(g.node[int(u)]['label'])+1))
+ except:
+ pass
  f.close()
  nl.close()
+ lab.close()
 
 if __name__ == '__main__':
  # x, y = get_mutag()
  # evaluate_embedding(x, y)
  import sys
  preprocess(sys.argv[1])
- 
-
 
@@ -1,15 +1,16 @@
 import numpy as np
 from glob import glob
-files = glob('log*')
+import sys
+files = glob(sys.argv[1])
 num=288
 scores = [[] for i in range(num)]
 for f in files:
  with open(f, 'r') as f:
  for idx, line in enumerate(f):
- scores[idx].append(float(line.strip().split()[-1]))
+ scores[idx%288].append(float(line.strip().split()[-1]))
 
 
 res = [np.mean(scores[idx]) for idx in range(num)]
 amax = np.argmax(res)
-print(amax, np.mean(scores[amax]), np.std(scores[amax]))
+print(amax, np.mean(scores[amax]), np.std(scores[amax]), len(scores[amax]))
 
@@ -0,0 +1,11 @@
+f = open('../data/MUTAG_nodelabels.txt', 'r')
+print(f.readline())
+node_labels = []
+for i in range(188):
+ num = int(f.readline()[:-1])
+ for j in range(num):
+ node_labels.append(int(f.readline()[:-1]))
+
+for i in range(8):
+ print(node_labels.count(i))
+