diff --git a/.gitignore b/.gitignore
index ead11c4..4fa9bbe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,5 +20,8 @@ pygraph/kernels/*_sym.py
*.dat
*.pyc
+preimage/*
+!preimage/*.py
+
__pycache__
##*#
diff --git a/preimage/gk_iam.py b/preimage/gk_iam.py
new file mode 100644
index 0000000..206b640
--- /dev/null
+++ b/preimage/gk_iam.py
@@ -0,0 +1,196 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue Apr 30 17:07:43 2019
+
+A graph pre-image method combining the iterative pre-image method of reference [1]
+with the iterative alternate minimization (IAM) of reference [2].
+@author: ljia
+@references:
+    [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to find graph
+    pre-images. In Joint Pattern Recognition Symposium, pages 253-261. Springer, 2004.
+ [2] Generalized median graph via iterative alternate minimization.
+"""
+import sys
+import numpy as np
+import multiprocessing
+from tqdm import tqdm
+import networkx as nx
+import matplotlib.pyplot as plt
+
+from iam import iam
+
+
+def gk_iam(Gn, alpha):
+ """This function constructs graph pre-image by the iterative pre-image
+ framework in reference [1], algorithm 1, where the step of generating new
+ graphs randomly is replaced by the IAM algorithm in reference [2].
+
+    Notes
+ -----
+ Every time a better graph is acquired, the older one is replaced by it.
+ """
+ # compute k nearest neighbors of phi in DN.
+ dis_list = [] # distance between g_star and each graph.
+ for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
+ dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
+ k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
+ (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
+ k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
+ dis_list.append(dtemp)
+
+ # sort
+ sort_idx = np.argsort(dis_list)
+ dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
+ g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
+ if dis_gs[0] == 0: # the exact pre-image.
+ print('The exact pre-image is found from the input dataset.')
+ return 0, g0hat
+ dhat = dis_gs[0] # the nearest distance
+ Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
+ gihat_list = []
+
+# i = 1
+ r = 1
+ while r < r_max:
+ print('r =', r)
+# found = False
+ Gs_nearest = Gk + gihat_list
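+        # use IAM to generate a new candidate graph as an approximate
+        # generalized median of the current neighbor set.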
+ g_tmp = iam(Gs_nearest)
+
+ # compute distance between phi and the new generated graph.
+ knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
+ p_quit=lmbda, n_iteration=20, remove_totters=False,
+ n_jobs=multiprocessing.cpu_count(), verbose=False)
+ dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
+ knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
+ (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
+ k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
+ if dnew <= dhat: # the new distance is smaller
+ print('I am smaller!')
+ dhat = dnew
+ g_new = g_tmp.copy() # found better graph.
+ gihat_list = [g_new]
+ dis_gs.append(dhat)
+ r = 0
+ else:
+ r += 1
+
+ ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
+
+ return dhat, ghat
+
+
+def gk_iam_nearest(Gn, alpha):
+ """This function constructs graph pre-image by the iterative pre-image
+ framework in reference [1], algorithm 1, where the step of generating new
+ graphs randomly is replaced by the IAM algorithm in reference [2].
+
+    Notes
+    -----
+    Every time a better graph is acquired, its distance in kernel space is
+    added to the current k nearest distances; the k smallest of these k + 1
+    distances, together with their graphs, are kept as the new neighbor set.
+ """
+ # compute k nearest neighbors of phi in DN.
+ dis_list = [] # distance between g_star and each graph.
+ for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
+ dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) *
+ k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha *
+ (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
+ k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
+ dis_list.append(dtemp)
+
+ # sort
+ sort_idx = np.argsort(dis_list)
+ dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
+ g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
+ if dis_gs[0] == 0: # the exact pre-image.
+ print('The exact pre-image is found from the input dataset.')
+ return 0, g0hat
+ dhat = dis_gs[0] # the nearest distance
+ ghat = g0hat
+ Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
+ Gs_nearest = Gk
+# gihat_list = []
+
+# i = 1
+ r = 1
+ while r < r_max:
+ print('r =', r)
+# found = False
+# Gs_nearest = Gk + gihat_list
+ g_tmp = iam(Gs_nearest)
+
+ # compute distance between phi and the new generated graph.
+ knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
+ p_quit=lmbda, n_iteration=20, remove_totters=False,
+ n_jobs=multiprocessing.cpu_count(), verbose=False)
+ dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) *
+ knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha *
+ (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha *
+ k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
+ if dnew <= dhat: # the new distance is smaller
+ print('I am smaller!')
+ dhat = dnew
+ g_new = g_tmp.copy() # found better graph.
+ ghat = g_tmp.copy()
+ dis_gs.append(dhat) # add the new nearest distance.
+ Gs_nearest.append(g_new) # add the corresponding graph.
+ sort_idx = np.argsort(dis_gs)
+ dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
+ Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
+ r = 0
+ else:
+ r += 1
+
+ return dhat, ghat
+
+
+if __name__ == '__main__':
+ import sys
+ sys.path.insert(0, "../")
+ from pygraph.kernels.marginalizedKernel import marginalizedkernel
+ from pygraph.utils.graphfiles import loadDataset
+ ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
+ 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
+ Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+# Gn = Gn[0:10]
+
+    lmbda = 0.03 # termination probability (p_quit of the marginalized kernel)
+    r_max = 10 # maximum number of consecutive iterations without improvement
+ l = 500
+ alpha_range = np.linspace(0.1, 0.9, 9)
+ k = 5 # k nearest neighbors
+
+ # randomly select two molecules
+ np.random.seed(1)
+ idx1, idx2 = np.random.randint(0, len(Gn), 2)
+ g1 = Gn[idx1]
+ g2 = Gn[idx2]
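+    # the pre-image is sought for points phi = alpha * phi(g1) + (1 - alpha) * phi(g2)
+    # on the segment between the two embedded molecules, for each alpha in alpha_range.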
+
+ # compute
+ k_list = [] # kernel between each graph and itself.
+ k_g1_list = [] # kernel between each graph and g1
+ k_g2_list = [] # kernel between each graph and g2
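+    # the first element returned by marginalizedkernel is the Gram matrix of
+    # [g, g1, g2]; its row 0 gives k(g, g), k(g, g1) and k(g, g2) in that order.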
+ for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout):
+ ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
+ p_quit=lmbda, n_iteration=20, remove_totters=False,
+ n_jobs=multiprocessing.cpu_count(), verbose=False)
+ k_list.append(ktemp[0][0, 0])
+ k_g1_list.append(ktemp[0][0, 1])
+ k_g2_list.append(ktemp[0][0, 2])
+
+ g_best = []
+ dis_best = []
+ # for each alpha
+ for alpha in alpha_range:
+ print('alpha =', alpha)
+ dhat, ghat = gk_iam_nearest(Gn, alpha)
+ dis_best.append(dhat)
+ g_best.append(ghat)
+
+ for idx, item in enumerate(alpha_range):
+ print('when alpha is', item, 'the shortest distance is', dis_best[idx])
+ print('the corresponding pre-image is')
+ nx.draw_networkx(g_best[idx])
+ plt.show()
\ No newline at end of file
diff --git a/preimage/iam.py b/preimage/iam.py
new file mode 100644
index 0000000..cabb05f
--- /dev/null
+++ b/preimage/iam.py
@@ -0,0 +1,195 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Apr 26 11:49:12 2019
+
+Iterative alternate minimizations using GED.
+@author: ljia
+"""
+import numpy as np
+import random
+import networkx as nx
+
+import sys
+#from Cython_GedLib_2 import librariesImport, script
+import librariesImport, script
+sys.path.insert(0, "../")
+from pygraph.utils.graphfiles import saveDataset
+from pygraph.utils.graphdataset import get_dataset_attributes
+
+
+def iam(Gn, node_label='atom', edge_label='bond_type'):
+ """See my name, then you know what I do.
+ """
+# Gn = Gn[0:10]
+ Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
+
+ c_er = 1
+ c_es = 1
+ c_ei = 1
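+    # edit cost constants used in the edge update rules below
+    # (c_er, c_es, c_ei: edge removal, substitution and insertion costs).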
+
+    # phase 1: initialize.
+ # compute set-median.
+ dis_min = np.inf
+ pi_p = []
+ pi_all = []
+ for idx1, G_p in enumerate(Gn):
+ dist_sum = 0
+ pi_all.append([])
+ for idx2, G_p_prime in enumerate(Gn):
+ dist_tmp, pi_tmp = GED(G_p, G_p_prime)
+ pi_all[idx1].append(pi_tmp)
+ dist_sum += dist_tmp
+ if dist_sum < dis_min:
+ dis_min = dist_sum
+ G = G_p.copy()
+ idx_min = idx1
+ # list of edit operations.
+ pi_p = pi_all[idx_min]
+
+ # phase 2: iteration.
+ ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'],
+ edge_label=edge_label)
+ for itr in range(0, 10):
+ G_new = G.copy()
+ # update vertex labels.
+ # pre-compute h_i0 for each label.
+# for label in get_node_labels(Gn, node_label):
+# print(label)
+# for nd in G.nodes(data=True):
+# pass
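+        # For symbolic labels, h_i0 counts how many graphs in Gn map node nd
+        # (through the node maps in pi_p) onto a node carrying the candidate
+        # label; the most frequent label wins, with ties broken at random.
+        # For non-symbolic attributes, the new value is the mean attribute
+        # vector over the graphs whose mapped node exists.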
+ if not ds_attrs['node_attr_dim']: # labels are symbolic
+ for nd, _ in G.nodes(data=True):
+ h_i0_list = []
+ label_list = []
+ for label in get_node_labels(Gn, node_label):
+ h_i0 = 0
+ for idx, g in enumerate(Gn):
+ pi_i = pi_p[idx][nd]
+ if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
+ h_i0 += 1
+ h_i0_list.append(h_i0)
+ label_list.append(label)
+ # choose one of the best randomly.
+ idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
+ idx_rdm = random.randint(0, len(idx_max) - 1)
+ G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
+ else: # labels are non-symbolic
+ for nd, _ in G.nodes(data=True):
+ Si_norm = 0
+ phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
+ for idx, g in enumerate(Gn):
+ pi_i = pi_p[idx][nd]
+ if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
+ Si_norm += 1
+ phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])
+ phi_i_bar /= Si_norm
+ G_new.nodes[nd]['attributes'] = phi_i_bar
+
+ # update edge labels and adjacency matrix.
+ if ds_attrs['edge_labeled']:
+ for nd1, nd2, _ in G.edges(data=True):
+ h_ij0_list = []
+ label_list = []
+ for label in get_edge_labels(Gn, edge_label):
+ h_ij0 = 0
+ for idx, g in enumerate(Gn):
+ pi_i = pi_p[idx][nd1]
+ pi_j = pi_p[idx][nd2]
+ h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and
+ g.has_edge(pi_i, pi_j) and
+ g.edges[pi_i, pi_j][edge_label] == label)
+ h_ij0 += h_ij0_p
+ h_ij0_list.append(h_ij0)
+ label_list.append(label)
+ # choose one of the best randomly.
+ idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
+ h_ij0_max = h_ij0_list[idx_max[0]]
+ idx_rdm = random.randint(0, len(idx_max) - 1)
+ best_label = label_list[idx_max[idx_rdm]]
+
+ # check whether a_ij is 0 or 1.
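+                # sij_norm counts how many graphs have an edge between the two
+                # mapped nodes; the edge (nd1, nd2) is kept with best_label only
+                # if h_ij0_max exceeds the threshold below, i.e. keeping the
+                # labeled edge costs less than removing it under (c_er, c_es, c_ei).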
+ sij_norm = 0
+ for idx, g in enumerate(Gn):
+ pi_i = pi_p[idx][nd1]
+ pi_j = pi_p[idx][nd2]
+ if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
+ sij_norm += 1
+ if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
+ if not G_new.has_edge(nd1, nd2):
+ G_new.add_edge(nd1, nd2)
+ G_new.edges[nd1, nd2][edge_label] = best_label
+ else:
+ if G_new.has_edge(nd1, nd2):
+ G_new.remove_edge(nd1, nd2)
+ else: # if edges are unlabeled
+ for nd1, nd2, _ in G.edges(data=True):
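+                # for unlabeled edges only the topology is updated: the edge is
+                # kept iff the fraction of graphs with an edge between the mapped
+                # nodes exceeds c_er / (c_er + c_ei).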
+ sij_norm = 0
+ for idx, g in enumerate(Gn):
+ pi_i = pi_p[idx][nd1]
+ pi_j = pi_p[idx][nd2]
+ if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
+ sij_norm += 1
+ if sij_norm > len(Gn) * c_er / (c_er + c_ei):
+ if not G_new.has_edge(nd1, nd2):
+ G_new.add_edge(nd1, nd2)
+ else:
+ if G_new.has_edge(nd1, nd2):
+ G_new.remove_edge(nd1, nd2)
+
+ G = G_new.copy()
+
+ return G
+
+
+def GED(g1, g2, lib='gedlib'):
+ """
+ Compute GED. It is a dummy function for now.
+ """
+ if lib == 'gedlib':
+ saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp')
+ script.appel()
+ script.PyRestartEnv()
+ script.PyLoadGXLGraph('ged_tmp/', 'collections/tmp.xml')
+ listID = script.PyGetGraphIds()
+ script.PySetEditCost("CHEM_1")
+ script.PyInitEnv()
+ script.PySetMethod("BIPARTITE", "")
+ script.PyInitMethod()
+ g = listID[0]
+ h = listID[1]
+ script.PyRunMethod(g, h)
+ liste = script.PyGetAllMap(g, h)
+ upper = script.PyGetUpperBound(g, h)
+ lower = script.PyGetLowerBound(g, h)
+ dis = upper + lower
+ pi = liste[0]
+
+ return dis, pi
+
+
+def get_node_labels(Gn, node_label):
+ nl = set()
+ for G in Gn:
+ nl = nl | set(nx.get_node_attributes(G, node_label).values())
+ return nl
+
+
+def get_edge_labels(Gn, edge_label):
+ el = set()
+ for G in Gn:
+ el = el | set(nx.get_edge_attributes(G, edge_label).values())
+ return el
+
+
+if __name__ == '__main__':
+ from pygraph.utils.graphfiles import loadDataset
+ ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
+ 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
+# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
+# 'extra_params': {}} # node nsymb
+# ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
+# 'extra_params': {}}
+ Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+
+ iam(Gn)
\ No newline at end of file
diff --git a/preimage/librariesImport.py b/preimage/librariesImport.py
new file mode 100644
index 0000000..f1573cc
--- /dev/null
+++ b/preimage/librariesImport.py
@@ -0,0 +1,5 @@
+from ctypes import *
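+# pre-load the shared libraries that the compiled gedlib module (script) links
+# against, so that "import script" works without setting LD_LIBRARY_PATH first.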
+lib1 = cdll.LoadLibrary('lib/fann/libdoublefann.so')
+lib2 = cdll.LoadLibrary('lib/libsvm.3.22/libsvm.so')
+lib3 = cdll.LoadLibrary('lib/nomad/libnomad.so')
+lib4 = cdll.LoadLibrary('lib/nomad/libsgtelib.so')
diff --git a/preimage/librariesImport2.py b/preimage/librariesImport2.py
new file mode 100644
index 0000000..94f2940
--- /dev/null
+++ b/preimage/librariesImport2.py
@@ -0,0 +1,5 @@
+from ctypes import *
+lib1 = cdll.LoadLibrary('Cython_GedLib_2/lib/fann/libdoublefann.so')
+lib2 = cdll.LoadLibrary('Cython_GedLib_2/lib/libsvm.3.22/libsvm.so')
+lib3 = cdll.LoadLibrary('Cython_GedLib_2/lib/nomad/libnomad.so')
+lib4 = cdll.LoadLibrary('Cython_GedLib_2/lib/nomad/libsgtelib.so')
diff --git a/preimage/preimage.py b/preimage/preimage.py
index c466087..d449062 100644
--- a/preimage/preimage.py
+++ b/preimage/preimage.py
@@ -126,6 +126,7 @@ for alpha in alpha_range:
dhat = dnew
gnew = gtemp.copy()
found = True # found better graph.
+ r = 0
if found:
gihat_list = [gnew]
dis_gs.append(dhat)
diff --git a/preimage/setup.py b/preimage/setup.py
new file mode 100644
index 0000000..381a51f
--- /dev/null
+++ b/preimage/setup.py
@@ -0,0 +1,26 @@
+#from distutils.core import setup
+from distutils.extension import Extension
+#from Cython.Distutils import build_ext
+
+from distutils.core import setup
+from Cython.Build import cythonize
+
+#setup(ext_modules=cythonize("script.pyx"))
+
+extensions = [Extension("script",
+ sources=["script.pyx", "src/essai.cpp"],
+ include_dirs=["include","include/lsape", "include/Eigen", "include/nomad", "include/sgtelib", "include/libsvm.3.22", "include/fann", "include/boost_1_69_0"],
+ library_dirs=["lib/fann","lib/gedlib", "lib/libsvm.3.22","lib/nomad"],
+ libraries=["doublefann","sgtelib", "svm", "nomad"],
+ language="c++",
+ extra_compile_args=["-std=c++11"],
+ extra_link_args=["-std=c++11"])]
+
+setup(ext_modules=cythonize(extensions))
+
+#extensions = [Extension("script", sources=["script.pyx", "include/gedlib-master/src/env/ged_env.ipp"], include_dirs=["."], language="c++")]
+
+#setup(name = "script", ext_modules = extensions, cmdclass = {'build_ext':build_ext},)
+
+
+# Bash command: python setup.py build_ext --inplace
diff --git a/preimage/test.py b/preimage/test.py
new file mode 100644
index 0000000..e6ca558
--- /dev/null
+++ b/preimage/test.py
@@ -0,0 +1,57 @@
+#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/Cython_GedLib_2/lib/fann/:/export/home/lambertn/Documents/Cython_GedLib_2/lib/libsvm.3.22:/export/home/lambertn/Documents/Cython_GedLib_2/lib/nomad
+
+# So that "import script" finds the libraries that GedLib needs.
+# Equivalent to setting the LD_LIBRARY_PATH environment variable in a bash shell.
+# Allows running from IDLE and elsewhere without setting the environment variable every time.
+# os.environ does not work in this case.
+import librariesImport, script
+
+#import script
+
+#truc = script.computeEditDistanceOnGXlGraphs('include/gedlib-master/data/datasets/Mutagenicity/data/','collections/MUTA_10.xml',"CHEM_1", "BIPARTITE", "")
+#print(truc)
+#script.PyRestartEnv()
+#script.appel()
+
+def test():
+# script.appel()
+
+ script.PyRestartEnv()
+
+# print("Here is the Python function !")
+#
+# print("List of Edit Cost Options : ")
+# for i in script.listOfEditCostOptions :
+# print (i)
+# print("")
+#
+# print("List of Method Options : ")
+# for j in script.listOfMethodOptions :
+# print (j)
+# print("")
+
+ script.PyLoadGXLGraph('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml')
+ listID = script.PyGetGraphIds()
+
+ afficheId = ""
+ for i in listID :
+ afficheId+=str(i) + " "
+ print("Number of graphs = " + str(len(listID)) + ", list of Ids = " + afficheId)
+
+ script.PySetEditCost("CHEM_1")
+
+ script.PyInitEnv()
+
+ script.PySetMethod("BIPARTITE", "")
+ script.PyInitMethod()
+
+ g = listID[0]
+ h = listID[1]
+
+ script.PyRunMethod(g,h)
+ liste = script.PyGetAllMap(g,h)
+ print("Forward map : " ,liste[0], ", Backward map : ", liste[1])
+ print ("Upper Bound = " + str(script.PyGetUpperBound(g,h)) + ", Lower Bound = " + str(script.PyGetLowerBound(g,h)) + ", Runtime = " + str(script.PyGetRuntime(g,h)))
+
+
+test()
diff --git a/pygraph/utils/graphdataset.py b/pygraph/utils/graphdataset.py
index deaa182..4ca2c15 100644
--- a/pygraph/utils/graphdataset.py
+++ b/pygraph/utils/graphdataset.py
@@ -52,10 +52,10 @@ def get_dataset_attributes(Gn,
return False if edge_label is None else True
def get_edge_label_num(Gn):
- nl = set()
+ el = set()
for G in Gn:
- nl = nl | set(nx.get_edge_attributes(G, edge_label).values())
- return len(nl)
+ el = el | set(nx.get_edge_attributes(G, edge_label).values())
+ return len(el)
def is_directed(Gn):
return nx.is_directed(Gn[0])
diff --git a/pygraph/utils/graphfiles.py b/pygraph/utils/graphfiles.py
index 4d0feb8..9f7ab8f 100644
--- a/pygraph/utils/graphfiles.py
+++ b/pygraph/utils/graphfiles.py
@@ -22,8 +22,8 @@ def loadCT(filename):
with open(filename) as f:
content = f.read().splitlines()
g = nx.Graph(
- name=str(content[0]),
- filename=basename(filename)) # set name of the graph
+ name = str(content[0]),
+ filename = basename(filename)) # set name of the graph
tmp = content[1].split(" ")
if tmp[0] == '':
nb_nodes = int(tmp[1]) # number of the nodes
@@ -84,43 +84,63 @@ def loadGXL(filename):
return g
-def saveGXL(graph, filename):
- import xml.etree.ElementTree as ET
- root_node = ET.Element('gxl')
- attr = dict()
- attr['id'] = graph.graph['name']
- attr['edgeids'] = 'true'
- attr['edgemode'] = 'undirected'
- graph_node = ET.SubElement(root_node, 'graph', attrib=attr)
-
- for v in graph:
- current_node = ET.SubElement(graph_node, 'node', attrib={'id': str(v)})
- for attr in graph.nodes[v].keys():
- cur_attr = ET.SubElement(
- current_node, 'attr', attrib={'name': attr})
- cur_value = ET.SubElement(cur_attr,
- graph.nodes[v][attr].__class__.__name__)
- cur_value.text = graph.nodes[v][attr]
-
- for v1 in graph:
- for v2 in graph[v1]:
- if (v1 < v2): # Non oriented graphs
- cur_edge = ET.SubElement(
- graph_node,
- 'edge',
- attrib={
- 'from': str(v1),
- 'to': str(v2)
- })
- for attr in graph[v1][v2].keys():
- cur_attr = ET.SubElement(
- cur_edge, 'attr', attrib={'name': attr})
- cur_value = ET.SubElement(
- cur_attr, graph[v1][v2][attr].__class__.__name__)
- cur_value.text = str(graph[v1][v2][attr])
-
- tree = ET.ElementTree(root_node)
- tree.write(filename)
+def saveGXL(graph, filename, method='benoit'):
+ if method == 'benoit':
+ import xml.etree.ElementTree as ET
+ root_node = ET.Element('gxl')
+ attr = dict()
+ attr['id'] = str(graph.graph['name'])
+ attr['edgeids'] = 'true'
+ attr['edgemode'] = 'undirected'
+ graph_node = ET.SubElement(root_node, 'graph', attrib=attr)
+
+ for v in graph:
+ current_node = ET.SubElement(graph_node, 'node', attrib={'id': str(v)})
+ for attr in graph.nodes[v].keys():
+ cur_attr = ET.SubElement(
+ current_node, 'attr', attrib={'name': attr})
+ cur_value = ET.SubElement(cur_attr,
+ graph.nodes[v][attr].__class__.__name__)
+ cur_value.text = graph.nodes[v][attr]
+
+ for v1 in graph:
+ for v2 in graph[v1]:
+ if (v1 < v2): # Non oriented graphs
+ cur_edge = ET.SubElement(
+ graph_node,
+ 'edge',
+ attrib={
+ 'from': str(v1),
+ 'to': str(v2)
+ })
+ for attr in graph[v1][v2].keys():
+ cur_attr = ET.SubElement(
+ cur_edge, 'attr', attrib={'name': attr})
+ cur_value = ET.SubElement(
+ cur_attr, graph[v1][v2][attr].__class__.__name__)
+ cur_value.text = str(graph[v1][v2][attr])
+
+ tree = ET.ElementTree(root_node)
+ tree.write(filename)
+ elif method == 'gedlib':
+ # reference: https://github.com/dbblumenthal/gedlib/blob/master/data/generate_molecules.py#L22
+ pass
+#        @todo: write the GXL file directly in gedlib's format, following the
+#        reference above.
def loadSDF(filename):
@@ -412,3 +432,33 @@ def loadDataset(filename, filename_y=None, extra_params=None):
# print(g.edges(data=True))
return data, y
+
+
+def saveDataset(Gn, y, gformat='gxl', group=None, filename='gfile'):
+ """Save list of graphs.
+ """
+ import os
+ dirname_ds = os.path.dirname(filename)
+ if dirname_ds != '':
+ dirname_ds += '/'
+    if not os.path.exists(dirname_ds):
+ os.makedirs(dirname_ds)
+
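+    # when group='xml' and gformat='gxl', each graph is written to its own .gxl
+    # file and a collection .xml listing them is written alongside; the XML
+    # header below is assumed to follow gedlib's GraphCollection format.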
+ if group == 'xml' and gformat == 'gxl':
+ with open(filename + '.xml', 'w') as fgroup:
+ fgroup.write("")
+ fgroup.write("\n")
+ fgroup.write("\n")
+ for idx, g in enumerate(Gn):
+ fname_tmp = "graph" + str(idx) + ".gxl"
+ saveGXL(g, dirname_ds + fname_tmp)
+ fgroup.write("\n\t")
+ fgroup.write("\n")
+ fgroup.close()
+
+
+if __name__ == '__main__':
+ ds = {'name': 'MUTAG', 'dataset': '../../datasets/MUTAG/MUTAG.mat',
+ 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}} # node/edge symb
+ Gn, y = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+ saveDataset(Gn, y, group='xml', filename='temp/temp')
\ No newline at end of file
diff --git a/pygraph/utils/model_selection_precomputed.py b/pygraph/utils/model_selection_precomputed.py
index 174b674..a6f0303 100644
--- a/pygraph/utils/model_selection_precomputed.py
+++ b/pygraph/utils/model_selection_precomputed.py
@@ -420,55 +420,6 @@ def model_selection_for_precomputed_kernel(datafile,
# np.save(results_name_pre + 'best_gram_matrix_time.dt',
# best_gram_matrix_time)
- # print out as table.
- from collections import OrderedDict
- from tabulate import tabulate
- table_dict = {}
- if model_type == 'regression':
- for param_in in param_list:
- param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])
- else:
- for param_in in param_list:
- param_in['C'] = '{:.2e}'.format(param_in['C'])
- table_dict['params'] = [{**param_out, **param_in}
- for param_in in param_list for param_out in param_list_pre_revised]
- table_dict['gram_matrix_time'] = [
- '{:.2f}'.format(gram_matrix_time[index_out])
- for param_in in param_list
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- table_dict['valid_perf'] = [
- '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],
- std_val_scores[index_out][index_in])
- for index_in, _ in enumerate(param_list)
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- table_dict['test_perf'] = [
- '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],
- std_perf_scores[index_out][index_in])
- for index_in, _ in enumerate(param_list)
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- table_dict['train_perf'] = [
- '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],
- std_train_scores[index_out][index_in])
- for index_in, _ in enumerate(param_list)
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- keyorder = [
- 'params', 'train_perf', 'valid_perf', 'test_perf',
- 'gram_matrix_time'
- ]
- if verbose:
- print()
- tb_print = tabulate(
- OrderedDict(
- sorted(table_dict.items(),
- key=lambda i: keyorder.index(i[0]))),
- headers='keys')
-# print(tb_print)
- str_fw += 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print
-
# read gram matrices from file.
else:
# Grid of parameters with a discrete number of values for each.
@@ -632,58 +583,16 @@ def model_selection_for_precomputed_kernel(datafile,
# str_fw += 'time to calculate best gram matrix: {:.2f}±{:.2f}s\n'.format(ave_bgmt, std_bgmt)
str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster)
- # print out as table.
- from collections import OrderedDict
- from tabulate import tabulate
- table_dict = {}
- if model_type == 'regression':
- for param_in in param_list:
- param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])
- else:
- for param_in in param_list:
- param_in['C'] = '{:.2e}'.format(param_in['C'])
- table_dict['params'] = [{**param_out, **param_in}
- for param_in in param_list for param_out in param_list_pre_revised]
-# table_dict['gram_matrix_time'] = [
-# '{:.2f}'.format(gram_matrix_time[index_out])
-# for param_in in param_list
-# for index_out, _ in enumerate(param_list_pre_revised)
-# ]
- table_dict['valid_perf'] = [
- '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],
- std_val_scores[index_out][index_in])
- for index_in, _ in enumerate(param_list)
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- table_dict['test_perf'] = [
- '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],
- std_perf_scores[index_out][index_in])
- for index_in, _ in enumerate(param_list)
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- table_dict['train_perf'] = [
- '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],
- std_train_scores[index_out][index_in])
- for index_in, _ in enumerate(param_list)
- for index_out, _ in enumerate(param_list_pre_revised)
- ]
- keyorder = [
- 'params', 'train_perf', 'valid_perf', 'test_perf'
- ]
- if verbose:
- print()
- tb_print = tabulate(
- OrderedDict(
- sorted(table_dict.items(),
- key=lambda i: keyorder.index(i[0]))),
- headers='keys')
-# print(tb_print)
- str_fw += 'table of performance v.s. hyper-params:\n\n%s\n\n' % tb_print
-
# open file to save all results for this dataset.
if not os.path.exists(results_dir):
os.makedirs(results_dir)
+ # print out results as table.
+ str_fw += printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
+ std_val_scores, average_perf_scores, std_perf_scores,
+ average_train_scores, std_train_scores, gram_matrix_time,
+ model_type, verbose)
+
# open file to save all results for this dataset.
if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):
with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:
@@ -974,4 +883,55 @@ def read_gram_matrices_from_file(results_dir, ds_name):
gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
y = gmfile['y'].tolist()
- return gram_matrices, param_list_pre_revised, y
\ No newline at end of file
+ return gram_matrices, param_list_pre_revised, y
+
+
+def printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
+ std_val_scores, average_perf_scores, std_perf_scores,
+ average_train_scores, std_train_scores, gram_matrix_time,
+ model_type, verbose):
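+    """Format the cross-validation results as a table, one row per combination
+    of pre-computed and post-computed hyper-parameters, and return it as a
+    string to be appended to the output report.
+    """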
+ from collections import OrderedDict
+ from tabulate import tabulate
+ table_dict = {}
+ if model_type == 'regression':
+ for param_in in param_list:
+ param_in['alpha'] = '{:.2e}'.format(param_in['alpha'])
+ else:
+ for param_in in param_list:
+ param_in['C'] = '{:.2e}'.format(param_in['C'])
+ table_dict['params'] = [{**param_out, **param_in}
+ for param_in in param_list for param_out in param_list_pre_revised]
+ table_dict['gram_matrix_time'] = [
+ '{:.2f}'.format(gram_matrix_time[index_out])
+ for param_in in param_list
+ for index_out, _ in enumerate(param_list_pre_revised)
+ ]
+ table_dict['valid_perf'] = [
+ '{:.2f}±{:.2f}'.format(average_val_scores[index_out][index_in],
+ std_val_scores[index_out][index_in])
+ for index_in, _ in enumerate(param_list)
+ for index_out, _ in enumerate(param_list_pre_revised)
+ ]
+ table_dict['test_perf'] = [
+ '{:.2f}±{:.2f}'.format(average_perf_scores[index_out][index_in],
+ std_perf_scores[index_out][index_in])
+ for index_in, _ in enumerate(param_list)
+ for index_out, _ in enumerate(param_list_pre_revised)
+ ]
+ table_dict['train_perf'] = [
+ '{:.2f}±{:.2f}'.format(average_train_scores[index_out][index_in],
+ std_train_scores[index_out][index_in])
+ for index_in, _ in enumerate(param_list)
+ for index_out, _ in enumerate(param_list_pre_revised)
+ ]
+
+ keyorder = [
+ 'params', 'train_perf', 'valid_perf', 'test_perf',
+ 'gram_matrix_time'
+ ]
+ if verbose:
+ print()
+ tb_print = tabulate(OrderedDict(sorted(table_dict.items(),
+ key=lambda i: keyorder.index(i[0]))), headers='keys')
+# print(tb_print)
+    return 'table of performance vs. hyper-params:\n\n%s\n\n' % tb_print
\ No newline at end of file