From 8c03f1b049694450f95ae57065f08af0485c1c31 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Thu, 3 Oct 2019 16:11:22 +0200 Subject: [PATCH] Update the GED method, use new gedlibpy library. --- preimage/iam.py | 186 +++--- preimage/librariesImport.py | 5 - preimage/librariesImport2.py | 5 - preimage/{gk_iam.py => preimage_iam.py} | 886 +++++++++++---------------- preimage/{preimage.py => preimage_random.py} | 2 +- preimage/run_gk_iam.py | 4 +- preimage/setup.py | 26 - preimage/test.py | 95 ++- preimage/test_random_mutag.py | 24 +- 9 files changed, 521 insertions(+), 712 deletions(-) delete mode 100644 preimage/librariesImport.py delete mode 100644 preimage/librariesImport2.py rename preimage/{gk_iam.py => preimage_iam.py} (62%) rename preimage/{preimage.py => preimage_random.py} (99%) delete mode 100644 preimage/setup.py diff --git a/preimage/iam.py b/preimage/iam.py index a7ad0a4..e4a2018 100644 --- a/preimage/iam.py +++ b/preimage/iam.py @@ -12,23 +12,19 @@ import networkx as nx from tqdm import tqdm import sys -#from Cython_GedLib_2 import librariesImport, script -import librariesImport, script +from gedlibpy import librariesImport, gedlibpy sys.path.insert(0, "../") -from pygraph.utils.graphfiles import saveDataset from pygraph.utils.graphdataset import get_dataset_attributes from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels -#from pygraph.utils.utils import graph_deepcopy -def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate, - c_ei=3, c_er=3, c_es=1, ite_max=50, epsilon=0.001, - node_label='atom', edge_label='bond_type', + +def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, + epsilon=0.001, node_label='atom', edge_label='bond_type', connected=False, removeNodes=True, allBestInit=False, allBestNodes=False, - allBestEdges=False, + allBestEdges=False, allBestOutput=False, params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP', 'saveGXL': 'benoit'}): """See my name, then you know what I do. """ - from tqdm import tqdm # Gn_median = Gn_median[0:10] # Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median] if removeNodes: @@ -150,16 +146,6 @@ def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate, h_ij0 += h_ij0_p h_ij0_list.append(h_ij0) label_list.append(label) - # # case when the edge is to be removed. - # h_ij0_remove = 0 - # for idx, g in enumerate(Gn_median): - # pi_i = pi_p_forward[idx][nd1i] - # pi_j = pi_p_forward[idx][nd2i] - # if g.has_node(pi_i) and g.has_node(pi_j) and not - # g.has_edge(pi_i, pi_j): - # h_ij0_remove += 1 - # h_ij0_list.append(h_ij0_remove) - # label_list.append(label_r) # get the best labels. idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist() @@ -370,7 +356,9 @@ def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate, idx_list.append(idx) return Gn_new, idx_list - + + ########################################################################### + # phase 1: initilize. # compute set-median. dis_min = np.inf @@ -421,8 +409,6 @@ def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate, # print(g.edges(data=True)) # get the best median graphs -# dis_list, pi_forward_list = median_distance(G_list, Gn_median, -# **params_ged) G_min_list, pi_forward_min_list, dis_min = best_median_graphs( G_list, pi_forward_list, dis_list) # for g in G_min_list: @@ -430,9 +416,11 @@ def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate, # plt.show() # print(g.nodes(data=True)) # print(g.edges(data=True)) - # randomly choose one graph. - idx_rdm = random.randint(0, len(G_min_list) - 1) - G_min_list = [G_min_list[idx_rdm]] + + if not allBestOutput: + # randomly choose one graph. + idx_rdm = random.randint(0, len(G_min_list) - 1) + G_min_list = [G_min_list[idx_rdm]] return G_min_list, dis_min @@ -445,13 +433,91 @@ def iam_moreGraphsAsInit_tryAllPossibleBestGraphs(Gn_median, Gn_candidate, +############################################################################### +# Useful functions. +def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit', + stabilizer='min'): + """ + Compute GED. + """ + if lib == 'gedlibpy': + def convertGraph(G): + """Convert a graph to the proper NetworkX format that can be + recognized by library gedlibpy. + """ + G_new = nx.Graph() + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): +# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) + G_new.add_edge(str(nd1), str(nd2)) + + return G_new + + gedlibpy.restart_env() + gedlibpy.add_nx_graph(convertGraph(g1), "") + gedlibpy.add_nx_graph(convertGraph(g2), "") + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost(cost) + gedlibpy.init() + gedlibpy.set_method(method, "") + gedlibpy.init_method() + + g = listID[0] + h = listID[1] + if stabilizer == None: + gedlibpy.run_method(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + upper = gedlibpy.get_upper_bound(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'min': + upper = np.inf + for itr in range(50): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp < upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + if upper == 0: + break + + dis = upper + + # make the map label correct (label remove map as np.inf) + nodes1 = [n for n in g1.nodes()] + nodes2 = [n for n in g2.nodes()] + nb1 = nx.number_of_nodes(g1) + nb2 = nx.number_of_nodes(g2) + pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] + pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] + + return dis, pi_forward, pi_backward +def median_distance(Gn, Gn_median, measure='ged', verbose=False, + ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'): + dis_list = [] + pi_forward_list = [] + for idx, G in tqdm(enumerate(Gn), desc='computing median distances', + file=sys.stdout) if verbose else enumerate(Gn): + dis_sum = 0 + pi_forward_list.append([]) + for G_p in Gn_median: + dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, + cost=ged_cost, method=ged_method, saveGXL=saveGXL) + pi_forward_list[idx].append(pi_tmp_forward) + dis_sum += dis_tmp + dis_list.append(dis_sum) + return dis_list, pi_forward_list ############################################################################### +# Old implementations. def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type', connected=True): @@ -579,73 +645,6 @@ def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type', return G - -def GED(g1, g2, lib='gedlib', cost='CHEM_1', method='IPFP', saveGXL='benoit', - stabilizer='min'): - """ - Compute GED. - """ - if lib == 'gedlib': - # transform dataset to the 'xml' file as the GedLib required. - saveDataset([g1, g2], [None, None], group='xml', filename='ged_tmp/tmp', - xparams={'method': saveGXL}) - # script.appel() - script.PyRestartEnv() - script.PyLoadGXLGraph('ged_tmp/', 'ged_tmp/tmp.xml') - listID = script.PyGetGraphIds() - script.PySetEditCost(cost) #("CHEM_1") - script.PyInitEnv() - script.PySetMethod(method, "") - script.PyInitMethod() - g = listID[0] - h = listID[1] - if stabilizer == None: - script.PyRunMethod(g, h) - pi_forward, pi_backward = script.PyGetAllMap(g, h) - upper = script.PyGetUpperBound(g, h) - lower = script.PyGetLowerBound(g, h) - elif stabilizer == 'min': - upper = np.inf - for itr in range(50): - script.PyRunMethod(g, h) - upper_tmp = script.PyGetUpperBound(g, h) - if upper_tmp < upper: - upper = upper_tmp - pi_forward, pi_backward = script.PyGetAllMap(g, h) - lower = script.PyGetLowerBound(g, h) - if upper == 0: - break - - dis = upper - - # make the map label correct (label remove map as np.inf) - nodes1 = [n for n in g1.nodes()] - nodes2 = [n for n in g2.nodes()] - nb1 = nx.number_of_nodes(g1) - nb2 = nx.number_of_nodes(g2) - pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] - pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] - - return dis, pi_forward, pi_backward - - -def median_distance(Gn, Gn_median, measure='ged', verbose=False, - ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'): - dis_list = [] - pi_forward_list = [] - for idx, G in tqdm(enumerate(Gn), desc='computing median distances', - file=sys.stdout) if verbose else enumerate(Gn): - dis_sum = 0 - pi_forward_list.append([]) - for G_p in Gn_median: - dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, - cost=ged_cost, method=ged_method, saveGXL=saveGXL) - pi_forward_list[idx].append(pi_tmp_forward) - dis_sum += dis_tmp - dis_list.append(dis_sum) - return dis_list, pi_forward_list - - # --------------------------- These are tests --------------------------------# def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1, @@ -785,9 +784,6 @@ def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1, ############################################################################### - - - if __name__ == '__main__': from pygraph.utils.graphfiles import loadDataset ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', diff --git a/preimage/librariesImport.py b/preimage/librariesImport.py deleted file mode 100644 index f1573cc..0000000 --- a/preimage/librariesImport.py +++ /dev/null @@ -1,5 +0,0 @@ -from ctypes import * -lib1 = cdll.LoadLibrary('lib/fann/libdoublefann.so') -lib2 = cdll.LoadLibrary('lib/libsvm.3.22/libsvm.so') -lib3 = cdll.LoadLibrary('lib/nomad/libnomad.so') -lib4 = cdll.LoadLibrary('lib/nomad/libsgtelib.so') diff --git a/preimage/librariesImport2.py b/preimage/librariesImport2.py deleted file mode 100644 index 94f2940..0000000 --- a/preimage/librariesImport2.py +++ /dev/null @@ -1,5 +0,0 @@ -from ctypes import * -lib1 = cdll.LoadLibrary('Cython_GedLib_2/lib/fann/libdoublefann.so') -lib2 = cdll.LoadLibrary('Cython_GedLib_2/lib/libsvm.3.22/libsvm.so') -lib3 = cdll.LoadLibrary('Cython_GedLib_2/lib/nomad/libnomad.so') -lib4 = cdll.LoadLibrary('Cython_GedLib_2/lib/nomad/libsgtelib.so') diff --git a/preimage/gk_iam.py b/preimage/preimage_iam.py similarity index 62% rename from preimage/gk_iam.py rename to preimage/preimage_iam.py index 6a0d46b..da4cc80 100644 --- a/preimage/gk_iam.py +++ b/preimage/preimage_iam.py @@ -19,9 +19,7 @@ import networkx as nx import matplotlib.pyplot as plt import random -import matplotlib.pyplot as plt - -from iam import iam, test_iam_with_more_graphs_as_init, iam_moreGraphsAsInit_tryAllPossibleBestGraphs +from iam import iam_upgraded sys.path.insert(0, "../") from pygraph.kernels.marginalizedKernel import marginalizedkernel from pygraph.kernels.untilHPathKernel import untilhpathkernel @@ -29,264 +27,15 @@ from pygraph.kernels.spKernel import spkernel import functools from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct from pygraph.kernels.structuralspKernel import structuralspkernel -from median import draw_Letter_graph - - -def gk_iam(Gn, alpha): - """This function constructs graph pre-image by the iterative pre-image - framework in reference [1], algorithm 1, where the step of generating new - graphs randomly is replaced by the IAM algorithm in reference [2]. - - notes - ----- - Every time a better graph is acquired, the older one is replaced by it. - """ - pass -# # compute k nearest neighbors of phi in DN. -# dis_list = [] # distance between g_star and each graph. -# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): -# dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * -# k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * -# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * -# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) -# dis_list.append(dtemp) -# -# # sort -# sort_idx = np.argsort(dis_list) -# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] -# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN -# if dis_gs[0] == 0: # the exact pre-image. -# print('The exact pre-image is found from the input dataset.') -# return 0, g0hat -# dhat = dis_gs[0] # the nearest distance -# Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors -# gihat_list = [] -# -## i = 1 -# r = 1 -# while r < r_max: -# print('r =', r) -## found = False -# Gs_nearest = Gk + gihat_list -# g_tmp = iam(Gs_nearest) -# -# # compute distance between \psi and the new generated graph. -# knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None, -# p_quit=lmbda, n_iteration=20, remove_totters=False, -# n_jobs=multiprocessing.cpu_count(), verbose=False) -# dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) * -# knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha * -# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * -# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) -# if dnew <= dhat: # the new distance is smaller -# print('I am smaller!') -# dhat = dnew -# g_new = g_tmp.copy() # found better graph. -# gihat_list = [g_new] -# dis_gs.append(dhat) -# r = 0 -# else: -# r += 1 -# -# ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list) -# -# return dhat, ghat - - -def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max): - """This function constructs graph pre-image by the iterative pre-image - framework in reference [1], algorithm 1, where the step of generating new - graphs randomly is replaced by the IAM algorithm in reference [2]. - - notes - ----- - Every time a better graph is acquired, its distance in kernel space is - compared with the k nearest ones, and the k nearest distances from the k+1 - distances will be used as the new ones. - """ - # compute k nearest neighbors of phi in DN. - dis_list = [] # distance between g_star and each graph. - for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): - dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix) -# dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * -# k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * -# (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * -# k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) - dis_list.append(dtemp) - - # sort - sort_idx = np.argsort(dis_list) - dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances - g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN - if dis_gs[0] == 0: # the exact pre-image. - print('The exact pre-image is found from the input dataset.') - return 0, g0hat - dhat = dis_gs[0] # the nearest distance - ghat = g0hat.copy() - Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors - for gi in Gk: - nx.draw_networkx(gi) - plt.show() - print(gi.nodes(data=True)) - print(gi.edges(data=True)) - Gs_nearest = Gk.copy() -# gihat_list = [] - -# i = 1 - r = 1 - while r < r_max: - print('r =', r) -# found = False -# Gs_nearest = Gk + gihat_list -# g_tmp = iam(Gs_nearest) - g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1) - nx.draw_networkx(g_tmp) - plt.show() - print(g_tmp.nodes(data=True)) - print(g_tmp.edges(data=True)) - - # compute distance between \psi and the new generated graph. - gi_list = [Gn[i] for i in idx_gi] - knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False) - dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew) - -# dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * -# knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * -# alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * -# k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) - if dnew <= dhat and g_tmp != ghat: # the new distance is smaller - print('I am smaller!') - print(str(dhat) + '->' + str(dnew)) -# nx.draw_networkx(ghat) -# plt.show() -# print('->') -# nx.draw_networkx(g_tmp) -# plt.show() - - dhat = dnew - g_new = g_tmp.copy() # found better graph. - ghat = g_tmp.copy() - dis_gs.append(dhat) # add the new nearest distance. - Gs_nearest.append(g_new) # add the corresponding graph. - sort_idx = np.argsort(dis_gs) - dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. - Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] - r = 0 - else: - r += 1 - - return dhat, ghat - - -#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max): -# """This function constructs graph pre-image by the iterative pre-image -# framework in reference [1], algorithm 1, where the step of generating new -# graphs randomly is replaced by the IAM algorithm in reference [2]. -# -# notes -# ----- -# Every time a set of n better graphs is acquired, their distances in kernel space are -# compared with the k nearest ones, and the k nearest distances from the k+n -# distances will be used as the new ones. -# """ -# Gn_median = [Gn[idx].copy() for idx in idx_gi] -# # compute k nearest neighbors of phi in DN. -# dis_list = [] # distance between g_star and each graph. -# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): -# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix) -## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * -## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * -## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * -## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) -# dis_list.append(dtemp) -# -# # sort -# sort_idx = np.argsort(dis_list) -# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances -# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) -# g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN -# if dis_gs[0] == 0: # the exact pre-image. -# print('The exact pre-image is found from the input dataset.') -# return 0, g0hat_list -# dhat = dis_gs[0] # the nearest distance -# ghat_list = [g.copy() for g in g0hat_list] -# for g in ghat_list: -# nx.draw_networkx(g) -# plt.show() -# print(g.nodes(data=True)) -# print(g.edges(data=True)) -# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors -# for gi in Gk: -# nx.draw_networkx(gi) -# plt.show() -# print(gi.nodes(data=True)) -# print(gi.edges(data=True)) -# Gs_nearest = Gk.copy() -## gihat_list = [] -# -## i = 1 -# r = 1 -# while r < r_max: -# print('r =', r) -## found = False -## Gs_nearest = Gk + gihat_list -## g_tmp = iam(Gs_nearest) -# g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( -# Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1) -# for g in g_tmp_list: -# nx.draw_networkx(g) -# plt.show() -# print(g.nodes(data=True)) -# print(g.edges(data=True)) -# -# # compute distance between \psi and the new generated graphs. -# gi_list = [Gn[i] for i in idx_gi] -# knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False) -# dnew_list = [] -# for idx, g_tmp in enumerate(g_tmp_list): -# dnew_list.append(dis_gstar(idx, range(len(g_tmp_list), -# len(g_tmp_list) + len(gi_list) + 1), alpha, knew)) -# -## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * -## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * -## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * -## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) -# -# # find the new k nearest graphs. -# dis_gs = dnew_list + dis_gs # add the new nearest distances. -# Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs. -# sort_idx = np.argsort(dis_gs) -# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0: -# print('We got better k nearest neighbors! Hurray!') -# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. -# print(dis_gs[-1]) -# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] -# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) -# if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0: -# print('I have smaller or equal distance!') -# dhat = dis_gs[0] -# print(str(dhat) + '->' + str(dhat)) -# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist() -# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list] -# for g in ghat_list: -# nx.draw_networkx(g) -# plt.show() -# print(g.nodes(data=True)) -# print(g.edges(data=True)) -# r = 0 -# else: -# r += 1 -# -# return dhat, ghat_list -def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, - gkernel, epsilon=0.001, InitIAMWithAllDk=True, - params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, - 'ite_max': 50, 'epsilon': 0.001, - 'removeNodes': True, 'connected': False}, - params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP', - 'saveGXL': 'benoit'}): +def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, + gkernel, epsilon=0.001, InitIAMWithAllDk=False, + params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, + 'ite_max': 50, 'epsilon': 0.001, + 'removeNodes': True, 'connected': False}, + params_ged={'ged_cost': 'CHEM_1', 'ged_method': 'IPFP', + 'saveGXL': 'benoit'}): """This function constructs graph pre-image by the iterative pre-image framework in reference [1], algorithm 1, where the step of generating new graphs randomly is replaced by the IAM algorithm in reference [2]. @@ -305,10 +54,6 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout): dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3) -# dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * -# k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * -# (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * -# k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) dis_all.append(dtemp) # sort @@ -334,13 +79,10 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, # draw_Letter_graph(g) print(gi.nodes(data=True)) print(gi.edges(data=True)) -# gihat_list = [] # i = 1 r = 0 itr_total = 0 -# cur_dis = dhat -# old_dis = cur_dis * 2 dis_of_each_itr = [dhat] found = False nb_updated = 0 @@ -350,23 +92,19 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, print('Current preimage iteration =', r) print('Total preimage iteration =', itr_total, '\n') found = False -# Gs_nearest = Gk + gihat_list -# g_tmp = iam(Gs_nearest) Gn_nearest_median = [g.copy() for g in Gk] if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM. ghat_new_list = [] for g_tmp in Gk: Gn_nearest_init = [g_tmp.copy()] - ghat_new_list_tmp, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs( - Gn_nearest_median, Gn_nearest_init, params_ged=params_ged, - **params_iam) + ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median, + Gn_nearest_init, params_ged=params_ged, **params_iam) ghat_new_list += ghat_new_list_tmp else: # only the best graph in D_k is used to initialize IAM. Gn_nearest_init = [g.copy() for g in Gk] - ghat_new_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs( - Gn_nearest_median, Gn_nearest_init, params_ged=params_ged, - **params_iam) + ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init, + params_ged=params_ged, **params_iam) # for g in g_tmp_list: # nx.draw_networkx(g) @@ -384,9 +122,7 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, len(ghat_new_list) + len(Gn_median) + 1), alpha, knew, withterm3=False)) - for idx_g, ghat_new in enumerate(ghat_new_list): -# ghat_new = ghat_new_list[0].copy() dhat_new = dhat_new_list[idx_g] # if the new distance is smaller than the max of D_k. @@ -396,14 +132,13 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, for dis_tmp in dis_k[1:-1]: if np.abs(dhat_new - dis_tmp) < epsilon: is_duplicate = True - print('IAM: generated duplicate k nearest graph.') + print('IAM: duplicate k nearest graph generated.') break if not is_duplicate: if np.abs(dhat_new - dhat) < epsilon: print('IAM: I am equal!') # dhat = dhat_new # ghat_list = [ghat_new.copy()] -# is_iam_duplicate = True else: print('IAM: we got better k nearest neighbors!') nb_updated_k += 1 @@ -411,8 +146,7 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, nb_updated_k, 'times.') dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance. - Gk = [nx.convert_node_labels_to_integers(ghat_new.copy())] \ - + Gk[0:k-1] # add the corresponding graph. + Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph. sort_idx = np.argsort(dis_k) dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. Gk = [Gk[idx] for idx in sort_idx[0:k]] @@ -434,129 +168,6 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, if not found: r += 1 -# else: # @todo: may not work. -# dis_k = dhat_new_list + dis_k # add the new nearest distances. -# Gk = [nx.convert_node_labels_to_integers(g.copy()) for g -# in ghat_new_list] + Gk # add the corresponding graphs. -# sort_idx = np.argsort(dis_k) -# if len([i for i in sort_idx[0:k] if i < len(dhat_new_list)]) > 0: -# print('We got new k nearest neighbors! Hurray!') -# dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. -# # print(dis_k[-1]) -# Gk = [Gk[idx] for idx in sort_idx[0:k]] -# nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist()) -# if dhat_new < dhat: -# print('I have smaller distance!') -# print(str(dhat) + '->' + str(dhat_new)) -# dhat = dis_k[0] -# idx_best_list = np.argwhere(dis_k == dhat_new).flatten().tolist() -# ghat_list = [Gk[idx].copy() for idx in idx_best_list] -# # for g in ghat_list: -# ## nx.draw_networkx(g) -# ## plt.show() -# # draw_Letter_graph(g) -# # print(g.nodes(data=True)) -# # print(g.edges(data=True)) -# r = 0 -# found = True -# nb_updated += 1 -# print('the graph is updated by IAM', nb_updated, 'times.') -# -# nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), -# with_labels=True) -# ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG") -# plt.show() -# else: -# dis_k = [dis_k[idx] for idx in sort_idx[0:k]] -# Gk = [Gk[idx] for idx in sort_idx[0:k]] -# # Gn_nearest_median = [g.copy() for g in Gn_nearest_init] -# if not found: -# r += 1 - -# dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * -# knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * -# alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * -# k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) - -# # find the new k nearest graphs. -# dhat_new = min(dnew_list) -# dis_k = dnew_list + dis_k # add the new nearest distances. -# Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs. -# sort_idx = np.argsort(dis_k) -# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0: -# print('We got new k nearest neighbors! Hurray!') -# dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. -## print(dis_k[-1]) -# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] -# nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist()) -# if dnew_best < dhat and np.abs(dnew_best - dhat) > epsilon: -# print('I have smaller distance!') -# print(str(dhat) + '->' + str(dis_k[0])) -# dhat = dis_k[0] -# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist() -# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list] -## for g in ghat_list: -### nx.draw_networkx(g) -### plt.show() -## draw_Letter_graph(g) -## print(g.nodes(data=True)) -## print(g.edges(data=True)) -# r = 0 -# found = True -# nb_updated += 1 -# elif np.abs(dnew_best - dhat) < epsilon: -# print('I have almost equal distance!') -# print(str(dhat) + '->' + str(dnew_best)) -# else: -# dis_k = [dis_k[idx] for idx in sort_idx[0:k]] -# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] -# Gn_nearest_median = [g.copy() for g in Gs_nearest] -# if not found: -# r += 1 - -# # find the new k nearest graphs. -# dnew_best = min(dnew_list) -# if np.abs(dnew_best - dhat) >= epsilon: -# dis_k = dnew_list + dis_k # add the new nearest distances. -# Gs_nearest_init = [g.copy() for g in g_tmp_list] + Gs_nearest_init # add the corresponding graphs. -# sort_idx = np.argsort(dis_k) -# else: # if the new distance is equal to the old one. -# # @todo: works if only one graph is generated. -# Gs_nearest_init[0] = g_tmp_list[0]_init.copy() -# sort_idx = np.argsort(dis_k) -# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0: -# print('We got new k nearest neighbors! Hurray!') -# dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. -## print(dis_k[-1]) -# Gs_nearest_init = [Gs_nearest_init[idx] for idx in sort_idx[0:k]] -# nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist()) -# if dnew_best < dhat and np.abs(dnew_best - dhat) >= epsilon: -# print('I have smaller distance!') -# print(str(dhat) + '->' + str(dis_k[0])) -# dhat = dis_k[0] -# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist() -# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list] -## for g in ghat_list: -### nx.draw_networkx(g) -### plt.show() -## draw_Letter_graph(g) -## print(g.nodes(data=True)) -## print(g.edges(data=True)) -# r = 0 -# found = True -# nb_updated += 1 -# elif np.abs(dnew_best - dhat) < epsilon: -# print('I have almost equal distance!') -# print(str(dhat) + '->' + str(dnew_best)) -# else: -# dis_k = [dis_k[idx] for idx in sort_idx[0:k]] -# Gs_nearest_init = [Gs_nearest_init[idx] for idx in sort_idx[0:k]] -# Gn_nearest_median = [g.copy() for g in Gs_nearest] -# if not found: -# r += 1 - -# old_dis = cur_dis -# cur_dis = dnew_best dis_of_each_itr.append(dhat) itr_total += 1 print('\nthe k shortest distances are', dis_k) @@ -573,7 +184,7 @@ def gk_iam_nearest_multi(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l_max, gkernel, epsilon=0.001, - allDkInitRandom=False, InitIAMWithAllDk=False, + InitIAMWithAllDk=False, InitRandomWithAllDk=True, params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, 'ite_max': 50, 'epsilon': 0.001, 'removeNodes': True, 'connected': False}, @@ -623,64 +234,105 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max # draw_Letter_graph(g) print(gi.nodes(data=True)) print(gi.edges(data=True)) -# gihat_list = [] -# i = 1 r = 0 itr_total = 0 -# cur_dis = dhat -# old_dis = cur_dis * 2 dis_of_each_itr = [dhat] - found = False nb_updated_iam = 0 nb_updated_k_iam = 0 nb_updated_random = 0 nb_updated_k_random = 0 - is_iam_duplicate = False +# is_iam_duplicate = False while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon: print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-') print('Current preimage iteration =', r) print('Total preimage iteration =', itr_total, '\n') - found = False -# Gs_nearest = Gk + gihat_list -# g_tmp = iam(Gs_nearest) - if not is_iam_duplicate: + found_iam = False + + Gn_nearest_median = [g.copy() for g in Gk] + if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM. + ghat_new_list = [] + for g_tmp in Gk: + Gn_nearest_init = [g_tmp.copy()] + ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median, + Gn_nearest_init, params_ged=params_ged, **params_iam) + ghat_new_list += ghat_new_list_tmp + else: # only the best graph in D_k is used to initialize IAM. Gn_nearest_init = [g.copy() for g in Gk] - Gn_nearest_median = [g.copy() for g in Gk] - ghat_new_list, _ = iam_moreGraphsAsInit_tryAllPossibleBestGraphs( - Gn_nearest_median, Gn_nearest_init, params_ged=params_ged, **params_iam) - # for g in ghat_new_list: - # nx.draw_networkx(g) - # plt.show() - # draw_Letter_graph(g) - # print(g.nodes(data=True)) - # print(g.edges(data=True)) - - # compute distance between \psi and the new generated graphs. - knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False) - dhat_new_list = [] - for idx, g_tmp in enumerate(ghat_new_list): - # @todo: the term3 below could use the one at the beginning of the function. - dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), - len(ghat_new_list) + len(Gn_median) + 1), - alpha, knew, withterm3=False)) - - ghat_new = ghat_new_list[0].copy() - dhat_new = min(dhat_new_list) + ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init, + params_ged=params_ged, **params_iam) + +# for g in g_tmp_list: +# nx.draw_networkx(g) +# plt.show() +# draw_Letter_graph(g) +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + # compute distance between \psi and the new generated graphs. + knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False) + dhat_new_list = [] + + for idx, g_tmp in enumerate(ghat_new_list): + # @todo: the term3 below could use the one at the beginning of the function. + dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), + len(ghat_new_list) + len(Gn_median) + 1), + alpha, knew, withterm3=False)) + # find the new k nearest graphs. - # @todo: for now only consider the situation when only one graph is generated by IAM. - # when new distance is not smaller than the max of D_k, use random generation. - if np.abs(dhat_new - dis_k[-1]) < epsilon or dis_k[-1] < dhat_new or \ - is_iam_duplicate: -# Gs_nearest[0] = ghat_new_list[0].copy() -# sort_idx = np.argsort(dis_k) - print('Distance not better, switching to random generation now.') - print(str(dhat) + '->' + str(dhat_new)) + for idx_g, ghat_new in enumerate(ghat_new_list): + dhat_new = dhat_new_list[idx_g] - if allDkInitRandom: # use all k nearest graphs as the initials. - init_list = [g_init.copy() for g_init in Gk] - else: # use just the nearest graph as the initial. + # if the new distance is smaller than the max of D_k. + if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon: + # check if the new distance is the same as one in D_k. + is_duplicate = False + for dis_tmp in dis_k[1:-1]: + if np.abs(dhat_new - dis_tmp) < epsilon: + is_duplicate = True + print('IAM: duplicate k nearest graph generated.') + break + if not is_duplicate: + if np.abs(dhat_new - dhat) < epsilon: + print('IAM: I am equal!') +# dhat = dhat_new +# ghat_list = [ghat_new.copy()] + else: + print('IAM: we got better k nearest neighbors!') + nb_updated_k_iam += 1 + print('the k nearest neighbors are updated', + nb_updated_k_iam, 'times.') + + dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance. + Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph. + sort_idx = np.argsort(dis_k) + dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. + Gk = [Gk[idx] for idx in sort_idx[0:k]] + if dhat_new < dhat: + print('IAM: I have smaller distance!') + print(str(dhat) + '->' + str(dhat_new)) + dhat = dhat_new + ghat_list = [Gk[0].copy()] + r = 0 + nb_updated_iam += 1 + + print('the graph is updated by IAM', nb_updated_iam, + 'times.') + nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'), + with_labels=True) + ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG") + plt.show() + + found_iam = True + + # when new distance is not smaller than the max of D_k, use random generation. + if not found_iam: + print('Distance not better, switching to random generation now.') + print(str(dhat) + '->' + str(dhat_new)) + + if InitRandomWithAllDk: # use all k nearest graphs as the initials. + init_list = [g_init.copy() for g_init in Gk] + else: # use just the nearest graph as the initial. init_list = [Gk[0].copy()] # number of edges to be changed. @@ -750,7 +402,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max for dis_tmp in dis_k[1:-1]: if np.abs(dhat_new - dis_tmp) < epsilon: is_duplicate = True - print('Random: generated duplicate k nearest graph.') + print('Random: duplicate k nearest graph generated.') break if not is_duplicate: if np.abs(dhat_new - dhat) < epsilon: @@ -789,93 +441,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max break l += 1 if not found_random: # l == l_max: - r += 1 - is_iam_duplicate = False - - else: # if the new distance is smaller than the max of D_k. - if len(dhat_new_list) == 1: - # check if the new distance is the same as one in D_k. - is_duplicate = False - for dis_tmp in dis_k[1:-1]: - if np.abs(dhat_new - dis_tmp) < epsilon: - is_duplicate = True - print('IAM: generated duplicate k nearest graph.') - break - if not is_duplicate: - if np.abs(dhat_new - dhat) < epsilon: - print('IAM: I am equal!') -# dhat = dhat_new -# ghat_list = [ghat_new.copy()] - is_iam_duplicate = True - else: - print('IAM: we got better k nearest neighbors!') - nb_updated_k_iam += 1 - print('the k nearest neighbors are updated by IAM', - nb_updated_k_iam, 'times.') - - dis_k = dhat_new_list + dis_k[0:k-1] # add the new nearest distances. - Gk = [nx.convert_node_labels_to_integers(ghat_new_list[0].copy())] \ - + Gk[0:k-1] # add the corresponding graphs. - sort_idx = np.argsort(dis_k) - dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. - Gk = [Gk[idx] for idx in sort_idx[0:k]] - if dhat_new < dhat: - print('IAM: I have smaller distance!') - print(str(dhat) + '->' + str(dhat_new)) - dhat = dhat_new - ghat_list = [Gk[0].copy()] - r = 0 - nb_updated_iam += 1 - - print('the graph is updated by IAM', nb_updated_iam, 'times.') - nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'), - with_labels=True) - ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG") - plt.show() - else: - is_iam_duplicate = True - - else: # @todo: may not work. - dis_k = dhat_new_list + dis_k # add the new nearest distances. - Gk = [nx.convert_node_labels_to_integers(g.copy()) for g - in ghat_new_list] + Gk # add the corresponding graphs. - sort_idx = np.argsort(dis_k) - if len([i for i in sort_idx[0:k] if i < len(dhat_new_list)]) > 0: - print('We got new k nearest neighbors! Hurray!') - dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances. - # print(dis_k[-1]) - Gk = [Gk[idx] for idx in sort_idx[0:k]] - nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist()) - if dhat_new < dhat: - print('I have smaller distance!') - print(str(dhat) + '->' + str(dhat_new)) - dhat = dis_k[0] - idx_best_list = np.argwhere(dis_k == dhat_new).flatten().tolist() - ghat_list = [Gk[idx].copy() for idx in idx_best_list] - # for g in ghat_list: - ## nx.draw_networkx(g) - ## plt.show() - # draw_Letter_graph(g) - # print(g.nodes(data=True)) - # print(g.edges(data=True)) - r = 0 - found = True - nb_updated_iam += 1 - print('the graph is updated by IAM', nb_updated_iam, 'times.') - - nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), - with_labels=True) - ## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG") - plt.show() - else: - dis_k = [dis_k[idx] for idx in sort_idx[0:k]] - Gk = [Gk[idx] for idx in sort_idx[0:k]] -# Gn_nearest_median = [g.copy() for g in Gn_nearest_init] - if not found: - r += 1 + r += 1 -# old_dis = cur_dis -# cur_dis = dnew_best dis_of_each_itr.append(dhat) itr_total += 1 print('\nthe k shortest distances are', dis_k) @@ -942,4 +509,255 @@ def gram2distances(Kmatrix): for i2 in range(len(Kmatrix)): dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] dmatrix = np.sqrt(dmatrix) - return dmatrix \ No newline at end of file + return dmatrix + + +############################################################################### +# Old implementations. + +#def gk_iam(Gn, alpha): +# """This function constructs graph pre-image by the iterative pre-image +# framework in reference [1], algorithm 1, where the step of generating new +# graphs randomly is replaced by the IAM algorithm in reference [2]. +# +# notes +# ----- +# Every time a better graph is acquired, the older one is replaced by it. +# """ +# pass +# # compute k nearest neighbors of phi in DN. +# dis_list = [] # distance between g_star and each graph. +# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): +# dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * +# k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * +# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * +# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) +# dis_list.append(dtemp) +# +# # sort +# sort_idx = np.argsort(dis_list) +# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] +# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN +# if dis_gs[0] == 0: # the exact pre-image. +# print('The exact pre-image is found from the input dataset.') +# return 0, g0hat +# dhat = dis_gs[0] # the nearest distance +# Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors +# gihat_list = [] +# +## i = 1 +# r = 1 +# while r < r_max: +# print('r =', r) +## found = False +# Gs_nearest = Gk + gihat_list +# g_tmp = iam(Gs_nearest) +# +# # compute distance between \psi and the new generated graph. +# knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None, +# p_quit=lmbda, n_iteration=20, remove_totters=False, +# n_jobs=multiprocessing.cpu_count(), verbose=False) +# dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) * +# knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha * +# (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * +# k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]) +# if dnew <= dhat: # the new distance is smaller +# print('I am smaller!') +# dhat = dnew +# g_new = g_tmp.copy() # found better graph. +# gihat_list = [g_new] +# dis_gs.append(dhat) +# r = 0 +# else: +# r += 1 +# +# ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list) +# +# return dhat, ghat + + +#def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max): +# """This function constructs graph pre-image by the iterative pre-image +# framework in reference [1], algorithm 1, where the step of generating new +# graphs randomly is replaced by the IAM algorithm in reference [2]. +# +# notes +# ----- +# Every time a better graph is acquired, its distance in kernel space is +# compared with the k nearest ones, and the k nearest distances from the k+1 +# distances will be used as the new ones. +# """ +# # compute k nearest neighbors of phi in DN. +# dis_list = [] # distance between g_star and each graph. +# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): +# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix) +## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * +## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * +## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * +## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) +# dis_list.append(dtemp) +# +# # sort +# sort_idx = np.argsort(dis_list) +# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances +# g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN +# if dis_gs[0] == 0: # the exact pre-image. +# print('The exact pre-image is found from the input dataset.') +# return 0, g0hat +# dhat = dis_gs[0] # the nearest distance +# ghat = g0hat.copy() +# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors +# for gi in Gk: +# nx.draw_networkx(gi) +# plt.show() +# print(gi.nodes(data=True)) +# print(gi.edges(data=True)) +# Gs_nearest = Gk.copy() +## gihat_list = [] +# +## i = 1 +# r = 1 +# while r < r_max: +# print('r =', r) +## found = False +## Gs_nearest = Gk + gihat_list +## g_tmp = iam(Gs_nearest) +# g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1) +# nx.draw_networkx(g_tmp) +# plt.show() +# print(g_tmp.nodes(data=True)) +# print(g_tmp.edges(data=True)) +# +# # compute distance between \psi and the new generated graph. +# gi_list = [Gn[i] for i in idx_gi] +# knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False) +# dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew) +# +## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * +## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * +## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * +## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) +# if dnew <= dhat and g_tmp != ghat: # the new distance is smaller +# print('I am smaller!') +# print(str(dhat) + '->' + str(dnew)) +## nx.draw_networkx(ghat) +## plt.show() +## print('->') +## nx.draw_networkx(g_tmp) +## plt.show() +# +# dhat = dnew +# g_new = g_tmp.copy() # found better graph. +# ghat = g_tmp.copy() +# dis_gs.append(dhat) # add the new nearest distance. +# Gs_nearest.append(g_new) # add the corresponding graph. +# sort_idx = np.argsort(dis_gs) +# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. +# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] +# r = 0 +# else: +# r += 1 +# +# return dhat, ghat + + +#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max): +# """This function constructs graph pre-image by the iterative pre-image +# framework in reference [1], algorithm 1, where the step of generating new +# graphs randomly is replaced by the IAM algorithm in reference [2]. +# +# notes +# ----- +# Every time a set of n better graphs is acquired, their distances in kernel space are +# compared with the k nearest ones, and the k nearest distances from the k+n +# distances will be used as the new ones. +# """ +# Gn_median = [Gn[idx].copy() for idx in idx_gi] +# # compute k nearest neighbors of phi in DN. +# dis_list = [] # distance between g_star and each graph. +# for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout): +# dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix) +## dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * +## k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * +## (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * +## k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6]) +# dis_list.append(dtemp) +# +# # sort +# sort_idx = np.argsort(dis_list) +# dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances +# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) +# g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN +# if dis_gs[0] == 0: # the exact pre-image. +# print('The exact pre-image is found from the input dataset.') +# return 0, g0hat_list +# dhat = dis_gs[0] # the nearest distance +# ghat_list = [g.copy() for g in g0hat_list] +# for g in ghat_list: +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) +# Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors +# for gi in Gk: +# nx.draw_networkx(gi) +# plt.show() +# print(gi.nodes(data=True)) +# print(gi.edges(data=True)) +# Gs_nearest = Gk.copy() +## gihat_list = [] +# +## i = 1 +# r = 1 +# while r < r_max: +# print('r =', r) +## found = False +## Gs_nearest = Gk + gihat_list +## g_tmp = iam(Gs_nearest) +# g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations( +# Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1) +# for g in g_tmp_list: +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) +# +# # compute distance between \psi and the new generated graphs. +# gi_list = [Gn[i] for i in idx_gi] +# knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False) +# dnew_list = [] +# for idx, g_tmp in enumerate(g_tmp_list): +# dnew_list.append(dis_gstar(idx, range(len(g_tmp_list), +# len(g_tmp_list) + len(gi_list) + 1), alpha, knew)) +# +## dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * +## knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * +## alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * +## k_g1_list[1] + alpha[1] * alpha[1] * k_list[1]) +# +# # find the new k nearest graphs. +# dis_gs = dnew_list + dis_gs # add the new nearest distances. +# Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs. +# sort_idx = np.argsort(dis_gs) +# if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0: +# print('We got better k nearest neighbors! Hurray!') +# dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances. +# print(dis_gs[-1]) +# Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]] +# nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) +# if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0: +# print('I have smaller or equal distance!') +# dhat = dis_gs[0] +# print(str(dhat) + '->' + str(dhat)) +# idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist() +# ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list] +# for g in ghat_list: +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) +# r = 0 +# else: +# r += 1 +# +# return dhat, ghat_list \ No newline at end of file diff --git a/preimage/preimage.py b/preimage/preimage_random.py similarity index 99% rename from preimage/preimage.py rename to preimage/preimage_random.py index 92d500d..5ba241a 100644 --- a/preimage/preimage.py +++ b/preimage/preimage_random.py @@ -57,7 +57,7 @@ def compute_kernel(Gn, graph_kernel, verbose): return Kmatrix -def random_preimage(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): +def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init] # compute k nearest neighbors of phi in DN. diff --git a/preimage/run_gk_iam.py b/preimage/run_gk_iam.py index f4d1d8e..c59e8f9 100644 --- a/preimage/run_gk_iam.py +++ b/preimage/run_gk_iam.py @@ -402,7 +402,7 @@ def test_iam_letter_h(): def test_random_preimage_letter_h(): - from preimage import random_preimage, compute_kernel + from preimage_random import preimage_random, compute_kernel ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', 'extra_params': {}} # node nsymb # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', @@ -443,7 +443,7 @@ def test_random_preimage_letter_h(): # for each alpha for alpha in alpha_range: print('alpha =', alpha) - dhat, ghat_list = random_preimage(Gn_let, Gn_let, [alpha] * len(Gn_let), + dhat, ghat_list = preimage_random(Gn_let, Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)), km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7) diff --git a/preimage/setup.py b/preimage/setup.py deleted file mode 100644 index 381a51f..0000000 --- a/preimage/setup.py +++ /dev/null @@ -1,26 +0,0 @@ -#from distutils.core import setup -from distutils.extension import Extension -#from Cython.Distutils import build_ext - -from distutils.core import setup -from Cython.Build import cythonize - -#setup(ext_modules=cythonize("script.pyx")) - -extensions = [Extension("script", - sources=["script.pyx", "src/essai.cpp"], - include_dirs=["include","include/lsape", "include/Eigen", "include/nomad", "include/sgtelib", "include/libsvm.3.22", "include/fann", "include/boost_1_69_0"], - library_dirs=["lib/fann","lib/gedlib", "lib/libsvm.3.22","lib/nomad"], - libraries=["doublefann","sgtelib", "svm", "nomad"], - language="c++", - extra_compile_args=["-std=c++11"], - extra_link_args=["-std=c++11"])] - -setup(ext_modules=cythonize(extensions)) - -#extensions = [Extension("script", sources=["script.pyx", "include/gedlib-master/src/env/ged_env.ipp"], include_dirs=["."], language="c++")] - -#setup(name = "script", ext_modules = extensions, cmdclass = {'build_ext':build_ext},) - - -# Commande Bash : python setup.py build_ext --inplace diff --git a/preimage/test.py b/preimage/test.py index 05ec60d..d7d91ac 100644 --- a/preimage/test.py +++ b/preimage/test.py @@ -1,57 +1,84 @@ -#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/Cython_GedLib_2/lib/fann/:/export/home/lambertn/Documents/Cython_GedLib_2/lib/libsvm.3.22:/export/home/lambertn/Documents/Cython_GedLib_2/lib/nomad +#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad #Pour que "import script" trouve les librairies qu'a besoin GedLib #Equivalent à définir la variable d'environnement LD_LIBRARY_PATH sur un bash -#Permet de fonctionner sur Idle et autre sans définir à chaque fois la variable d'environnement -#os.environ ne fonctionne pas dans ce cas -import librariesImport, script +import gedlibpy.librariesImport +from gedlibpy import gedlibpy +import networkx as nx -#import script -#truc = script.computeEditDistanceOnGXlGraphs('include/gedlib-master/data/datasets/Mutagenicity/data/','collections/MUTA_10.xml',"CHEM_1", "BIPARTITE", "") -#print(truc) -#script.PyRestartEnv() -#script.appel() - -def test() : -# script.appel() - - script.PyRestartEnv() - - print("Here is the Python function !") - +def init() : print("List of Edit Cost Options : ") - for i in script.listOfEditCostOptions : + for i in gedlibpy.list_of_edit_cost_options : print (i) print("") print("List of Method Options : ") - for j in script.listOfMethodOptions : + for j in gedlibpy.list_of_method_options : print (j) print("") + + print("List of Init Options : ") + for k in gedlibpy.list_of_init_options : + print (k) + print("") - script.PyLoadGXLGraph('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml') - listID = script.PyGetGraphIds() +def test(): - afficheId = "" - for i in listID : - afficheId+=str(i) + " " - print("Number of graphs = " + str(len(listID)) + ", list of Ids = " + afficheId) + gedlibpy.load_GXL_graphs('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml') + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost("CHEM_1") + gedlibpy.init() + gedlibpy.set_method("IPFP", "") + gedlibpy.init_method() + g = listID[0] + h = listID[1] + gedlibpy.run_method(g, h) + print("Node Map : ", gedlibpy.get_node_map(g,h)) + print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h)) + print("Assignment Matrix : ") + print(gedlibpy.get_assignment_matrix(g, h)) + print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g,h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h))) - script.PySetEditCost("CHEM_1") - script.PyInitEnv() +def convertGraph(G): + G_new = nx.Graph() + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) + + return G_new - script.PySetMethod("BIPARTITE", "") - script.PyInitMethod() +def testNxGrapĥ(): + import sys + sys.path.insert(0, "../") + from pygraph.utils.graphfiles import loadDataset + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) + + gedlibpy.restart_env() + for graph in Gn: + g_new = convertGraph(graph) + gedlibpy.add_nx_graph(g_new, "") + + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost("CHEM_1") + gedlibpy.init() + gedlibpy.set_method("IPFP", "") + gedlibpy.init_method() + + print(listID) g = listID[0] h = listID[1] - script.PyRunMethod(g,h) - liste = script.PyGetAllMap(g,h) - print("Forward map : " ,liste[0], ", Backward map : ", liste[1]) - print ("Upper Bound = " + str(script.PyGetUpperBound(g,h)) + ", Lower Bound = " + str(script.PyGetLowerBound(g,h)) + ", Runtime = " + str(script.PyGetRuntime(g,h))) + gedlibpy.run_method(g, h) + print("Node Map : ", gedlibpy.get_node_map(g, h)) + print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h)) + print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h))) -test() +init() +#testNxGrapĥ() diff --git a/preimage/test_random_mutag.py b/preimage/test_random_mutag.py index 4439f40..e974a93 100644 --- a/preimage/test_random_mutag.py +++ b/preimage/test_random_mutag.py @@ -22,7 +22,7 @@ from pygraph.utils.graphfiles import loadDataset # random pre-image paper.) def test_preimage_mix_2combination_all_pairs(): - from gk_iam import preimage_iam_random_mix, compute_kernel + from preimage_iam import preimage_iam_random_mix, compute_kernel from iam import median_distance ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb @@ -37,6 +37,8 @@ def test_preimage_mix_2combination_all_pairs(): alpha_range = np.linspace(0.5, 0.5, 1) k = 5 # k nearest neighbors epsilon = 1e-6 + InitIAMWithAllDk = True + InitRandomWithAllDk = True # parameters for GED function ged_cost='CHEM_1' ged_method='IPFP' @@ -127,7 +129,8 @@ def test_preimage_mix_2combination_all_pairs(): nb_updated_k_iam, nb_updated_k_random = \ preimage_iam_random_mix(Gn, [g1, g2], [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, - l_max, gkernel, epsilon=epsilon, + l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, + InitRandomWithAllDk=InitRandomWithAllDk, params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, 'removeNodes': removeNodes, 'connected': connected_iam}, @@ -194,7 +197,7 @@ def test_preimage_mix_2combination_all_pairs(): def test_gkiam_2combination_all_pairs(): - from gk_iam import gk_iam_nearest_multi, compute_kernel + from preimage_iam import preimage_iam, compute_kernel from iam import median_distance ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb @@ -206,8 +209,9 @@ def test_gkiam_2combination_all_pairs(): lmbda = 0.03 # termination probalility r_max = 10 # iteration limit for pre-image. alpha_range = np.linspace(0.5, 0.5, 1) - k = 10 # k nearest neighbors + k = 5 # k nearest neighbors epsilon = 1e-6 + InitIAMWithAllDk = False # parameters for GED function ged_cost='CHEM_1' ged_method='IPFP' @@ -292,9 +296,9 @@ def test_gkiam_2combination_all_pairs(): print('alpha =', alpha) time0 = time.time() dhat, ghat_list, sod_ks, nb_updated, nb_updated_k = \ - gk_iam_nearest_multi(Gn, [g1, g2], + preimage_iam(Gn, [g1, g2], [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, - gkernel, epsilon=epsilon, + gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, 'removeNodes': removeNodes, 'connected': connected_iam}, @@ -463,7 +467,7 @@ def test_gkiam_2combination(): def test_random_preimage_2combination(): # from gk_iam import compute_kernel - from preimage import random_preimage + from preimage_random import preimage_random ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) @@ -535,7 +539,7 @@ def test_random_preimage_2combination(): print('\n-------------------------------------------------------\n') print('alpha =', alpha) time0 = time.time() - dhat, ghat, nb_updated = random_preimage(Gn, [g1, g2], [alpha, 1 - alpha], + dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, l, gkernel) time_total = time.time() - time0 + time_km @@ -610,5 +614,5 @@ if __name__ == '__main__': # random pre-image paper.) # test_random_preimage_2combination() # test_gkiam_2combination() - test_gkiam_2combination_all_pairs() -# test_preimage_mix_2combination_all_pairs() \ No newline at end of file +# test_gkiam_2combination_all_pairs() + test_preimage_mix_2combination_all_pairs() \ No newline at end of file