From dd810b92da4a2a8e33b25cd4bd8b2ff9ca0c0816 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 18 Oct 2019 18:03:57 +0200 Subject: [PATCH] Update pre-image. --- preimage/fitDistance.py | 103 ++++ preimage/ged.py | 197 +++++++ preimage/iam.py | 96 +--- preimage/preimage_iam.py | 98 +--- preimage/preimage_random.py | 51 +- preimage/test.py | 1 + preimage/test_iam.py | 167 ++++++ preimage/{run_gk_iam.py => test_others.py} | 42 +- .../{test_random_mutag.py => test_preimage_iam.py} | 587 ++++++++++----------- preimage/test_preimage_mix.py | 542 +++++++++++++++++++ preimage/test_preimage_random.py | 402 ++++++++++++++ preimage/utils.py | 109 ++++ 12 files changed, 1856 insertions(+), 539 deletions(-) create mode 100644 preimage/fitDistance.py create mode 100644 preimage/ged.py create mode 100644 preimage/test_iam.py rename preimage/{run_gk_iam.py => test_others.py} (95%) rename preimage/{test_random_mutag.py => test_preimage_iam.py} (53%) create mode 100644 preimage/test_preimage_mix.py create mode 100644 preimage/test_preimage_random.py create mode 100644 preimage/utils.py diff --git a/preimage/fitDistance.py b/preimage/fitDistance.py new file mode 100644 index 0000000..cc42c14 --- /dev/null +++ b/preimage/fitDistance.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Oct 16 14:20:06 2019 + +@author: ljia +""" +import numpy as np +from tqdm import tqdm + +import sys +sys.path.insert(0, "../") +from pygraph.utils.graphfiles import loadDataset +from ged import GED, get_nb_edit_operations +from utils import kernel_distance_matrix + +def fit_GED_to_kernel_distance(Gn, gkernel, itr_max): + c_vi = 1 + c_vr = 1 + c_vs = 1 + c_ei = 1 + c_er = 1 + c_es = 1 + + # compute distances in feature space. 
+ dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel) + dis_k_vec = [] + for i in range(len(dis_k_mat)): + for j in range(i, len(dis_k_mat)): + dis_k_vec.append(dis_k_mat[i, j]) + dis_k_vec = np.array(dis_k_vec) + + residual_list = [] + edit_cost_list = [] + + for itr in range(itr_max): + print('iteration', itr) + ged_all = [] + n_vi_all = [] + n_vr_all = [] + n_vs_all = [] + n_ei_all = [] + n_er_all = [] + n_es_all = [] + # compute GEDs and numbers of edit operations. + edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] + edit_cost_list.append(edit_cost_constant) + for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): +# for i in range(len(Gn)): + for j in range(i, len(Gn)): + dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy', + cost='CONSTANT', method='IPFP', + edit_cost_constant=edit_cost_constant, stabilizer='min', + repeat=30) + ged_all.append(dis) + n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(Gn[i], + Gn[j], pi_forward, pi_backward) + n_vi_all.append(n_vi) + n_vr_all.append(n_vr) + n_vs_all.append(n_vs) + n_ei_all.append(n_ei) + n_er_all.append(n_er) + n_es_all.append(n_es) + + residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) + residual_list.append(residual) + + # "fit" geds to distances in feature space by tuning edit costs using the + # Least Squares Method. 
+ nb_cost_mat = np.column_stack((np.array(n_vi_all), np.array(n_vr_all), + np.array(n_vs_all), np.array(n_ei_all), + np.array(n_er_all), np.array(n_es_all))) + edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, + rcond=None) + for i in range(len(edit_costs)): + if edit_costs[i] < 0: + if edit_costs[i] > -1e-3: + edit_costs[i] = 0 +# else: +# raise ValueError('The edit cost is negative.') + + c_vi = edit_costs[0] + c_vr = edit_costs[1] + c_vs = edit_costs[2] + c_ei = edit_costs[3] + c_er = edit_costs[4] + c_es = edit_costs[5] + + return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list + + + +if __name__ == '__main__': + from utils import remove_edges + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) + Gn = Gn[0:10] + remove_edges(Gn) + gkernel = 'marginalizedkernel' + itr_max = 10 + c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = \ + fit_GED_to_kernel_distance(Gn, gkernel, itr_max) \ No newline at end of file diff --git a/preimage/ged.py b/preimage/ged.py new file mode 100644 index 0000000..49b4403 --- /dev/null +++ b/preimage/ged.py @@ -0,0 +1,197 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Oct 17 18:44:59 2019 + +@author: ljia +""" +import numpy as np +import networkx as nx +from tqdm import tqdm +import sys + +from gedlibpy import librariesImport, gedlibpy + +def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', + edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50): + """ + Compute GED for 2 graphs. + """ + if lib == 'gedlibpy': + def convertGraph(G): + """Convert a graph to the proper NetworkX format that can be + recognized by library gedlibpy. 
+ """ + G_new = nx.Graph() + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): +# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) + G_new.add_edge(str(nd1), str(nd2)) + + return G_new + + gedlibpy.restart_env() + gedlibpy.add_nx_graph(convertGraph(g1), "") + gedlibpy.add_nx_graph(convertGraph(g2), "") + + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) + gedlibpy.init() + gedlibpy.set_method(method, "") + gedlibpy.init_method() + + g = listID[0] + h = listID[1] + if stabilizer == None: + gedlibpy.run_method(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + upper = gedlibpy.get_upper_bound(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'min': + upper = np.inf + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp < upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + if upper == 0: + break + + dis = upper + + # make the map label correct (label remove map as np.inf) + nodes1 = [n for n in g1.nodes()] + nodes2 = [n for n in g2.nodes()] + nb1 = nx.number_of_nodes(g1) + nb2 = nx.number_of_nodes(g2) + pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] + pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] + + return dis, pi_forward, pi_backward + + +def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', + edit_cost_constant=[], stabilizer='min', repeat=50): + """ + Compute GEDs for a group of graphs. + """ + if lib == 'gedlibpy': + def convertGraph(G): + """Convert a graph to the proper NetworkX format that can be + recognized by library gedlibpy. 
+ """ + G_new = nx.Graph() + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): +# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) + G_new.add_edge(str(nd1), str(nd2)) + + return G_new + + gedlibpy.restart_env() + gedlibpy.add_nx_graph(convertGraph(g1), "") + gedlibpy.add_nx_graph(convertGraph(g2), "") + + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) + gedlibpy.init() + gedlibpy.set_method(method, "") + gedlibpy.init_method() + + g = listID[0] + h = listID[1] + if stabilizer == None: + gedlibpy.run_method(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + upper = gedlibpy.get_upper_bound(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'min': + upper = np.inf + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp < upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + if upper == 0: + break + + dis = upper + + # make the map label correct (label remove map as np.inf) + nodes1 = [n for n in g1.nodes()] + nodes2 = [n for n in g2.nodes()] + nb1 = nx.number_of_nodes(g1) + nb2 = nx.number_of_nodes(g2) + pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] + pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] + + return dis, pi_forward, pi_backward + + +def ged_median(Gn, Gn_median, measure='ged', verbose=False, + ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'): + dis_list = [] + pi_forward_list = [] + for idx, G in tqdm(enumerate(Gn), desc='computing median distances', + file=sys.stdout) if verbose else enumerate(Gn): + dis_sum = 0 + pi_forward_list.append([]) + for G_p in Gn_median: + dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, 
G_p, + cost=ged_cost, method=ged_method, saveGXL=saveGXL) + pi_forward_list[idx].append(pi_tmp_forward) + dis_sum += dis_tmp + dis_list.append(dis_sum) + return dis_list, pi_forward_list + + +def get_nb_edit_operations(g1, g2, forward_map, backward_map): + """Compute the number of each edit operations. + """ + n_vi = 0 + n_vr = 0 + n_vs = 0 + n_ei = 0 + n_er = 0 + n_es = 0 + + nodes1 = [n for n in g1.nodes()] + for i, map_i in enumerate(forward_map): + if map_i == np.inf: + n_vr += 1 + elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']: + n_vs += 1 + for map_i in backward_map: + if map_i == np.inf: + n_vi += 1 + +# idx_nodes1 = range(0, len(node1)) + + edges1 = [e for e in g1.edges()] + nb_edges2_cnted = 0 + for n1, n2 in edges1: + idx1 = nodes1.index(n1) + idx2 = nodes1.index(n2) + # one of the nodes is removed, thus the edge is removed. + if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: + n_er += 1 + # corresponding edge is in g2. Edge label is not considered. + elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \ + (forward_map[idx2], forward_map[idx1]) in g2.edges(): + nb_edges2_cnted += 1 + # corresponding nodes are in g2, however the edge is removed. 
+ else: + n_er += 1 + n_ei = nx.number_of_edges(g2) - nb_edges2_cnted + + return n_vi, n_vr, n_vs, n_ei, n_er, n_es \ No newline at end of file diff --git a/preimage/iam.py b/preimage/iam.py index e4a2018..a7b01b9 100644 --- a/preimage/iam.py +++ b/preimage/iam.py @@ -12,10 +12,10 @@ import networkx as nx from tqdm import tqdm import sys -from gedlibpy import librariesImport, gedlibpy sys.path.insert(0, "../") from pygraph.utils.graphdataset import get_dataset_attributes from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels +from ged import GED, ged_median def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, @@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # # find the best graph generated in this iteration and update pi_p. # @todo: should we update all graphs generated or just the best ones? - dis_list, pi_forward_list = median_distance(G_new_list, Gn_median, + dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, **params_ged) # @todo: should we remove the identical and connectivity check? # Don't know which is faster. @@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, # phase 1: initilize. # compute set-median. dis_min = np.inf - dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median, + dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median, **params_ged) # find all smallest distances. if allBestInit: # try all best init graphs. @@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, - - - - - - - -############################################################################### -# Useful functions. - -def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit', - stabilizer='min'): - """ - Compute GED. 
- """ - if lib == 'gedlibpy': - def convertGraph(G): - """Convert a graph to the proper NetworkX format that can be - recognized by library gedlibpy. - """ - G_new = nx.Graph() - for nd, attrs in G.nodes(data=True): - G_new.add_node(str(nd), chem=attrs['atom']) - for nd1, nd2, attrs in G.edges(data=True): -# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) - G_new.add_edge(str(nd1), str(nd2)) - - return G_new - - gedlibpy.restart_env() - gedlibpy.add_nx_graph(convertGraph(g1), "") - gedlibpy.add_nx_graph(convertGraph(g2), "") - - listID = gedlibpy.get_all_graph_ids() - gedlibpy.set_edit_cost(cost) - gedlibpy.init() - gedlibpy.set_method(method, "") - gedlibpy.init_method() - - g = listID[0] - h = listID[1] - if stabilizer == None: - gedlibpy.run_method(g, h) - pi_forward = gedlibpy.get_forward_map(g, h) - pi_backward = gedlibpy.get_backward_map(g, h) - upper = gedlibpy.get_upper_bound(g, h) - lower = gedlibpy.get_lower_bound(g, h) - elif stabilizer == 'min': - upper = np.inf - for itr in range(50): - gedlibpy.run_method(g, h) - upper_tmp = gedlibpy.get_upper_bound(g, h) - if upper_tmp < upper: - upper = upper_tmp - pi_forward = gedlibpy.get_forward_map(g, h) - pi_backward = gedlibpy.get_backward_map(g, h) - lower = gedlibpy.get_lower_bound(g, h) - if upper == 0: - break - - dis = upper - - # make the map label correct (label remove map as np.inf) - nodes1 = [n for n in g1.nodes()] - nodes2 = [n for n in g2.nodes()] - nb1 = nx.number_of_nodes(g1) - nb2 = nx.number_of_nodes(g2) - pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] - pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] - - return dis, pi_forward, pi_backward - - -def median_distance(Gn, Gn_median, measure='ged', verbose=False, - ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'): - dis_list = [] - pi_forward_list = [] - for idx, G in tqdm(enumerate(Gn), desc='computing median distances', - file=sys.stdout) if verbose else enumerate(Gn): - 
dis_sum = 0 - pi_forward_list.append([]) - for G_p in Gn_median: - dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, - cost=ged_cost, method=ged_method, saveGXL=saveGXL) - pi_forward_list[idx].append(pi_tmp_forward) - dis_sum += dis_tmp - dis_list.append(dis_sum) - return dis_list, pi_forward_list - - ############################################################################### # Old implementations. diff --git a/preimage/preimage_iam.py b/preimage/preimage_iam.py index da4cc80..ff16955 100644 --- a/preimage/preimage_iam.py +++ b/preimage/preimage_iam.py @@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2]. """ import sys import numpy as np -import multiprocessing from tqdm import tqdm import networkx as nx import matplotlib.pyplot as plt import random from iam import iam_upgraded -sys.path.insert(0, "../") -from pygraph.kernels.marginalizedKernel import marginalizedkernel -from pygraph.kernels.untilHPathKernel import untilhpathkernel -from pygraph.kernels.spKernel import spkernel -import functools -from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct -from pygraph.kernels.structuralspKernel import structuralspkernel +from utils import dis_gstar, compute_kernel def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, @@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, # print(g.nodes(data=True)) # print(g.edges(data=True)) Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors - for gi in Gk: - nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) -# nx.draw_networkx(gi) - plt.show() -# draw_Letter_graph(g) - print(gi.nodes(data=True)) - print(gi.edges(data=True)) +# for gi in Gk: +# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) +## nx.draw_networkx(gi) +# plt.show() +## draw_Letter_graph(g) +# print(gi.nodes(data=True)) +# print(gi.edges(data=True)) # i = 1 r = 0 @@ -173,7 +166,7 @@ 
def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, print('\nthe k shortest distances are', dis_k) print('the shortest distances for previous iterations are', dis_of_each_itr) - print('\nthe graph is updated', nb_updated, 'times.') + print('\n\nthe graph is updated', nb_updated, 'times.') print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.') print('distances in kernel space:', dis_of_each_itr, '\n') @@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max # print(g.nodes(data=True)) # print(g.edges(data=True)) Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors - for gi in Gk: - nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) -# nx.draw_networkx(gi) - plt.show() -# draw_Letter_graph(g) - print(gi.nodes(data=True)) - print(gi.edges(data=True)) +# for gi in Gk: +# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True) +## nx.draw_networkx(gi) +# plt.show() +## draw_Letter_graph(g) +# print(gi.nodes(data=True)) +# print(gi.edges(data=True)) r = 0 itr_total = 0 @@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max # compute distance between \psi and the new generated graph. knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False) - dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False) + dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1), + alpha, knew, withterm3=False) # @todo: the new distance is smaller or also equal? if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon: # check if the new distance is the same as one in D_k. 
@@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max print('\nthe k shortest distances are', dis_k) print('the shortest distances for previous iterations are', dis_of_each_itr) - print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation', + print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation', nb_updated_random, 'times.') print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, 'times, and by random generation', nb_updated_k_random, 'times.') @@ -459,60 +453,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max ############################################################################### -# useful functions. - -def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): - term1 = Kmatrix[idx_g, idx_g] - term2 = 0 - for i, a in enumerate(alpha): - term2 += a * Kmatrix[idx_g, idx_gi[i]] - term2 *= 2 - if withterm3 == False: - for i1, a1 in enumerate(alpha): - for i2, a2 in enumerate(alpha): - term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] - return np.sqrt(term1 - term2 + term3) - - -def compute_kernel(Gn, graph_kernel, verbose): - if graph_kernel == 'marginalizedkernel': - Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, - p_quit=0.03, n_iteration=10, remove_totters=False, - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - elif graph_kernel == 'untilhpathkernel': - Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None, - depth=10, k_func='MinMax', compute_method='trie', - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - elif graph_kernel == 'spkernel': - mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) - Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= - {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - elif graph_kernel == 
'structuralspkernel': - mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) - Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= - {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - - # normalization - Kmatrix_diag = Kmatrix.diagonal().copy() - for i in range(len(Kmatrix)): - for j in range(i, len(Kmatrix)): - Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) - Kmatrix[j][i] = Kmatrix[i][j] - return Kmatrix - - -def gram2distances(Kmatrix): - dmatrix = np.zeros((len(Kmatrix), len(Kmatrix))) - for i1 in range(len(Kmatrix)): - for i2 in range(len(Kmatrix)): - dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] - dmatrix = np.sqrt(dmatrix) - return dmatrix - - -############################################################################### # Old implementations. #def gk_iam(Gn, alpha): diff --git a/preimage/preimage_random.py b/preimage/preimage_random.py index 5ba241a..a8a24d9 100644 --- a/preimage/preimage_random.py +++ b/preimage/preimage_random.py @@ -10,51 +10,14 @@ pre-image import sys import numpy as np import random -import multiprocessing from tqdm import tqdm import networkx as nx import matplotlib.pyplot as plt sys.path.insert(0, "../") -from pygraph.utils.graphfiles import loadDataset -from pygraph.kernels.marginalizedKernel import marginalizedkernel -from pygraph.kernels.untilHPathKernel import untilhpathkernel -from pygraph.kernels.spKernel import spkernel -import functools -from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct -from pygraph.kernels.structuralspKernel import structuralspkernel -from gk_iam import dis_gstar - - -def compute_kernel(Gn, graph_kernel, verbose): - if graph_kernel == 'marginalizedkernel': - Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, - p_quit=0.03, n_iteration=10, remove_totters=False, - n_jobs=multiprocessing.cpu_count(), 
verbose=verbose) - elif graph_kernel == 'untilhpathkernel': - Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None, - depth=10, k_func='MinMax', compute_method='trie', - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - elif graph_kernel == 'spkernel': - mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) - Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= - {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - elif graph_kernel == 'structuralspkernel': - mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) - Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= - {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, - n_jobs=multiprocessing.cpu_count(), verbose=verbose) - - # normalization - Kmatrix_diag = Kmatrix.diagonal().copy() - for i in range(len(Kmatrix)): - for j in range(i, len(Kmatrix)): - Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) - Kmatrix[j][i] = Kmatrix[i][j] - return Kmatrix +from utils import compute_kernel, dis_gstar def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): @@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke r = 0 # sod_list = [dhat] # found = False + dis_of_each_itr = [dhat] nb_updated = 0 g_best = [] while r < r_max: @@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke # p_quit=lmbda, n_iteration=20, remove_totters=False, # n_jobs=multiprocessing.cpu_count(), verbose=False) knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False) - dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False) + dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew, + withterm3=False) if dnew <= dhat: # @todo: the new distance is smaller or also equal? 
if dnew < dhat: print('\nI am smaller!') @@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke dihat_list = [dhat] else: r += 1 + + dis_of_each_itr.append(dhat) + print('the shortest distances for previous iterations are', dis_of_each_itr) # dis_best.append(dhat) - g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) + g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) + print('distances in kernel space:', dis_of_each_itr, '\n') + return dhat, g_best, nb_updated # return 0, 0, 0 if __name__ == '__main__': + from pygraph.utils.graphfiles import loadDataset # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', # 'extra_params': {}} # node/edge symb diff --git a/preimage/test.py b/preimage/test.py index d7d91ac..f3a13c8 100644 --- a/preimage/test.py +++ b/preimage/test.py @@ -80,5 +80,6 @@ def testNxGrapĥ(): print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h)) print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h))) +#test() init() #testNxGrapĥ() diff --git a/preimage/test_iam.py b/preimage/test_iam.py new file mode 100644 index 0000000..ee51e4b --- /dev/null +++ b/preimage/test_iam.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Sep 5 15:59:00 2019 + +@author: ljia +""" + +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +import time +import random +#from tqdm import tqdm + +#import os +import sys +sys.path.insert(0, "../") +from pygraph.utils.graphfiles import loadDataset +from iam import iam_upgraded +from utils import remove_edges, compute_kernel, get_same_item_indices +from ged import ged_median + +############################################################################### +# tests on different numbers of median-sets. 
+ +def test_iam_median_nb(): + + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# Gn = Gn[0:50] + remove_edges(Gn) + gkernel = 'marginalizedkernel' + +# lmbda = 0.03 # termination probalility +# r_max = 10 # iteration limit for pre-image. +# alpha_range = np.linspace(0.5, 0.5, 1) +# k = 5 # k nearest neighbors +# epsilon = 1e-6 +# InitIAMWithAllDk = True + # parameters for GED function + ged_cost='CHEM_1' + ged_method='IPFP' + saveGXL='gedlib' + # parameters for IAM function + c_ei=1 + c_er=1 + c_es=1 + ite_max_iam = 50 + epsilon_iam = 0.001 + removeNodes = False + connected_iam = False + + # number of graphs; we what to compute the median of these graphs. + nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] + + # find out all the graphs classified to positive group 1. + idx_dict = get_same_item_indices(y_all) + Gn = [Gn[i] for i in idx_dict[1]] + +# # compute Gram matrix. +# time0 = time.time() +# km = compute_kernel(Gn, gkernel, True) +# time_km = time.time() - time0 +# # write Gram matrix to file. 
+# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) + + + time_list = [] + dis_ks_min_list = [] + sod_gs_list = [] + sod_gs_min_list = [] + nb_updated_list = [] + nb_updated_k_list = [] + g_best = [] + for nb_median in nb_median_range: + print('\n-------------------------------------------------------') + print('number of median graphs =', nb_median) + random.seed(1) + idx_rdm = random.sample(range(len(Gn)), nb_median) + print('graphs chosen:', idx_rdm) + Gn_median = [Gn[idx].copy() for idx in idx_rdm] + Gn_candidate = [g.copy() for g in Gn_median] + +# for g in Gn_median: +# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) +## plt.savefig("results/preimage_mix/mutag.png", format="PNG") +# plt.show() +# plt.clf() + + ################################################################### + gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') + km_tmp = gmfile['gm'] + time_km = gmfile['gmtime'] + # modify mixed gram matrix. 
+ km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) + for i in range(len(Gn)): + for j in range(i, len(Gn)): + km[i, j] = km_tmp[i, j] + km[j, i] = km[i, j] + for i in range(len(Gn)): + for j, idx in enumerate(idx_rdm): + km[i, len(Gn) + j] = km[i, idx] + km[len(Gn) + j, i] = km[i, idx] + for i, idx1 in enumerate(idx_rdm): + for j, idx2 in enumerate(idx_rdm): + km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] + + ################################################################### + alpha_range = [1 / nb_median] * nb_median + time0 = time.time() + ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate, + c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, + epsilon=epsilon_iam, removeNodes=removeNodes, + connected=connected_iam, + params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, + 'saveGXL': saveGXL}) + + time_total = time.time() - time0 + print('\ntime: ', time_total) + time_list.append(time_total) + print('\nsmallest distance in kernel space: ', dhat) + dis_ks_min_list.append(dhat) + g_best.append(ghat_list) + print('\nnumber of updates of the best graph: ', nb_updated) + nb_updated_list.append(nb_updated) + print('\nnumber of updates of k nearest graphs: ', nb_updated_k) + nb_updated_k_list.append(nb_updated_k) + + # show the best graph and save it to file. + print('the shortest distance is', dhat) + print('one of the possible corresponding pre-images is') + nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), + with_labels=True) + plt.show() + plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + + '.png', format="PNG") + plt.clf() +# print(ghat_list[0].nodes(data=True)) +# print(ghat_list[0].edges(data=True)) + + # compute the corresponding sod in graph space. 
+ sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, + ged_method=ged_method, saveGXL=saveGXL) + sod_gs_list.append(sod_tmp) + sod_gs_min_list.append(np.min(sod_tmp)) + print('\nsmallest sod in graph space: ', np.min(sod_tmp)) + + print('\nsods in graph space: ', sod_gs_list) + print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) + print('\nsmallest distance in kernel space for each set of median graphs: ', + dis_ks_min_list) + print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', + nb_updated_list) + print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', + nb_updated_k_list) + print('\ntimes:', time_list) + + +############################################################################### + + +if __name__ == '__main__': +############################################################################### +# tests on different numbers of median-sets. + test_iam_median_nb() \ No newline at end of file diff --git a/preimage/run_gk_iam.py b/preimage/test_others.py similarity index 95% rename from preimage/run_gk_iam.py rename to preimage/test_others.py index c59e8f9..24dc4c5 100644 --- a/preimage/run_gk_iam.py +++ b/preimage/test_others.py @@ -15,6 +15,9 @@ import sys sys.path.insert(0, "../") from pygraph.utils.graphfiles import loadDataset from median import draw_Letter_graph +from ged import GED, ged_median +from utils import get_same_item_indices, compute_kernel, gram2distances, \ + dis_gstar, remove_edges # --------------------------- These are tests --------------------------------# @@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn): def test_who_is_the_closest_in_GED_space(Gn): - from iam import GED idx_gi = [0, 6] g1 = Gn[idx_gi[0]] g2 = Gn[idx_gi[1]] @@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn): def test_the_simple_two(Gn, gkernel): - from gk_iam import gk_iam_nearest_multi, compute_kernel + from gk_iam import 
gk_iam_nearest_multi lmbda = 0.03 # termination probalility r_max = 10 # recursions l = 500 @@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel): def test_remove_bests(Gn, gkernel): - from gk_iam import gk_iam_nearest_multi, compute_kernel + from gk_iam import gk_iam_nearest_multi lmbda = 0.03 # termination probalility r_max = 10 # recursions l = 500 @@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel): # Tests on dataset Letter-H. def test_gkiam_letter_h(): - from gk_iam import gk_iam_nearest_multi, compute_kernel - from iam import median_distance + from gk_iam import gk_iam_nearest_multi ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', 'extra_params': {}} # node nsymb # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', @@ -305,7 +306,7 @@ def test_gkiam_letter_h(): print(g.edges(data=True)) # compute the corresponding sod in graph space. (alpha range not considered.) - sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER', + sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter') sod_gs_list.append(sod_tmp) sod_gs_min_list.append(np.min(sod_tmp)) @@ -318,19 +319,6 @@ def test_gkiam_letter_h(): print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) print('\nnumber of updates for each letter: ', nb_updated_list) print('\ntimes:', time_list) - - -def get_same_item_indices(ls): - """Get the indices of the same items in a list. Return a dict keyed by items. 
- """ - idx_dict = {} - for idx, item in enumerate(ls): - if item in idx_dict: - idx_dict[item].append(idx) - else: - idx_dict[item] = [idx] - return idx_dict - #def compute_letter_median_by_average(Gn): # return g_median @@ -338,7 +326,6 @@ def get_same_item_indices(ls): def test_iam_letter_h(): from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations - from gk_iam import dis_gstar, compute_kernel ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', 'extra_params': {}} # node nsymb # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', @@ -402,7 +389,7 @@ def test_iam_letter_h(): def test_random_preimage_letter_h(): - from preimage_random import preimage_random, compute_kernel + from preimage_random import preimage_random ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', 'extra_params': {}} # node nsymb # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', @@ -463,7 +450,7 @@ def test_random_preimage_letter_h(): print(g.edges(data=True)) # compute the corresponding sod in graph space. (alpha range not considered.) - sod_tmp, _ = median_distance(g_best[0], Gn_let) + sod_tmp, _ = ged_median(g_best[0], Gn_let) sod_list.append(sod_tmp) sod_min_list.append(np.min(sod_tmp)) @@ -479,8 +466,7 @@ def test_random_preimage_letter_h(): def test_gkiam_mutag(): - from gk_iam import gk_iam_nearest_multi, compute_kernel - from iam import median_distance + from gk_iam import gk_iam_nearest_multi ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', 'extra_params': {}} # node nsymb # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', @@ -535,7 +521,7 @@ def test_gkiam_mutag(): print(g.edges(data=True)) # compute the corresponding sod in graph space. (alpha range not considered.) 
- sod_tmp, _ = median_distance(g_best[0], Gn_let) + sod_tmp, _ = ged_median(g_best[0], Gn_let) sod_gs_list.append(sod_tmp) sod_gs_min_list.append(np.min(sod_tmp)) sod_ks_min_list.append(sod_ks) @@ -553,9 +539,7 @@ def test_gkiam_mutag(): # Re-test. def retest_the_simple_two(): - from gk_iam import gk_iam_nearest_multi, compute_kernel - from iam import median_distance - from test_random_mutag import remove_edges + from gk_iam import gk_iam_nearest_multi # The two simple graphs. # g1 = nx.Graph(name='haha') @@ -653,7 +637,7 @@ def retest_the_simple_two(): # compute the corresponding sod in graph space. for idx, item in enumerate(alpha_range): - sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, + sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL) sod_gs_list.append(sod_tmp) sod_gs_min_list.append(np.min(sod_tmp)) diff --git a/preimage/test_random_mutag.py b/preimage/test_preimage_iam.py similarity index 53% rename from preimage/test_random_mutag.py rename to preimage/test_preimage_iam.py index e974a93..936ce35 100644 --- a/preimage/test_random_mutag.py +++ b/preimage/test_preimage_iam.py @@ -10,20 +10,23 @@ import numpy as np import networkx as nx import matplotlib.pyplot as plt import time -from tqdm import tqdm +import random +#from tqdm import tqdm -import os +#import os import sys sys.path.insert(0, "../") from pygraph.utils.graphfiles import loadDataset +from utils import remove_edges, compute_kernel, get_same_item_indices +from ged import ged_median + +from preimage_iam import preimage_iam + ############################################################################### -# test on the combination of the two randomly chosen graphs. (the same as in the -# random pre-image paper.) +# tests on different values on grid of median-sets and k. 
-def test_preimage_mix_2combination_all_pairs(): - from preimage_iam import preimage_iam_random_mix, compute_kernel - from iam import median_distance +def test_preimage_iam_grid_k_median_nb(): ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) @@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs(): gkernel = 'marginalizedkernel' lmbda = 0.03 # termination probalility - r_max = 10 # iteration limit for pre-image. - l_max = 500 # update limit for random generation - alpha_range = np.linspace(0.5, 0.5, 1) - k = 5 # k nearest neighbors + r_max = 5 # iteration limit for pre-image. +# alpha_range = np.linspace(0.5, 0.5, 1) +# k = 5 # k nearest neighbors epsilon = 1e-6 InitIAMWithAllDk = True - InitRandomWithAllDk = True # parameters for GED function ged_cost='CHEM_1' ged_method='IPFP' @@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs(): removeNodes = True connected_iam = False - nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf) - nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf) - # test on each pair of graphs. -# for idx1 in range(len(Gn) - 1, -1, -1): -# for idx2 in range(idx1, -1, -1): - for idx1 in range(187, 188): - for idx2 in range(167, 168): - g1 = Gn[idx1].copy() - g2 = Gn[idx2].copy() - # Gn[10] = [] - # Gn[10] = [] + # number of graphs; we what to compute the median of these graphs. + nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] + # number of nearest neighbors. + k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] + + # find out all the graphs classified to positive group 1. + idx_dict = get_same_item_indices(y_all) + Gn = [Gn[i] for i in idx_dict[1]] + +# # compute Gram matrix. +# time0 = time.time() +# km = compute_kernel(Gn, gkernel, True) +# time_km = time.time() - time0 +# # write Gram matrix to file. 
+# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) + + + time_list = [] + dis_ks_min_list = [] + sod_gs_list = [] + sod_gs_min_list = [] + nb_updated_list = [] + nb_updated_k_list = [] + g_best = [] + for idx_nb, nb_median in enumerate(nb_median_range): + print('\n-------------------------------------------------------') + print('number of median graphs =', nb_median) + random.seed(1) + idx_rdm = random.sample(range(len(Gn)), nb_median) + print('graphs chosen:', idx_rdm) + Gn_median = [Gn[idx].copy() for idx in idx_rdm] + +# for g in Gn_median: +# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) +## plt.savefig("results/preimage_mix/mutag.png", format="PNG") +# plt.show() +# plt.clf() + + ################################################################### + gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') + km_tmp = gmfile['gm'] + time_km = gmfile['gmtime'] + # modify mixed gram matrix. + km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) + for i in range(len(Gn)): + for j in range(i, len(Gn)): + km[i, j] = km_tmp[i, j] + km[j, i] = km[i, j] + for i in range(len(Gn)): + for j, idx in enumerate(idx_rdm): + km[i, len(Gn) + j] = km[i, idx] + km[len(Gn) + j, i] = km[i, idx] + for i, idx1 in enumerate(idx_rdm): + for j, idx2 in enumerate(idx_rdm): + km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] + + ################################################################### + alpha_range = [1 / nb_median] * nb_median + + time_list.append([]) + dis_ks_min_list.append([]) + sod_gs_list.append([]) + sod_gs_min_list.append([]) + nb_updated_list.append([]) + nb_updated_k_list.append([]) + g_best.append([]) + + for k in k_range: + print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') + print('k =', k) + time0 = time.time() + dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ + preimage_iam(Gn, Gn_median, + alpha_range, range(len(Gn), len(Gn) + 
nb_median), km, k, r_max, + gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, + params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, + 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, + 'removeNodes': removeNodes, 'connected': connected_iam}, + params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, + 'saveGXL': saveGXL}) + + time_total = time.time() - time0 + time_km + print('time: ', time_total) + time_list[idx_nb].append(time_total) + print('\nsmallest distance in kernel space: ', dhat) + dis_ks_min_list[idx_nb].append(dhat) + g_best[idx_nb].append(ghat_list) + print('\nnumber of updates of the best graph by IAM: ', nb_updated) + nb_updated_list[idx_nb].append(nb_updated) + print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k) + nb_updated_k_list[idx_nb].append(nb_updated_k) - nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) - plt.savefig("results/preimage_mix/mutag187.png", format="PNG") - plt.show() - plt.clf() - nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) - plt.savefig("results/preimage_mix/mutag167.png", format="PNG") - plt.show() + # show the best graph and save it to file. + print('the shortest distance is', dhat) + print('one of the possible corresponding pre-images is') + nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), + with_labels=True) + plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + + '_k' + str(k) + '.png', format="PNG") + # plt.show() plt.clf() + # print(ghat_list[0].nodes(data=True)) + # print(ghat_list[0].edges(data=True)) + + # compute the corresponding sod in graph space. 
+ sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, + ged_method=ged_method, saveGXL=saveGXL) + sod_gs_list[idx_nb].append(sod_tmp) + sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) + print('\nsmallest sod in graph space: ', np.min(sod_tmp)) + + print('\nsods in graph space: ', sod_gs_list) + print('\nsmallest sod in graph space for each set of median graphs and k: ', + sod_gs_min_list) + print('\nsmallest distance in kernel space for each set of median graphs and k: ', + dis_ks_min_list) + print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', + nb_updated_list) + print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', + nb_updated_k_list) + print('\ntimes:', time_list) + + + + - ################################################################### -# Gn_mix = [g.copy() for g in Gn] -# Gn_mix.append(g1.copy()) -# Gn_mix.append(g2.copy()) -# -# # compute -# time0 = time.time() -# km = compute_kernel(Gn_mix, gkernel, True) -# time_km = time.time() - time0 -# -# # write Gram matrix to file and read it. -# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) - - ################################################################### - gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') - km = gmfile['gm'] - time_km = gmfile['gmtime'] - # modify mixed gram matrix. - for i in range(len(Gn)): - km[i, len(Gn)] = km[i, idx1] - km[i, len(Gn) + 1] = km[i, idx2] - km[len(Gn), i] = km[i, idx1] - km[len(Gn) + 1, i] = km[i, idx2] - km[len(Gn), len(Gn)] = km[idx1, idx1] - km[len(Gn), len(Gn) + 1] = km[idx1, idx2] - km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] - km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] - - ################################################################### -# # use only the two graphs in median set as candidates. 
-# Gn = [g1.copy(), g2.copy()] -# Gn_mix = Gn + [g1.copy(), g2.copy()] -# # compute -# time0 = time.time() -# km = compute_kernel(Gn_mix, gkernel, True) -# time_km = time.time() - time0 + +############################################################################### +# tests on different numbers of median-sets. + +def test_preimage_iam_median_nb(): + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# Gn = Gn[0:50] + remove_edges(Gn) + gkernel = 'marginalizedkernel' - - time_list = [] - dis_ks_min_list = [] - sod_gs_list = [] - sod_gs_min_list = [] - nb_updated_list_iam = [] - nb_updated_list_random = [] - nb_updated_k_list_iam = [] - nb_updated_k_list_random = [] - g_best = [] - # for each alpha - for alpha in alpha_range: - print('\n-------------------------------------------------------\n') - print('alpha =', alpha) - time0 = time.time() - dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ - nb_updated_k_iam, nb_updated_k_random = \ - preimage_iam_random_mix(Gn, [g1, g2], - [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, - l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, - InitRandomWithAllDk=InitRandomWithAllDk, - params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, - 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, - 'removeNodes': removeNodes, 'connected': connected_iam}, - params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, - 'saveGXL': saveGXL}) - time_total = time.time() - time0 + time_km - print('time: ', time_total) - time_list.append(time_total) - dis_ks_min_list.append(dhat) - g_best.append(ghat_list) - nb_updated_list_iam.append(nb_updated_iam) - nb_updated_list_random.append(nb_updated_random) - nb_updated_k_list_iam.append(nb_updated_k_iam) - nb_updated_k_list_random.append(nb_updated_k_random) - - # show best graphs and save them to file. 
- for idx, item in enumerate(alpha_range): - print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) - print('one of the possible corresponding pre-images is') - nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), - with_labels=True) - plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) - + '_alpha' + str(item) + '.png', format="PNG") -# plt.show() - plt.clf() -# print(g_best[idx][0].nodes(data=True)) -# print(g_best[idx][0].edges(data=True)) - - # for g in g_best[idx]: - # draw_Letter_graph(g, savepath='results/gk_iam/') - ## nx.draw_networkx(g) - ## plt.show() - # print(g.nodes(data=True)) - # print(g.edges(data=True)) + lmbda = 0.03 # termination probalility + r_max = 10 # iteration limit for pre-image. +# alpha_range = np.linspace(0.5, 0.5, 1) + k = 5 # k nearest neighbors + epsilon = 1e-6 + InitIAMWithAllDk = True + # parameters for GED function + ged_cost='CHEM_1' + ged_method='IPFP' + saveGXL='gedlib' + # parameters for IAM function + c_ei=1 + c_er=1 + c_es=1 + ite_max_iam = 50 + epsilon_iam = 0.001 + removeNodes = True + connected_iam = False + + # number of graphs; we what to compute the median of these graphs. + nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] + + # find out all the graphs classified to positive group 1. + idx_dict = get_same_item_indices(y_all) + Gn = [Gn[i] for i in idx_dict[1]] + +# # compute Gram matrix. +# time0 = time.time() +# km = compute_kernel(Gn, gkernel, True) +# time_km = time.time() - time0 +# # write Gram matrix to file. 
+# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) + + + time_list = [] + dis_ks_min_list = [] + sod_gs_list = [] + sod_gs_min_list = [] + nb_updated_list = [] + nb_updated_k_list = [] + g_best = [] + for nb_median in nb_median_range: + print('\n-------------------------------------------------------') + print('number of median graphs =', nb_median) + random.seed(1) + idx_rdm = random.sample(range(len(Gn)), nb_median) + print('graphs chosen:', idx_rdm) + Gn_median = [Gn[idx].copy() for idx in idx_rdm] + +# for g in Gn_median: +# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) +## plt.savefig("results/preimage_mix/mutag.png", format="PNG") +# plt.show() +# plt.clf() - # compute the corresponding sod in graph space. - for idx, item in enumerate(alpha_range): - sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, - ged_method=ged_method, saveGXL=saveGXL) - sod_gs_list.append(sod_tmp) - sod_gs_min_list.append(np.min(sod_tmp)) + ################################################################### + gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') + km_tmp = gmfile['gm'] + time_km = gmfile['gmtime'] + # modify mixed gram matrix. 
+ km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) + for i in range(len(Gn)): + for j in range(i, len(Gn)): + km[i, j] = km_tmp[i, j] + km[j, i] = km[i, j] + for i in range(len(Gn)): + for j, idx in enumerate(idx_rdm): + km[i, len(Gn) + j] = km[i, idx] + km[len(Gn) + j, i] = km[i, idx] + for i, idx1 in enumerate(idx_rdm): + for j, idx2 in enumerate(idx_rdm): + km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] - print('\nsods in graph space: ', sod_gs_list) - print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) - print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) - print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam) - print('\nnumber of updates of the best graph for each alpha by random generation: ', - nb_updated_list_random) - print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', - nb_updated_k_list_iam) - print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', - nb_updated_k_list_random) - print('\ntimes:', time_list) - nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] - nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] + ################################################################### + alpha_range = [1 / nb_median] * nb_median + time0 = time.time() + dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ + preimage_iam(Gn, Gn_median, + alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, + gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, + params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, + 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, + 'removeNodes': removeNodes, 'connected': connected_iam}, + params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, + 'saveGXL': saveGXL}) - str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ - % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) - with 
open('results/preimage_mix/nb_updates.txt', 'r+') as file: - content = file.read() - file.seek(0, 0) - file.write(str_fw + content) - - + time_total = time.time() - time0 + time_km + print('\ntime: ', time_total) + time_list.append(time_total) + print('\nsmallest distance in kernel space: ', dhat) + dis_ks_min_list.append(dhat) + g_best.append(ghat_list) + print('\nnumber of updates of the best graph: ', nb_updated) + nb_updated_list.append(nb_updated) + print('\nnumber of updates of k nearest graphs: ', nb_updated_k) + nb_updated_k_list.append(nb_updated_k) + + # show the best graph and save it to file. + print('the shortest distance is', dhat) + print('one of the possible corresponding pre-images is') + nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), + with_labels=True) +# plt.show() + plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + + '.png', format="PNG") + plt.clf() +# print(ghat_list[0].nodes(data=True)) +# print(ghat_list[0].edges(data=True)) + + # compute the corresponding sod in graph space. + sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, + ged_method=ged_method, saveGXL=saveGXL) + sod_gs_list.append(sod_tmp) + sod_gs_min_list.append(np.min(sod_tmp)) + print('\nsmallest sod in graph space: ', np.min(sod_tmp)) + + print('\nsods in graph space: ', sod_gs_list) + print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) + print('\nsmallest distance in kernel space for each set of median graphs: ', + dis_ks_min_list) + print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', + nb_updated_list) + print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', + nb_updated_k_list) + print('\ntimes:', time_list) + + + + + + +############################################################################### +# test on the combination of the two randomly chosen graphs. (the same as in the +# random pre-image paper.) 
def test_gkiam_2combination_all_pairs(): - from preimage_iam import preimage_iam, compute_kernel - from iam import median_distance ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) @@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs(): # compute the corresponding sod in graph space. for idx, item in enumerate(alpha_range): - sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, + sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL) sod_gs_list.append(sod_tmp) sod_gs_min_list.append(np.min(sod_tmp)) @@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs(): def test_gkiam_2combination(): - from gk_iam import gk_iam_nearest_multi, compute_kernel - from iam import median_distance + from gk_iam import gk_iam_nearest_multi ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', 'extra_params': {}} # node/edge symb Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) @@ -451,7 +578,7 @@ def test_gkiam_2combination(): # compute the corresponding sod in graph space. 
for idx, item in enumerate(alpha_range): - sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, + sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL) sod_gs_list.append(sod_tmp) sod_gs_min_list.append(np.min(sod_tmp)) @@ -463,148 +590,6 @@ def test_gkiam_2combination(): print('\ntimes:', time_list) - - -def test_random_preimage_2combination(): -# from gk_iam import compute_kernel - from preimage_random import preimage_random - ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', - 'extra_params': {}} # node/edge symb - Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) -# Gn = Gn[0:12] - remove_edges(Gn) - gkernel = 'marginalizedkernel' - -# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel) -# print(dis_max, dis_min, dis_mean) - - lmbda = 0.03 # termination probalility - r_max = 10 # iteration limit for pre-image. - l = 500 - alpha_range = np.linspace(0, 1, 11) - k = 5 # k nearest neighbors - - # randomly select two molecules - np.random.seed(1) - idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2) - g1 = Gn[idx_gi[0]].copy() - g2 = Gn[idx_gi[1]].copy() - -# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) -# plt.savefig("results/random_preimage/mutag10.png", format="PNG") -# plt.show() -# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) -# plt.savefig("results/random_preimage/mutag11.png", format="PNG") -# plt.show() - - ###################################################################### -# Gn_mix = [g.copy() for g in Gn] -# Gn_mix.append(g1.copy()) -# Gn_mix.append(g2.copy()) -# -## g_tmp = iam([g1, g2]) -## nx.draw_networkx(g_tmp) -## plt.show() -# -# # compute -# time0 = time.time() -# km = compute_kernel(Gn_mix, gkernel, True) -# time_km = time.time() - time0 - - ################################################################### - idx1 = idx_gi[0] - idx2 = idx_gi[1] - 
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') - km = gmfile['gm'] - time_km = gmfile['gmtime'] - # modify mixed gram matrix. - for i in range(len(Gn)): - km[i, len(Gn)] = km[i, idx1] - km[i, len(Gn) + 1] = km[i, idx2] - km[len(Gn), i] = km[i, idx1] - km[len(Gn) + 1, i] = km[i, idx2] - km[len(Gn), len(Gn)] = km[idx1, idx1] - km[len(Gn), len(Gn) + 1] = km[idx1, idx2] - km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] - km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] - - ################################################################### - - time_list = [] - nb_updated_list = [] - g_best = [] - dis_ks_min_list = [] - # for each alpha - for alpha in alpha_range: - print('\n-------------------------------------------------------\n') - print('alpha =', alpha) - time0 = time.time() - dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], - range(len(Gn), len(Gn) + 2), km, - k, r_max, l, gkernel) - time_total = time.time() - time0 + time_km - print('time: ', time_total) - time_list.append(time_total) - dis_ks_min_list.append(dhat) - g_best.append(ghat) - nb_updated_list.append(nb_updated) - - # show best graphs and save them to file. - for idx, item in enumerate(alpha_range): - print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) - print('one of the possible corresponding pre-images is') - nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), - with_labels=True) - plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG") - plt.show() - plt.clf() - print(g_best[idx].nodes(data=True)) - print(g_best[idx].edges(data=True)) - -# # compute the corresponding sod in graph space. (alpha range not considered.) 
-# sod_tmp, _ = median_distance(g_best[0], Gn_let) -# sod_gs_list.append(sod_tmp) -# sod_gs_min_list.append(np.min(sod_tmp)) -# sod_ks_min_list.append(sod_ks) -# nb_updated_list.append(nb_updated) - -# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) - print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) - print('\nnumber of updates for each alpha: ', nb_updated_list) - print('\ntimes:', time_list) - - - - -############################################################################### -# help functions - -def remove_edges(Gn): - for G in Gn: - for _, _, attrs in G.edges(data=True): - attrs.clear() - - -def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None): - from gk_iam import compute_kernel - dis_mat = np.empty((len(Gn), len(Gn))) - if Kmatrix == None: - Kmatrix = compute_kernel(Gn, gkernel, True) - for i in range(len(Gn)): - for j in range(i, len(Gn)): - dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j] - if dis < 0: - if dis > -1e-10: - dis = 0 - else: - raise ValueError('The distance is negative.') - dis_mat[i, j] = np.sqrt(dis) - dis_mat[j, i] = dis_mat[i, j] - dis_max = np.max(np.max(dis_mat)) - dis_min = np.min(np.min(dis_mat[dis_mat != 0])) - dis_mean = np.mean(np.mean(dis_mat)) - return dis_mat, dis_max, dis_min, dis_mean - ############################################################################### @@ -612,7 +597,13 @@ if __name__ == '__main__': ############################################################################### # test on the combination of the two randomly chosen graphs. (the same as in the # random pre-image paper.) -# test_random_preimage_2combination() # test_gkiam_2combination() # test_gkiam_2combination_all_pairs() - test_preimage_mix_2combination_all_pairs() \ No newline at end of file + +############################################################################### +# tests on different numbers of median-sets. 
+ test_preimage_iam_median_nb() + +############################################################################### +# tests on different values on grid of median-sets and k. +# test_preimage_iam_grid_k_median_nb() \ No newline at end of file diff --git a/preimage/test_preimage_mix.py b/preimage/test_preimage_mix.py new file mode 100644 index 0000000..ab6f8b4 --- /dev/null +++ b/preimage/test_preimage_mix.py @@ -0,0 +1,542 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Sep 5 15:59:00 2019 + +@author: ljia +""" + +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +import time +import random +#from tqdm import tqdm + +#import os +import sys +sys.path.insert(0, "../") +from pygraph.utils.graphfiles import loadDataset +from ged import ged_median +from utils import compute_kernel, get_same_item_indices, remove_edges +from preimage_iam import preimage_iam_random_mix + +############################################################################### +# tests on different values on grid of median-sets and k. + +def test_preimage_mix_grid_k_median_nb(): + ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', + 'extra_params': {}} # node/edge symb + Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# Gn = Gn[0:50] + remove_edges(Gn) + gkernel = 'marginalizedkernel' + + lmbda = 0.03 # termination probalility + r_max = 5 # iteration limit for pre-image. + l_max = 500 # update limit for random generation +# alpha_range = np.linspace(0.5, 0.5, 1) +# k = 5 # k nearest neighbors + epsilon = 1e-6 + InitIAMWithAllDk = True + InitRandomWithAllDk = True + # parameters for GED function + ged_cost='CHEM_1' + ged_method='IPFP' + saveGXL='gedlib' + # parameters for IAM function + c_ei=1 + c_er=1 + c_es=1 + ite_max_iam = 50 + epsilon_iam = 0.001 + removeNodes = True + connected_iam = False + + # number of graphs; we what to compute the median of these graphs. 
+ nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] + # number of nearest neighbors. + k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] + + # find out all the graphs classified to positive group 1. + idx_dict = get_same_item_indices(y_all) + Gn = [Gn[i] for i in idx_dict[1]] + +# # compute Gram matrix. +# time0 = time.time() +# km = compute_kernel(Gn, gkernel, True) +# time_km = time.time() - time0 +# # write Gram matrix to file. +# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) + + + time_list = [] + dis_ks_min_list = [] + sod_gs_list = [] + sod_gs_min_list = [] + nb_updated_list_iam = [] + nb_updated_list_random = [] + nb_updated_k_list_iam = [] + nb_updated_k_list_random = [] + g_best = [] + for idx_nb, nb_median in enumerate(nb_median_range): + print('\n-------------------------------------------------------') + print('number of median graphs =', nb_median) + random.seed(1) + idx_rdm = random.sample(range(len(Gn)), nb_median) + print('graphs chosen:', idx_rdm) + Gn_median = [Gn[idx].copy() for idx in idx_rdm] + +# for g in Gn_median: +# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) +## plt.savefig("results/preimage_mix/mutag.png", format="PNG") +# plt.show() +# plt.clf() + + ################################################################### + gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') + km_tmp = gmfile['gm'] + time_km = gmfile['gmtime'] + # modify mixed gram matrix. 
+ km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) + for i in range(len(Gn)): + for j in range(i, len(Gn)): + km[i, j] = km_tmp[i, j] + km[j, i] = km[i, j] + for i in range(len(Gn)): + for j, idx in enumerate(idx_rdm): + km[i, len(Gn) + j] = km[i, idx] + km[len(Gn) + j, i] = km[i, idx] + for i, idx1 in enumerate(idx_rdm): + for j, idx2 in enumerate(idx_rdm): + km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] + + ################################################################### + alpha_range = [1 / nb_median] * nb_median + + time_list.append([]) + dis_ks_min_list.append([]) + sod_gs_list.append([]) + sod_gs_min_list.append([]) + nb_updated_list_iam.append([]) + nb_updated_list_random.append([]) + nb_updated_k_list_iam.append([]) + nb_updated_k_list_random.append([]) + g_best.append([]) + + for k in k_range: + print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') + print('k =', k) + time0 = time.time() + dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ + nb_updated_k_iam, nb_updated_k_random = \ + preimage_iam_random_mix(Gn, Gn_median, + alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, + l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, + InitRandomWithAllDk=InitRandomWithAllDk, + params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, + 'ite_max': ite_max_iam, 'epsilon': epsilon_iam, + 'removeNodes': removeNodes, 'connected': connected_iam}, + params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, + 'saveGXL': saveGXL}) + + time_total = time.time() - time0 + time_km + print('time: ', time_total) + time_list[idx_nb].append(time_total) + print('\nsmallest distance in kernel space: ', dhat) + dis_ks_min_list[idx_nb].append(dhat) + g_best[idx_nb].append(ghat_list) + print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam) + nb_updated_list_iam[idx_nb].append(nb_updated_iam) + print('\nnumber of updates of the best graph by random generation: ', + nb_updated_random) + 
nb_updated_list_random[idx_nb].append(nb_updated_random) + print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam) + nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam) + print('\nnumber of updates of k nearest graphs by random generation: ', + nb_updated_k_random) + nb_updated_k_list_random[idx_nb].append(nb_updated_k_random) + + # show the best graph and save it to file. + print('the shortest distance is', dhat) + print('one of the possible corresponding pre-images is') + nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), + with_labels=True) + plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + + '_k' + str(k) + '.png', format="PNG") + # plt.show() + plt.clf() + # print(ghat_list[0].nodes(data=True)) + # print(ghat_list[0].edges(data=True)) + + # compute the corresponding sod in graph space. + sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, + ged_method=ged_method, saveGXL=saveGXL) + sod_gs_list[idx_nb].append(sod_tmp) + sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) + print('\nsmallest sod in graph space: ', np.min(sod_tmp)) + + print('\nsods in graph space: ', sod_gs_list) + print('\nsmallest sod in graph space for each set of median graphs and k: ', + sod_gs_min_list) + print('\nsmallest distance in kernel space for each set of median graphs and k: ', + dis_ks_min_list) + print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', + nb_updated_list_iam) + print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ', + nb_updated_list_random) + print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', + nb_updated_k_list_iam) + print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ', + nb_updated_k_list_random) + print('\ntimes:', time_list) + + + + 
+###############################################################################
+# tests on different numbers of median-sets.
+
+def _extend_gram_matrix(km_tmp, nb_graphs, idx_median):
+    """Append copies of the median graphs' rows/columns to a Gram matrix.
+
+    Only the upper triangle of the leading ``nb_graphs`` x ``nb_graphs`` part
+    of ``km_tmp`` is read; it is mirrored so that the result is symmetric.
+    Row/column ``nb_graphs + j`` of the returned matrix duplicates row/column
+    ``idx_median[j]``. A new float array is returned; ``km_tmp`` is not
+    modified.
+    """
+    upper = np.triu(np.asarray(km_tmp)[:nb_graphs, :nb_graphs])
+    km_sym = upper + np.triu(upper, 1).T
+    idx_ext = list(range(nb_graphs)) + list(idx_median)
+    km = np.zeros((len(idx_ext), len(idx_ext)))
+    km[:, :] = km_sym[np.ix_(idx_ext, idx_ext)]
+    return km
+
+
+def test_preimage_mix_median_nb():
+    """Run the mixed IAM / random pre-image search for several median-set sizes.
+
+    Loads MUTAG, clears the edge attributes with remove_edges(), keeps the
+    graphs whose label is 1 and, for every size in ``nb_median_range``, draws
+    a median set with a fixed random seed, calls preimage_iam_random_mix() on
+    a pre-computed Gram matrix read from 'results/', saves a drawing of the
+    first returned graph and prints the collected statistics.
+    """
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
+          'extra_params': {}}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+#    Gn = Gn[0:50]
+    remove_edges(Gn)
+    gkernel = 'marginalizedkernel'
+
+    lmbda = 0.03  # termination probability (not referenced in this function)
+    r_max = 5  # iteration limit for pre-image.
+    l_max = 500  # update limit for random generation
+#    alpha_range = np.linspace(0.5, 0.5, 1)
+    k = 5  # k nearest neighbors
+    epsilon = 1e-6
+    InitIAMWithAllDk = True
+    InitRandomWithAllDk = True
+    # parameters for GED function
+    ged_cost = 'CHEM_1'
+    ged_method = 'IPFP'
+    saveGXL = 'gedlib'
+    # parameters for IAM function
+    c_ei = 1
+    c_er = 1
+    c_es = 1
+    ite_max_iam = 50
+    epsilon_iam = 0.001
+    removeNodes = True
+    connected_iam = False
+
+    # numbers of graphs; we want to compute the median of these graphs.
+    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+
+    # find out all the graphs classified to positive group 1.
+    idx_dict = get_same_item_indices(y_all)
+    Gn = [Gn[i] for i in idx_dict[1]]
+
+#    # compute Gram matrix.
+#    time0 = time.time()
+#    km = compute_kernel(Gn, gkernel, True)
+#    time_km = time.time() - time0
+#    # write Gram matrix to file.
+#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
+
+    # The pre-computed Gram matrix does not depend on nb_median: read it once
+    # and close the archive right away instead of re-opening it per iteration.
+    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
+        km_tmp = gmfile['gm']
+        time_km = gmfile['gmtime']
+
+    time_list = []
+    dis_ks_min_list = []
+    sod_gs_list = []
+    sod_gs_min_list = []
+    nb_updated_list_iam = []
+    nb_updated_list_random = []
+    nb_updated_k_list_iam = []
+    nb_updated_k_list_random = []
+    g_best = []
+    for nb_median in nb_median_range:
+        print('\n-------------------------------------------------------')
+        print('number of median graphs =', nb_median)
+        random.seed(1)
+        idx_rdm = random.sample(range(len(Gn)), nb_median)
+        print('graphs chosen:', idx_rdm)
+        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
+
+#        for g in Gn_median:
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
+#            plt.show()
+#            plt.clf()
+
+        ###################################################################
+        # mixed gram matrix: the graphs of Gn followed by the median graphs.
+        km = _extend_gram_matrix(km_tmp, len(Gn), idx_rdm)
+
+        ###################################################################
+        alpha_range = [1 / nb_median] * nb_median
+        time0 = time.time()
+        dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
+            nb_updated_k_iam, nb_updated_k_random = \
+            preimage_iam_random_mix(Gn, Gn_median,
+                alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
+                l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
+                InitRandomWithAllDk=InitRandomWithAllDk,
+                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
+                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
+                            'removeNodes': removeNodes, 'connected': connected_iam},
+                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
+                            'saveGXL': saveGXL})
+
+        # the reported time includes the time stored with the Gram matrix.
+        time_total = time.time() - time0 + time_km
+        print('time: ', time_total)
+        time_list.append(time_total)
+        print('\nsmallest distance in kernel space: ', dhat)
+        dis_ks_min_list.append(dhat)
+        g_best.append(ghat_list)
+        print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
+        nb_updated_list_iam.append(nb_updated_iam)
+        print('\nnumber of updates of the best graph by random generation: ',
+              nb_updated_random)
+        nb_updated_list_random.append(nb_updated_random)
+        print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
+        nb_updated_k_list_iam.append(nb_updated_k_iam)
+        print('\nnumber of updates of k nearest graphs by random generation: ',
+              nb_updated_k_random)
+        nb_updated_k_list_random.append(nb_updated_k_random)
+
+        # show the best graph and save it to file.
+        print('the shortest distance is', dhat)
+        print('one of the possible corresponding pre-images is')
+        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
+                with_labels=True)
+        plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
+                    '.png', format="PNG")
+#        plt.show()
+        plt.clf()
+#        print(ghat_list[0].nodes(data=True))
+#        print(ghat_list[0].edges(data=True))
+
+        # compute the corresponding sod in graph space.
+        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
+                                ged_method=ged_method, saveGXL=saveGXL)
+        sod_gs_list.append(sod_tmp)
+        sod_gs_min_list.append(np.min(sod_tmp))
+        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
+
+    print('\nsods in graph space: ', sod_gs_list)
+    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
+    print('\nsmallest distance in kernel space for each set of median graphs: ',
+          dis_ks_min_list)
+    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
+          nb_updated_list_iam)
+    print('\nnumber of updates of the best graph for each set of median graphs by random generation: ',
+          nb_updated_list_random)
+    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
+          nb_updated_k_list_iam)
+    print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ',
+          nb_updated_k_list_random)
+    print('\ntimes:', time_list)
+
+
+
+###############################################################################
+# test on the combination of the two randomly chosen graphs. (the same as in the
+# random pre-image paper.)
+
+def test_preimage_mix_2combination_all_pairs():
+    """Run the mixed IAM / random pre-image search on pairs of MUTAG graphs.
+
+    For every (idx1, idx2) pair visited by the loops below (currently only
+    187 and 167), the two graphs are drawn and saved, a pre-computed Gram
+    matrix is read from 'results/' and its last two rows/columns are filled
+    with the entries of the chosen graphs, preimage_iam_random_mix() is run
+    for every alpha in ``alpha_range``, the first returned graph of each run
+    is drawn and saved, and the numbers of updates are prepended to
+    'results/preimage_mix/nb_updates.txt'.
+    """
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
+          'extra_params': {}}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+#    Gn = Gn[0:50]
+    remove_edges(Gn)
+    gkernel = 'marginalizedkernel'
+
+    lmbda = 0.03  # termination probability (not referenced in this function)
+    r_max = 10  # iteration limit for pre-image.
+    l_max = 500  # update limit for random generation
+    alpha_range = np.linspace(0.5, 0.5, 1)
+    k = 5  # k nearest neighbors
+    epsilon = 1e-6
+    InitIAMWithAllDk = True
+    InitRandomWithAllDk = True
+    # parameters for GED function
+    ged_cost = 'CHEM_1'
+    ged_method = 'IPFP'
+    saveGXL = 'gedlib'
+    # parameters for IAM function
+    c_ei = 1
+    c_er = 1
+    c_es = 1
+    ite_max_iam = 50
+    epsilon_iam = 0.001
+    removeNodes = True
+    connected_iam = False
+
+    nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
+    nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
+    # test on each pair of graphs.
+#    for idx1 in range(len(Gn) - 1, -1, -1):
+#        for idx2 in range(idx1, -1, -1):
+    for idx1 in range(187, 188):
+        for idx2 in range(167, 168):
+            g1 = Gn[idx1].copy()
+            g2 = Gn[idx2].copy()
+            # Gn[10] = []
+            # Gn[10] = []
+
+            # file names follow the loop indices, so that widening the ranges
+            # above does not overwrite the same two pictures.
+            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
+            plt.savefig('results/preimage_mix/mutag' + str(idx1) + '.png', format="PNG")
+            plt.show()
+            plt.clf()
+            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
+            plt.savefig('results/preimage_mix/mutag' + str(idx2) + '.png', format="PNG")
+            plt.show()
+            plt.clf()
+
+            ###################################################################
+#            Gn_mix = [g.copy() for g in Gn]
+#            Gn_mix.append(g1.copy())
+#            Gn_mix.append(g2.copy())
+#
+#            # compute
+#            time0 = time.time()
+#            km = compute_kernel(Gn_mix, gkernel, True)
+#            time_km = time.time() - time0
+#
+#            # write Gram matrix to file and read it.
+#            np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
+
+            ###################################################################
+            # km is modified in place below, so it is re-read for every pair;
+            # the archive itself is closed as soon as the arrays are loaded.
+            with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
+                km = gmfile['gm']
+                time_km = gmfile['gmtime']
+            # modify mixed gram matrix.
+            for i in range(len(Gn)):
+                km[i, len(Gn)] = km[i, idx1]
+                km[i, len(Gn) + 1] = km[i, idx2]
+                km[len(Gn), i] = km[i, idx1]
+                km[len(Gn) + 1, i] = km[i, idx2]
+            km[len(Gn), len(Gn)] = km[idx1, idx1]
+            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
+            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
+            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
+
+            ###################################################################
+#            # use only the two graphs in median set as candidates.
+#            Gn = [g1.copy(), g2.copy()]
+#            Gn_mix = Gn + [g1.copy(), g2.copy()]
+#            # compute
+#            time0 = time.time()
+#            km = compute_kernel(Gn_mix, gkernel, True)
+#            time_km = time.time() - time0
+
+
+            time_list = []
+            dis_ks_min_list = []
+            sod_gs_list = []
+            sod_gs_min_list = []
+            nb_updated_list_iam = []
+            nb_updated_list_random = []
+            nb_updated_k_list_iam = []
+            nb_updated_k_list_random = []
+            g_best = []
+            # for each alpha
+            for alpha in alpha_range:
+                print('\n-------------------------------------------------------\n')
+                print('alpha =', alpha)
+                time0 = time.time()
+                dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
+                    nb_updated_k_iam, nb_updated_k_random = \
+                    preimage_iam_random_mix(Gn, [g1, g2],
+                        [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
+                        l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
+                        InitRandomWithAllDk=InitRandomWithAllDk,
+                        params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
+                                    'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
+                                    'removeNodes': removeNodes, 'connected': connected_iam},
+                        params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
+                                    'saveGXL': saveGXL})
+                time_total = time.time() - time0 + time_km
+                print('time: ', time_total)
+                time_list.append(time_total)
+                dis_ks_min_list.append(dhat)
+                g_best.append(ghat_list)
+                nb_updated_list_iam.append(nb_updated_iam)
+                nb_updated_list_random.append(nb_updated_random)
+                nb_updated_k_list_iam.append(nb_updated_k_iam)
+                nb_updated_k_list_random.append(nb_updated_k_random)
+
+            # show best graphs and save them to file.
+            for idx, item in enumerate(alpha_range):
+                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
+                print('one of the possible corresponding pre-images is')
+                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
+                        with_labels=True)
+                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
+                            + '_alpha' + str(item) + '.png', format="PNG")
+#                plt.show()
+                plt.clf()
+#                print(g_best[idx][0].nodes(data=True))
+#                print(g_best[idx][0].edges(data=True))
+
+                # for g in g_best[idx]:
+                #     draw_Letter_graph(g, savepath='results/gk_iam/')
+                ##     nx.draw_networkx(g)
+                ##     plt.show()
+                #     print(g.nodes(data=True))
+                #     print(g.edges(data=True))
+
+            # compute the corresponding sod in graph space.
+            for idx, item in enumerate(alpha_range):
+                # g_best[idx] is the list of graphs returned for this alpha;
+                # as everywhere else in this file, only its first graph is
+                # compared with the median set.
+                sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2], ged_cost=ged_cost,
+                                        ged_method=ged_method, saveGXL=saveGXL)
+                sod_gs_list.append(sod_tmp)
+                sod_gs_min_list.append(np.min(sod_tmp))
+
+            print('\nsods in graph space: ', sod_gs_list)
+            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
+            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
+            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
+            print('\nnumber of updates of the best graph for each alpha by random generation: ',
+                  nb_updated_list_random)
+            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
+                  nb_updated_k_list_iam)
+            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
+                  nb_updated_k_list_random)
+            print('\ntimes:', time_list)
+            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
+            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
+
+            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
+                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
+            # prepend the new record to the log; start from an empty log when
+            # the file does not exist yet (mode 'r+' would raise in that case).
+            path_log = 'results/preimage_mix/nb_updates.txt'
+            try:
+                with open(path_log, 'r') as file:
+                    content = file.read()
+            except FileNotFoundError:
+                content = ''
+            with open(path_log, 'w') as file:
+                file.write(str_fw + content)
+
+###############################################################################
+
+
+if __name__ == '__main__':
+###############################################################################
+# test on the combination of the two randomly chosen graphs. (the same as in the
+# random pre-image paper.)
+#    test_preimage_mix_2combination_all_pairs()
+
+###############################################################################
+# tests on different numbers of median-sets.
+#    test_preimage_mix_median_nb()
+
+###############################################################################
+# tests on different values on grid of median-sets and k.
+    test_preimage_mix_grid_k_median_nb()
\ No newline at end of file
diff --git a/preimage/test_preimage_random.py b/preimage/test_preimage_random.py
new file mode 100644
index 0000000..53d991b
--- /dev/null
+++ b/preimage/test_preimage_random.py
@@ -0,0 +1,402 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Sep 5 15:59:00 2019
+
+@author: ljia
+"""
+
+import numpy as np
+import networkx as nx
+import matplotlib.pyplot as plt
+import time
+import random
+#from tqdm import tqdm
+
+#import os
+import sys
+sys.path.insert(0, "../")
+from pygraph.utils.graphfiles import loadDataset
+
+from preimage_random import preimage_random
+from ged import ged_median
+from utils import compute_kernel, get_same_item_indices, remove_edges
+
+
+def _extend_gram_matrix(km_tmp, nb_graphs, idx_median):
+    """Append copies of the median graphs' rows/columns to a Gram matrix.
+
+    Only the upper triangle of the leading ``nb_graphs`` x ``nb_graphs`` part
+    of ``km_tmp`` is read; it is mirrored so that the result is symmetric.
+    Row/column ``nb_graphs + j`` of the returned matrix duplicates row/column
+    ``idx_median[j]``. A new float array is returned; ``km_tmp`` is not
+    modified.
+    """
+    upper = np.triu(np.asarray(km_tmp)[:nb_graphs, :nb_graphs])
+    km_sym = upper + np.triu(upper, 1).T
+    idx_ext = list(range(nb_graphs)) + list(idx_median)
+    km = np.zeros((len(idx_ext), len(idx_ext)))
+    km[:, :] = km_sym[np.ix_(idx_ext, idx_ext)]
+    return km
+
+
+###############################################################################
+# tests on different values on grid of median-sets and k.
+
+def test_preimage_random_grid_k_median_nb():
+    """Run the random pre-image search on a grid of median-set sizes and k.
+
+    Loads MUTAG, clears the edge attributes with remove_edges(), keeps the
+    graphs whose label is 1 and, for every pair taken from
+    ``nb_median_range`` x ``k_range``, calls preimage_random() on a
+    pre-computed Gram matrix read from 'results/', saves a drawing of the
+    returned graph and prints the collected statistics.
+    """
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
+          'extra_params': {}}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+#    Gn = Gn[0:50]
+    remove_edges(Gn)
+    gkernel = 'marginalizedkernel'
+
+    lmbda = 0.03  # termination probability (not referenced in this function)
+    r_max = 5  # iteration limit for pre-image.
+    l_max = 500  # update limit for random generation
+#    alpha_range = np.linspace(0.5, 0.5, 1)
+#    k = 5 # k nearest neighbors
+    # parameters for GED function
+    ged_cost = 'CHEM_1'
+    ged_method = 'IPFP'
+    saveGXL = 'gedlib'
+
+    # numbers of graphs; we want to compute the median of these graphs.
+    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+    # number of nearest neighbors.
+    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
+
+    # find out all the graphs classified to positive group 1.
+    idx_dict = get_same_item_indices(y_all)
+    Gn = [Gn[i] for i in idx_dict[1]]
+
+#    # compute Gram matrix.
+#    time0 = time.time()
+#    km = compute_kernel(Gn, gkernel, True)
+#    time_km = time.time() - time0
+#    # write Gram matrix to file.
+#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
+
+    # The pre-computed Gram matrix does not depend on nb_median or k: read it
+    # once and close the archive right away.
+    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
+        km_tmp = gmfile['gm']
+        time_km = gmfile['gmtime']
+
+    time_list = []
+    dis_ks_min_list = []
+    sod_gs_list = []
+    sod_gs_min_list = []
+    nb_updated_list = []
+    g_best = []
+    for idx_nb, nb_median in enumerate(nb_median_range):
+        print('\n-------------------------------------------------------')
+        print('number of median graphs =', nb_median)
+        random.seed(1)
+        idx_rdm = random.sample(range(len(Gn)), nb_median)
+        print('graphs chosen:', idx_rdm)
+        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
+
+#        for g in Gn_median:
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
+#            plt.show()
+#            plt.clf()
+
+        ###################################################################
+        # mixed gram matrix: the graphs of Gn followed by the median graphs.
+        km = _extend_gram_matrix(km_tmp, len(Gn), idx_rdm)
+
+        ###################################################################
+        alpha_range = [1 / nb_median] * nb_median
+
+        # one sub-list of results per median-set size, one entry per k.
+        time_list.append([])
+        dis_ks_min_list.append([])
+        sod_gs_list.append([])
+        sod_gs_min_list.append([])
+        nb_updated_list.append([])
+        g_best.append([])
+
+        for k in k_range:
+            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
+            print('k =', k)
+            time0 = time.time()
+            dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
+                range(len(Gn), len(Gn) + nb_median), km, k, r_max, l_max, gkernel)
+
+            # the reported time includes the time stored with the Gram matrix.
+            time_total = time.time() - time0 + time_km
+            print('time: ', time_total)
+            time_list[idx_nb].append(time_total)
+            print('\nsmallest distance in kernel space: ', dhat)
+            dis_ks_min_list[idx_nb].append(dhat)
+            g_best[idx_nb].append(ghat)
+            print('\nnumber of updates of the best graph: ', nb_updated)
+            nb_updated_list[idx_nb].append(nb_updated)
+
+            # show the best graph and save it to file.
+            print('the shortest distance is', dhat)
+            print('one of the possible corresponding pre-images is')
+            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
+                    with_labels=True)
+            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
+                        '_k' + str(k) + '.png', format="PNG")
+    #        plt.show()
+            plt.clf()
+    #        print(ghat_list[0].nodes(data=True))
+    #        print(ghat_list[0].edges(data=True))
+
+            # compute the corresponding sod in graph space.
+            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
+                                    ged_method=ged_method, saveGXL=saveGXL)
+            sod_gs_list[idx_nb].append(sod_tmp)
+            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
+            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
+
+    print('\nsods in graph space: ', sod_gs_list)
+    print('\nsmallest sod in graph space for each set of median graphs and k: ',
+          sod_gs_min_list)
+    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
+          dis_ks_min_list)
+    # only preimage_random() is run here, so the counts are not "by IAM".
+    print('\nnumber of updates of the best graph for each set of median graphs and k: ',
+          nb_updated_list)
+    print('\ntimes:', time_list)
+
+
+
+
+###############################################################################
+# tests on different numbers of median-sets.
+
+def test_preimage_random_median_nb():
+    """Run the random pre-image search for several median-set sizes.
+
+    Same set-up as the grid test of this file, but with a fixed k: for every
+    size in ``nb_median_range`` a median set is drawn with a fixed random
+    seed, preimage_random() is called, a drawing of the returned graph is
+    saved and the collected statistics are printed.
+    """
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
+          'extra_params': {}}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+#    Gn = Gn[0:50]
+    remove_edges(Gn)
+    gkernel = 'marginalizedkernel'
+
+    lmbda = 0.03  # termination probability (not referenced in this function)
+    r_max = 5  # iteration limit for pre-image.
+    l_max = 500  # update limit for random generation
+#    alpha_range = np.linspace(0.5, 0.5, 1)
+    k = 5  # k nearest neighbors
+    # parameters for GED function
+    ged_cost = 'CHEM_1'
+    ged_method = 'IPFP'
+    saveGXL = 'gedlib'
+
+    # numbers of graphs; we want to compute the median of these graphs.
+    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+
+    # find out all the graphs classified to positive group 1.
+    idx_dict = get_same_item_indices(y_all)
+    Gn = [Gn[i] for i in idx_dict[1]]
+
+#    # compute Gram matrix.
+#    time0 = time.time()
+#    km = compute_kernel(Gn, gkernel, True)
+#    time_km = time.time() - time0
+#    # write Gram matrix to file.
+#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
+
+    # The pre-computed Gram matrix does not depend on nb_median: read it once
+    # and close the archive right away.
+    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
+        km_tmp = gmfile['gm']
+        time_km = gmfile['gmtime']
+
+    time_list = []
+    dis_ks_min_list = []
+    sod_gs_list = []
+    sod_gs_min_list = []
+    nb_updated_list = []
+    g_best = []
+    for nb_median in nb_median_range:
+        print('\n-------------------------------------------------------')
+        print('number of median graphs =', nb_median)
+        random.seed(1)
+        idx_rdm = random.sample(range(len(Gn)), nb_median)
+        print('graphs chosen:', idx_rdm)
+        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
+
+#        for g in Gn_median:
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
+#            plt.show()
+#            plt.clf()
+
+        ###################################################################
+        # mixed gram matrix: the graphs of Gn followed by the median graphs.
+        km = _extend_gram_matrix(km_tmp, len(Gn), idx_rdm)
+
+        ###################################################################
+        alpha_range = [1 / nb_median] * nb_median
+        time0 = time.time()
+        dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range,
+            range(len(Gn), len(Gn) + nb_median), km, k, r_max, l_max, gkernel)
+
+        # the reported time includes the time stored with the Gram matrix.
+        time_total = time.time() - time0 + time_km
+        print('time: ', time_total)
+        time_list.append(time_total)
+        print('\nsmallest distance in kernel space: ', dhat)
+        dis_ks_min_list.append(dhat)
+        g_best.append(ghat)
+        print('\nnumber of updates of the best graph: ', nb_updated)
+        nb_updated_list.append(nb_updated)
+
+        # show the best graph and save it to file.
+        print('the shortest distance is', dhat)
+        print('one of the possible corresponding pre-images is')
+        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
+                with_labels=True)
+        plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
+                    '.png', format="PNG")
+#        plt.show()
+        plt.clf()
+#        print(ghat_list[0].nodes(data=True))
+#        print(ghat_list[0].edges(data=True))
+
+        # compute the corresponding sod in graph space.
+        sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
+                                ged_method=ged_method, saveGXL=saveGXL)
+        sod_gs_list.append(sod_tmp)
+        sod_gs_min_list.append(np.min(sod_tmp))
+        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
+
+    print('\nsods in graph space: ', sod_gs_list)
+    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
+    print('\nsmallest distance in kernel space for each set of median graphs: ',
+          dis_ks_min_list)
+    print('\nnumber of updates of the best graph for each set of median graphs: ',
+          nb_updated_list)
+    print('\ntimes:', time_list)
+
+
+
+###############################################################################
+# test on the combination of the two randomly chosen graphs. (the same as in the
+# random pre-image paper.)
+
+def test_random_preimage_2combination():
+    """Run the random pre-image search on a weighted pair of MUTAG graphs.
+
+    Graphs 187 and 167 are used as the median set. A pre-computed Gram matrix
+    is read from 'results/' and its last two rows/columns are filled with the
+    entries of these two graphs; preimage_random() is then run for every
+    alpha in ``alpha_range`` with the weights [alpha, 1 - alpha], and the
+    returned graphs are saved, shown and printed.
+    """
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
+          'extra_params': {}}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+#    Gn = Gn[0:12]
+    remove_edges(Gn)
+    gkernel = 'marginalizedkernel'
+
+#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
+#    print(dis_max, dis_min, dis_mean)
+
+    lmbda = 0.03  # termination probability (not referenced in this function)
+    r_max = 10  # iteration limit for pre-image.
+    l_max = 500
+    alpha_range = np.linspace(0, 1, 11)
+    k = 5  # k nearest neighbors
+
+    # the two molecules are fixed; the random draw is kept as a comment.
+    np.random.seed(1)
+    idx_gi = [187, 167]  # np.random.randint(0, len(Gn), 2)
+    g1 = Gn[idx_gi[0]].copy()
+    g2 = Gn[idx_gi[1]].copy()
+
+#    nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
+#    plt.savefig("results/random_preimage/mutag10.png", format="PNG")
+#    plt.show()
+#    nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
+#    plt.savefig("results/random_preimage/mutag11.png", format="PNG")
+#    plt.show()
+
+    ######################################################################
+#    Gn_mix = [g.copy() for g in Gn]
+#    Gn_mix.append(g1.copy())
+#    Gn_mix.append(g2.copy())
+#
+##    g_tmp = iam([g1, g2])
+##    nx.draw_networkx(g_tmp)
+##    plt.show()
+#
+#    # compute
+#    time0 = time.time()
+#    km = compute_kernel(Gn_mix, gkernel, True)
+#    time_km = time.time() - time0
+
+    ###################################################################
+    idx1 = idx_gi[0]
+    idx2 = idx_gi[1]
+    # close the archive as soon as the two arrays are loaded.
+    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
+        km = gmfile['gm']
+        time_km = gmfile['gmtime']
+    # modify mixed gram matrix.
+    for i in range(len(Gn)):
+        km[i, len(Gn)] = km[i, idx1]
+        km[i, len(Gn) + 1] = km[i, idx2]
+        km[len(Gn), i] = km[i, idx1]
+        km[len(Gn) + 1, i] = km[i, idx2]
+    km[len(Gn), len(Gn)] = km[idx1, idx1]
+    km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
+    km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
+    km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
+
+    ###################################################################
+
+    time_list = []
+    nb_updated_list = []
+    g_best = []
+    dis_ks_min_list = []
+    # for each alpha
+    for alpha in alpha_range:
+        print('\n-------------------------------------------------------\n')
+        print('alpha =', alpha)
+        time0 = time.time()
+        dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
+                                                 range(len(Gn), len(Gn) + 2), km,
+                                                 k, r_max, l_max, gkernel)
+        time_total = time.time() - time0 + time_km
+        print('time: ', time_total)
+        time_list.append(time_total)
+        dis_ks_min_list.append(dhat)
+        g_best.append(ghat)
+        nb_updated_list.append(nb_updated)
+
+    # show best graphs and save them to file.
+    for idx, item in enumerate(alpha_range):
+        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
+        print('one of the possible corresponding pre-images is')
+        nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
+                with_labels=True)
+        # save before showing: once show() has returned, the figure may
+        # already be closed and savefig() would write an empty picture.
+        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
+        plt.show()
+        plt.clf()
+        print(g_best[idx].nodes(data=True))
+        print(g_best[idx].edges(data=True))
+
+#    # compute the corresponding sod in graph space. (alpha range not considered.)
+#    sod_tmp, _ = median_distance(g_best[0], Gn_let)
+#    sod_gs_list.append(sod_tmp)
+#    sod_gs_min_list.append(np.min(sod_tmp))
+#    sod_ks_min_list.append(sod_ks)
+#    nb_updated_list.append(nb_updated)
+
+#    print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
+    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
+    print('\nnumber of updates for each alpha: ', nb_updated_list)
+    print('\ntimes:', time_list)
+
+###############################################################################
+
+
+if __name__ == '__main__':
+###############################################################################
+# test on the combination of the two randomly chosen graphs. (the same as in the
+# random pre-image paper.)
+#    test_random_preimage_2combination()
+
+###############################################################################
+# tests all algorithms on different numbers of median-sets.
+    test_preimage_random_median_nb()
+
+###############################################################################
+# tests all algorithms on different values on grid of median-sets and k.
+#    test_preimage_random_grid_k_median_nb()
\ No newline at end of file
diff --git a/preimage/utils.py b/preimage/utils.py
new file mode 100644
index 0000000..58431e3
--- /dev/null
+++ b/preimage/utils.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Oct 17 19:05:07 2019
+
+Useful functions.
+@author: ljia
+"""
+#import networkx as nx
+
+import multiprocessing
+import numpy as np
+
+import sys
+sys.path.insert(0, "../")
+from pygraph.kernels.marginalizedKernel import marginalizedkernel
+from pygraph.kernels.untilHPathKernel import untilhpathkernel
+from pygraph.kernels.spKernel import spkernel
+import functools
+from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
+from pygraph.kernels.structuralspKernel import structuralspkernel
+
+
+def remove_edges(Gn):
+    """Clear the attribute dictionary of every edge of every graph, in place.
+
+    Despite its name, this function does not delete any edge: it only empties
+    the attribute dict yielded by ``G.edges(data=True)``.
+    """
+    for G in Gn:
+        for _, _, attrs in G.edges(data=True):
+            attrs.clear()
+
+
+def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
+    """Return sqrt(K[g, g] - 2 * sum_i alpha_i * K[g, g_i] + term3).
+
+    ``idx_g`` is the index of g in ``Kmatrix`` and ``idx_gi[i]`` the index of
+    the graph weighted by ``alpha[i]``. When ``withterm3`` is false, the
+    double sum ``sum_ij alpha_i * alpha_j * K[g_i, g_j]`` is added to the
+    given ``term3``; otherwise ``term3`` is used as passed in.
+    """
+    term1 = Kmatrix[idx_g, idx_g]
+    term2 = 0
+    for i, a in enumerate(alpha):
+        term2 += a * Kmatrix[idx_g, idx_gi[i]]
+    term2 *= 2
+    if not withterm3:
+        for i1, a1 in enumerate(alpha):
+            for i2, a2 in enumerate(alpha):
+                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
+    return np.sqrt(term1 - term2 + term3)
+
+
+def compute_kernel(Gn, graph_kernel, verbose):
+    """Compute the normalized Gram matrix of ``Gn`` with the chosen kernel.
+
+    ``graph_kernel`` must be 'marginalizedkernel', 'untilhpathkernel',
+    'spkernel' or 'structuralspkernel'; the kernel parameters are the ones
+    hard-coded below. The matrix returned by the kernel function is
+    normalized in place, K[i, j] / sqrt(K[i, i] * K[j, j]), and returned.
+
+    Raises:
+        ValueError: if ``graph_kernel`` is none of the names above.
+    """
+    n_jobs = multiprocessing.cpu_count()
+    if graph_kernel == 'marginalizedkernel':
+        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
+                                        p_quit=0.03, n_iteration=10, remove_totters=False,
+                                        n_jobs=n_jobs, verbose=verbose)
+    elif graph_kernel == 'untilhpathkernel':
+        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
+                                      depth=10, k_func='MinMax', compute_method='trie',
+                                      n_jobs=n_jobs, verbose=verbose)
+    elif graph_kernel == 'spkernel':
+        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
+                                 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
+                                 n_jobs=n_jobs, verbose=verbose)
+    elif graph_kernel == 'structuralspkernel':
+        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
+        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
+                                        {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
+                                        n_jobs=n_jobs, verbose=verbose)
+    else:
+        # without this branch Kmatrix would be unbound in the code below.
+        raise ValueError('Unknown graph kernel: ' + str(graph_kernel))
+
+    # normalization; the diagonal is copied first because it is overwritten.
+    Kmatrix_diag = Kmatrix.diagonal().copy()
+    for i in range(len(Kmatrix)):
+        for j in range(i, len(Kmatrix)):
+            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
+            Kmatrix[j][i] = Kmatrix[i][j]
+    return Kmatrix
+
+
+def gram2distances(Kmatrix):
+    """Turn a Gram matrix into the matrix of distances in kernel space.
+
+    Entry (i, j) of the result is sqrt(K[i, i] + K[j, j] - 2 * K[i, j]).
+    No clipping is applied before the square root.
+    """
+    Kmatrix = np.asarray(Kmatrix)
+    diag = Kmatrix.diagonal()
+    return np.sqrt(diag[:, None] + diag[None, :] - 2 * Kmatrix)
+
+
+def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
+    """Compute the pairwise distances of the graphs of ``Gn`` in kernel space.
+
+    If ``Kmatrix`` is None it is computed with compute_kernel(Gn, gkernel,
+    True). Squared distances in (-1e-10, 0) are treated as 0.
+
+    Returns:
+        (dis_mat, dis_max, dis_min, dis_mean): the distance matrix, its
+        largest entry, its smallest non-zero entry (0 if every entry is
+        zero) and its mean.
+
+    Raises:
+        ValueError: if a squared distance is not greater than -1e-10.
+    """
+    dis_mat = np.empty((len(Gn), len(Gn)))
+    # "Kmatrix == None" is evaluated element-wise for a numpy array and cannot
+    # be used as a condition; test the identity instead.
+    if Kmatrix is None:
+        Kmatrix = compute_kernel(Gn, gkernel, True)
+    for i in range(len(Gn)):
+        for j in range(i, len(Gn)):
+            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
+            if dis < 0:
+                if dis > -1e-10:
+                    dis = 0
+                else:
+                    raise ValueError('The distance is negative.')
+            dis_mat[i, j] = np.sqrt(dis)
+            dis_mat[j, i] = dis_mat[i, j]
+    dis_max = np.max(dis_mat)
+    dis_nonzero = dis_mat[dis_mat != 0]
+    dis_min = np.min(dis_nonzero) if dis_nonzero.size > 0 else 0.0
+    dis_mean = np.mean(dis_mat)
+    return dis_mat, dis_max, dis_min, dis_mean
+
+
+def get_same_item_indices(ls):
+    """Get the indices of the same items in a list. Return a dict keyed by items.
+    """
+    idx_dict = {}
+    for idx, item in enumerate(ls):
+        idx_dict.setdefault(item, []).append(idx)
+    return idx_dict
\ No newline at end of file