Browse Source

Update pre-image.

v0.1
jajupmochi 5 years ago
parent
commit
dd810b92da
12 changed files with 1856 additions and 539 deletions
  1. +103
    -0
      preimage/fitDistance.py
  2. +197
    -0
      preimage/ged.py
  3. +3
    -93
      preimage/iam.py
  4. +19
    -79
      preimage/preimage_iam.py
  5. +11
    -40
      preimage/preimage_random.py
  6. +1
    -0
      preimage/test.py
  7. +167
    -0
      preimage/test_iam.py
  8. +13
    -29
      preimage/test_others.py
  9. +289
    -298
      preimage/test_preimage_iam.py
  10. +542
    -0
      preimage/test_preimage_mix.py
  11. +402
    -0
      preimage/test_preimage_random.py
  12. +109
    -0
      preimage/utils.py

+ 103
- 0
preimage/fitDistance.py View File

@@ -0,0 +1,103 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 16 14:20:06 2019

@author: ljia
"""
import numpy as np
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from ged import GED, get_nb_edit_operations
from utils import kernel_distance_matrix

def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    """Alternately compute GEDs and refit the six constant edit costs.

    Starting from unit costs, each of the ``itr_max`` iterations

    1. computes the GED and the numbers of edit operations for every graph
       pair (i, j) with i <= j under the current costs,
    2. records the residual between those GEDs and the distances taken from
       the first value returned by ``kernel_distance_matrix``,
    3. refits the costs by least squares (operation counts against
       distances).

    Parameters
    ----------
    Gn : list
        Graphs to fit on.
    gkernel : str
        Kernel name, forwarded to ``kernel_distance_matrix``.
    itr_max : int
        Number of fitting iterations.

    Returns
    -------
    tuple
        ``c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list``
        where the two lists hold one entry per iteration.
    """
    # distances in feature space, flattened over the upper triangle (diagonal
    # included) in the same pair order as the GED loop below.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
    size = len(dis_k_mat)
    dis_k_vec = np.array([dis_k_mat[row, col]
                          for row in range(size)
                          for col in range(row, size)])

    costs = [1, 1, 1, 1, 1, 1]  # c_vi, c_vr, c_vs, c_ei, c_er, c_es
    residual_list = []
    edit_cost_list = []

    for itr in range(itr_max):
        print('iteration', itr)
        edit_cost_list.append(costs)

        # compute GEDs and numbers of edit operations.
        ged_all = []
        op_counts = []  # one (n_vi, n_vr, n_vs, n_ei, n_er, n_es) row per pair
        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
            for j in range(i, len(Gn)):
                dis, pi_forward, pi_backward = GED(
                    Gn[i], Gn[j], lib='gedlibpy', cost='CONSTANT',
                    method='IPFP', edit_cost_constant=costs,
                    stabilizer='min', repeat=30)
                ged_all.append(dis)
                op_counts.append(get_nb_edit_operations(
                    Gn[i], Gn[j], pi_forward, pi_backward))

        gap = np.array(ged_all) - dis_k_vec
        residual_list.append(np.sqrt(np.sum(np.square(gap))))

        # "fit" GEDs to the distances in feature space by tuning the edit
        # costs with the Least Squares Method.
        nb_cost_mat = np.array(op_counts).reshape(-1, 6)
        edit_costs = np.linalg.lstsq(nb_cost_mat, dis_k_vec, rcond=None)[0]
        # slightly negative solutions are snapped to zero; anything more
        # negative is left as it is (no error is raised).
        edit_costs[(edit_costs < 0) & (edit_costs > -1e-3)] = 0

        costs = list(edit_costs)

    return (*costs, residual_list, edit_cost_list)



if __name__ == '__main__':
    from utils import remove_edges

    # MUTAG dataset (node/edge symbolic labels).
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    # only the first ten graphs are used for this run.
    Gn = Gn[0:10]
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    itr_max = 10
    fitted = fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
    c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = fitted

+ 197
- 0
preimage/ged.py View File

@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 18:44:59 2019

@author: ljia
"""
import numpy as np
import networkx as nx
from tqdm import tqdm
import sys

from gedlibpy import librariesImport, gedlibpy

def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
        edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50):
    """Compute the graph edit distance (GED) between two graphs.

    Parameters
    ----------
    g1, g2 : NetworkX graphs
        Every node must carry an ``'atom'`` attribute. Edge labels are not
        forwarded to the GED library.
    lib : str
        GED backend; only ``'gedlibpy'`` is supported.
    cost, method : str
        Edit cost model and GED method, passed to gedlibpy.
    edit_cost_constant : list
        Constants handed to ``gedlibpy.set_edit_cost``. The default list is
        only read here, never mutated.
    saveGXL : str
        Accepted for interface compatibility; not used by this function.
    stabilizer : None or 'min'
        ``None`` runs the method once. ``'min'`` runs it up to ``repeat``
        times and keeps the run with the smallest upper bound, stopping
        early when the bound reaches 0.
    repeat : int
        Maximum number of runs when ``stabilizer == 'min'``.

    Returns
    -------
    dis : float
        The (best) upper bound reported by gedlibpy.
    pi_forward : list
        For each node of ``g1`` in ``g1.nodes()`` order, the matched node of
        ``g2``, or ``np.inf`` when the node has no counterpart.
    pi_backward : list
        The same, from ``g2`` to ``g1``.

    Raises
    ------
    ValueError
        For an unsupported ``lib`` or ``stabilizer``, or ``repeat < 1`` with
        ``stabilizer='min'``. These cases previously fell through to the
        ``return`` with the results unbound.
    """
    # validate before touching the library so bad arguments fail clearly.
    if lib != 'gedlibpy':
        raise ValueError('unsupported GED library: %r' % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError('unsupported stabilizer: %r' % (stabilizer,))
    if stabilizer == 'min' and repeat < 1:
        raise ValueError("repeat must be >= 1 when stabilizer is 'min'")

    def convertGraph(G):
        """Convert a graph to the proper NetworkX format that can be
        recognized by library gedlibpy.
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        # edge labels (e.g. 'bond_type') are deliberately not forwarded.
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")

    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()

    g = listID[0]
    h = listID[1]
    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
    else:  # stabilizer == 'min'
        upper = np.inf
        for itr in range(repeat):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            if upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
            if upper == 0:
                break

    dis = upper

    # make the map label correct (label remove map as np.inf): indices beyond
    # the node count of the other graph are treated as "no counterpart".
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward


def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
          edit_cost_constant=[], stabilizer='min', repeat=50):
    """Compute GEDs for a group of graphs.

    The previous body was a copy of ``GED`` that referenced ``g1`` and
    ``g2``, which are not parameters of this function, so every call raised
    ``NameError``. It now delegates to ``GED`` for every unordered pair of
    graphs in ``Gn`` (each graph is also compared with itself).

    NOTE(review): the return layout below is a choice made while repairing
    the function; confirm it matches the intended use.

    Parameters
    ----------
    Gn : list of NetworkX graphs
    lib, cost, method, edit_cost_constant, stabilizer, repeat
        Forwarded unchanged to ``GED``.

    Returns
    -------
    dis : numpy.ndarray, shape (len(Gn), len(Gn))
        ``dis[i, j]`` is the GED computed for the pair (i, j); the value
        computed for i <= j is mirrored to (j, i).
    pi_forward : list of lists
        ``pi_forward[i][j]`` maps the nodes of ``Gn[i]`` to nodes of
        ``Gn[j]``.
    pi_backward : list of lists
        ``pi_backward[i][j]`` maps the nodes of ``Gn[j]`` to nodes of
        ``Gn[i]``.

    Raises
    ------
    ValueError
        If ``lib`` is not ``'gedlibpy'``.
    """
    if lib != 'gedlibpy':
        raise ValueError('unsupported GED library: %r' % (lib,))

    nb = len(Gn)
    dis = np.zeros((nb, nb))
    pi_forward = [[None] * nb for _ in range(nb)]
    pi_backward = [[None] * nb for _ in range(nb)]
    for i in range(nb):
        for j in range(i, nb):
            dis_ij, fwd, bwd = GED(Gn[i], Gn[j], lib=lib, cost=cost,
                                   method=method,
                                   edit_cost_constant=edit_cost_constant,
                                   stabilizer=stabilizer, repeat=repeat)
            dis[i, j] = dis_ij
            dis[j, i] = dis_ij
            # mirrored entry first, so that for i == j the direct maps win.
            pi_forward[j][i] = bwd
            pi_backward[j][i] = fwd
            pi_forward[i][j] = fwd
            pi_backward[i][j] = bwd
    return dis, pi_forward, pi_backward


def ged_median(Gn, Gn_median, measure='ged', verbose=False,
               ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for each graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is accepted but not used. With ``verbose`` the outer loop is
    wrapped in a tqdm progress bar written to stdout.

    Returns
    -------
    dis_list : list
        One summed distance per graph of ``Gn``.
    pi_forward_list : list of lists
        ``pi_forward_list[i][k]`` is the forward node map returned by ``GED``
        for ``Gn[i]`` against ``Gn_median[k]``.
    """
    candidates = enumerate(Gn)
    if verbose:
        candidates = tqdm(candidates, desc='computing median distances',
                          file=sys.stdout)

    dis_list = []
    pi_forward_list = []
    for _, graph in candidates:
        total = 0
        forward_maps = []
        for median_graph in Gn_median:
            dis_tmp, pi_tmp_forward, _ = GED(graph, median_graph,
                                             cost=ged_cost, method=ged_method,
                                             saveGXL=saveGXL)
            forward_maps.append(pi_tmp_forward)
            total += dis_tmp
        pi_forward_list.append(forward_maps)
        dis_list.append(total)
    return dis_list, pi_forward_list


def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operation implied by a node mapping.

    Parameters
    ----------
    g1, g2 : NetworkX graphs
        Nodes carry an ``'atom'`` attribute, which is the only label compared.
    forward_map : list
        ``forward_map[i]`` is the node of ``g2`` matched to the i-th node of
        ``g1`` (in ``g1.nodes()`` order), or ``np.inf`` if that node is
        removed.
    backward_map : list
        The same from ``g2`` to ``g1``; ``np.inf`` marks an inserted node.

    Returns
    -------
    tuple
        ``(n_vi, n_vr, n_vs, n_ei, n_er, n_es)``: numbers of node insertions,
        removals and substitutions, then edge insertions, removals and
        substitutions. Edge labels are not considered, so ``n_es`` is
        always 0.
    """
    n_vr = 0
    n_vs = 0
    n_er = 0
    n_es = 0  # edge labels are not compared, so never incremented.

    nodes1 = list(g1.nodes())
    # ``dict(G.nodes(data=True))`` replaces the ``G.node`` attribute, which
    # NetworkX removed in 2.4.
    attrs1 = dict(g1.nodes(data=True))
    attrs2 = dict(g2.nodes(data=True))

    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        elif attrs1[nodes1[i]]['atom'] != attrs2[map_i]['atom']:
            n_vs += 1
    n_vi = sum(1 for map_i in backward_map if map_i == np.inf)

    # node -> position lookup, replacing a linear ``list.index`` scan for
    # every edge endpoint.
    position = {node: idx for idx, node in enumerate(nodes1)}
    nb_edges2_cnted = 0
    for n1, n2 in g1.edges():
        image1 = forward_map[position[n1]]
        image2 = forward_map[position[n2]]
        # one of the nodes is removed, thus the edge is removed.
        if image1 == np.inf or image2 == np.inf:
            n_er += 1
        # corresponding edge is in g2 (either orientation). Edge label is not
        # considered.
        elif g2.has_edge(image1, image2) or g2.has_edge(image2, image1):
            nb_edges2_cnted += 1
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    # every edge of g2 that is not the image of a g1 edge was inserted.
    n_ei = g2.number_of_edges() - nb_edges2_cnted

    return n_vi, n_vr, n_vs, n_ei, n_er, n_es

+ 3
- 93
preimage/iam.py View File

@@ -12,10 +12,10 @@ import networkx as nx
from tqdm import tqdm from tqdm import tqdm


import sys import sys
from gedlibpy import librariesImport, gedlibpy
sys.path.insert(0, "../") sys.path.insert(0, "../")
from pygraph.utils.graphdataset import get_dataset_attributes from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
from ged import GED, ged_median




def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
@@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
# # find the best graph generated in this iteration and update pi_p. # # find the best graph generated in this iteration and update pi_p.
# @todo: should we update all graphs generated or just the best ones? # @todo: should we update all graphs generated or just the best ones?
dis_list, pi_forward_list = median_distance(G_new_list, Gn_median,
dis_list, pi_forward_list = ged_median(G_new_list, Gn_median,
**params_ged) **params_ged)
# @todo: should we remove the identical and connectivity check? # @todo: should we remove the identical and connectivity check?
# Don't know which is faster. # Don't know which is faster.
@@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
# phase 1: initialize. # phase 1: initialize.
# compute set-median. # compute set-median.
dis_min = np.inf dis_min = np.inf
dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median,
dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
**params_ged) **params_ged)
# find all smallest distances. # find all smallest distances.
if allBestInit: # try all best init graphs. if allBestInit: # try all best init graphs.
@@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,













###############################################################################
# Useful functions.

def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit',
        stabilizer='min'):
    """Compute the graph edit distance (GED) between two graphs.

    Parameters
    ----------
    g1, g2 : NetworkX graphs
        Every node must carry an ``'atom'`` attribute. Edge labels are not
        forwarded to the GED library.
    lib : str
        GED backend; only ``'gedlibpy'`` is supported.
    cost, method : str
        Edit cost model and GED method, passed to gedlibpy.
    saveGXL : str
        Accepted for interface compatibility; not used by this function.
    stabilizer : None or 'min'
        ``None`` runs the method once. ``'min'`` runs it up to 50 times and
        keeps the run with the smallest upper bound, stopping early when the
        bound reaches 0.

    Returns
    -------
    dis : float
        The (best) upper bound reported by gedlibpy.
    pi_forward : list
        For each node of ``g1`` in ``g1.nodes()`` order, the matched node of
        ``g2``, or ``np.inf`` when the node has no counterpart.
    pi_backward : list
        The same, from ``g2`` to ``g1``.

    Raises
    ------
    ValueError
        For an unsupported ``lib`` or ``stabilizer``. These cases previously
        fell through to the ``return`` with the results unbound.
    """
    # validate before touching the library so bad arguments fail clearly.
    if lib != 'gedlibpy':
        raise ValueError('unsupported GED library: %r' % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError('unsupported stabilizer: %r' % (stabilizer,))

    def convertGraph(G):
        """Convert a graph to the proper NetworkX format that can be
        recognized by library gedlibpy.
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        # edge labels (e.g. 'bond_type') are deliberately not forwarded.
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")

    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()

    g = listID[0]
    h = listID[1]
    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
    else:  # stabilizer == 'min'
        upper = np.inf
        for itr in range(50):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            if upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
            if upper == 0:
                break

    dis = upper

    # make the map label correct (label remove map as np.inf): indices beyond
    # the node count of the other graph are treated as "no counterpart".
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward


def median_distance(Gn, Gn_median, measure='ged', verbose=False,
                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for each graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is accepted but not used. With ``verbose`` the outer loop is
    wrapped in a tqdm progress bar written to stdout.

    Returns
    -------
    dis_list : list
        One summed distance per graph of ``Gn``.
    pi_forward_list : list of lists
        ``pi_forward_list[i][k]`` is the forward node map returned by ``GED``
        for ``Gn[i]`` against ``Gn_median[k]``.
    """
    iterator = enumerate(Gn)
    if verbose:
        iterator = tqdm(iterator, desc='computing median distances',
                        file=sys.stdout)

    dis_list = []
    pi_forward_list = []
    for _, graph in iterator:
        sum_of_distances = 0
        maps_to_medians = []
        for median_graph in Gn_median:
            dis_tmp, pi_tmp_forward, _ = GED(graph, median_graph,
                                             cost=ged_cost, method=ged_method,
                                             saveGXL=saveGXL)
            maps_to_medians.append(pi_tmp_forward)
            sum_of_distances += dis_tmp
        pi_forward_list.append(maps_to_medians)
        dis_list.append(sum_of_distances)
    return dis_list, pi_forward_list


############################################################################### ###############################################################################
# Old implementations. # Old implementations.


+ 19
- 79
preimage/preimage_iam.py View File

@@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2].
""" """
import sys import sys
import numpy as np import numpy as np
import multiprocessing
from tqdm import tqdm from tqdm import tqdm
import networkx as nx import networkx as nx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import random import random


from iam import iam_upgraded from iam import iam_upgraded
sys.path.insert(0, "../")
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.kernels.spKernel import spkernel
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel
from utils import dis_gstar, compute_kernel




def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
@@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
# print(g.nodes(data=True)) # print(g.nodes(data=True))
# print(g.edges(data=True)) # print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
for gi in Gk:
nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
# nx.draw_networkx(gi)
plt.show()
# draw_Letter_graph(g)
print(gi.nodes(data=True))
print(gi.edges(data=True))
# for gi in Gk:
# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
## nx.draw_networkx(gi)
# plt.show()
## draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
# i = 1 # i = 1
r = 0 r = 0
@@ -173,7 +166,7 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
print('\nthe k shortest distances are', dis_k) print('\nthe k shortest distances are', dis_k)
print('the shortest distances for previous iterations are', dis_of_each_itr) print('the shortest distances for previous iterations are', dis_of_each_itr)
print('\nthe graph is updated', nb_updated, 'times.')
print('\n\nthe graph is updated', nb_updated, 'times.')
print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.') print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
print('distances in kernel space:', dis_of_each_itr, '\n') print('distances in kernel space:', dis_of_each_itr, '\n')
@@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
# print(g.nodes(data=True)) # print(g.nodes(data=True))
# print(g.edges(data=True)) # print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
for gi in Gk:
nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
# nx.draw_networkx(gi)
plt.show()
# draw_Letter_graph(g)
print(gi.nodes(data=True))
print(gi.edges(data=True))
# for gi in Gk:
# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
## nx.draw_networkx(gi)
# plt.show()
## draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
r = 0 r = 0
itr_total = 0 itr_total = 0
@@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
# compute distance between \psi and the new generated graph. # compute distance between \psi and the new generated graph.
knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False) knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
alpha, knew, withterm3=False)
# @todo: the new distance is smaller or also equal? # @todo: the new distance is smaller or also equal?
if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon: if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
# check if the new distance is the same as one in D_k. # check if the new distance is the same as one in D_k.
@@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
print('\nthe k shortest distances are', dis_k) print('\nthe k shortest distances are', dis_k)
print('the shortest distances for previous iterations are', dis_of_each_itr) print('the shortest distances for previous iterations are', dis_of_each_itr)
print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
nb_updated_random, 'times.') nb_updated_random, 'times.')
print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
'times, and by random generation', nb_updated_k_random, 'times.') 'times, and by random generation', nb_updated_k_random, 'times.')
@@ -459,60 +453,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max




############################################################################### ###############################################################################
# useful functions.

def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Return sqrt(K[g, g] - 2 * sum_i a_i K[g, gi_i] + term3).

    ``alpha[i]`` weights the graph at index ``idx_gi[i]`` of ``Kmatrix``.
    When ``withterm3`` is ``False`` the double sum
    ``sum_{i,j} a_i a_j K[gi_i, gi_j]`` is computed here and added to the
    ``term3`` passed in; otherwise ``term3`` is used as given.
    """
    self_similarity = Kmatrix[idx_g, idx_g]

    cross = sum(weight * Kmatrix[idx_g, idx_gi[pos]]
                for pos, weight in enumerate(alpha))
    cross *= 2

    if withterm3 == False:
        # accumulate on top of the caller-supplied term3, row by row.
        term3 = sum((w1 * w2 * Kmatrix[idx_gi[p1], idx_gi[p2]]
                     for p1, w1 in enumerate(alpha)
                     for p2, w2 in enumerate(alpha)), term3)

    return np.sqrt(self_similarity - cross + term3)


def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the Gram matrix of ``Gn`` with the named kernel and normalize it.

    Parameters
    ----------
    Gn : list of graphs
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry (i, j) divided by
        ``sqrt(K[i, i] * K[j, j])``.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the names above. Previously this
        reached the normalization with ``Kmatrix`` unbound.
    """
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                p_quit=0.03, n_iteration=10, remove_totters=False,
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                depth=10, k_func='MinMax', compute_method='trie',
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    else:
        raise ValueError('unknown graph kernel: %r' % (graph_kernel,))

    # normalization: the diagonal is copied first because it is overwritten
    # in place; only the upper triangle is computed and then mirrored.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix

def gram2distances(Kmatrix):
    """Convert a Gram matrix into the matrix of kernel-induced distances.

    Entry (i, j) of the result is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.

    The element-by-element Python double loop is replaced by one broadcast
    NumPy expression that evaluates the same operations in the same order.
    As before, a negative value under the square root yields ``nan``.
    """
    gram = np.asarray(Kmatrix)
    diag = np.diagonal(gram)
    squared = diag[:, np.newaxis] + diag[np.newaxis, :] - 2 * gram
    # the result is always a float matrix, as with the former np.zeros buffer.
    return np.sqrt(squared.astype(float))


###############################################################################
# Old implementations. # Old implementations.
#def gk_iam(Gn, alpha): #def gk_iam(Gn, alpha):


+ 11
- 40
preimage/preimage_random.py View File

@@ -10,51 +10,14 @@ pre-image
import sys import sys
import numpy as np import numpy as np
import random import random
import multiprocessing
from tqdm import tqdm from tqdm import tqdm
import networkx as nx import networkx as nx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt




sys.path.insert(0, "../") sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.kernels.spKernel import spkernel
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel


from gk_iam import dis_gstar


def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the Gram matrix of ``Gn`` with the named kernel and normalize it.

    Parameters
    ----------
    Gn : list of graphs
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry (i, j) divided by
        ``sqrt(K[i, i] * K[j, j])``.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the names above. Previously this
        reached the normalization with ``Kmatrix`` unbound.
    """
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                p_quit=0.03, n_iteration=10, remove_totters=False,
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                depth=10, k_func='MinMax', compute_method='trie',
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    else:
        raise ValueError('unknown graph kernel: %r' % (graph_kernel,))

    # normalization: the diagonal is copied first because it is overwritten
    # in place; only the upper triangle is computed and then mirrored.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
from utils import compute_kernel, dis_gstar




def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
@@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
r = 0 r = 0
# sod_list = [dhat] # sod_list = [dhat]
# found = False # found = False
dis_of_each_itr = [dhat]
nb_updated = 0 nb_updated = 0
g_best = [] g_best = []
while r < r_max: while r < r_max:
@@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
# p_quit=lmbda, n_iteration=20, remove_totters=False, # p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False) # n_jobs=multiprocessing.cpu_count(), verbose=False)
knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False) knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
withterm3=False)
if dnew <= dhat: # @todo: the new distance is smaller or also equal? if dnew <= dhat: # @todo: the new distance is smaller or also equal?
if dnew < dhat: if dnew < dhat:
print('\nI am smaller!') print('\nI am smaller!')
@@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
dihat_list = [dhat] dihat_list = [dhat]
else: else:
r += 1 r += 1
dis_of_each_itr.append(dhat)
print('the shortest distances for previous iterations are', dis_of_each_itr)
# dis_best.append(dhat) # dis_best.append(dhat)
g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
print('distances in kernel space:', dis_of_each_itr, '\n')
return dhat, g_best, nb_updated return dhat, g_best, nb_updated
# return 0, 0, 0 # return 0, 0, 0




if __name__ == '__main__': if __name__ == '__main__':
from pygraph.utils.graphfiles import loadDataset
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', # ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb # 'extra_params': {}} # node/edge symb


+ 1
- 0
preimage/test.py View File

@@ -80,5 +80,6 @@ def testNxGrapĥ():
print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h)) print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h))) print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))


#test()
init() init()
#testNxGrapĥ() #testNxGrapĥ()

+ 167
- 0
preimage/test_iam.py View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from iam import iam_upgraded
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median

###############################################################################
# tests on different numbers of median-sets.

# Experiment script: for each size in nb_median_range, draw that many graphs
# (fixed seed) from the class-1 graphs of MUTAG, run iam_upgraded on them with
# the drawn graphs as both median set and candidates, time the run, and
# compute the sum of GEDs (ged_median) from the resulting graph to the median
# set. Summary lists are printed at the end.
#
# NOTE(review): `dhat`, `ghat_list`, `nb_updated` and `nb_updated_k` are used
# below but never assigned in this function; the result of iam_upgraded is
# bound to `ghat_new_list, dis_min`. Unless those names exist as globals, the
# first print after timing raises NameError. Presumably `dis_min` and
# `ghat_new_list` were intended - confirm before changing.
def test_iam_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
# NOTE(review): gkernel is assigned but not used anywhere below.
gkernel = 'marginalizedkernel'
# lmbda = 0.03 # termination probability
# r_max = 10 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
# epsilon = 1e-6
# InitIAMWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = False
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
# per-run results, one entry per value of nb_median.
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
# re-seeding on every pass makes each draw reproducible.
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
Gn_candidate = [g.copy() for g in Gn_median]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
# the precomputed Gram matrix is reloaded from disk on every pass.
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix: the loaded matrix is copied into the top-left
# block, then the rows/columns of the chosen graphs are duplicated into the
# extra nb_median rows/columns.
# NOTE(review): km, time_km and alpha_range are computed but not used in the
# rest of this function.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate,
c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
epsilon=epsilon_iam, removeNodes=removeNodes,
connected=connected_iam,
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0
print('\ntime: ', time_total)
time_list.append(time_total)
# NOTE(review): dhat / ghat_list / nb_updated / nb_updated_k are undefined
# here (see the note above the function).
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
print('\nnumber of updates of the best graph: ', nb_updated)
nb_updated_list.append(nb_updated)
print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
nb_updated_k_list.append(nb_updated_k)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
# NOTE(review): savefig is called after show(); depending on the matplotlib
# backend the saved image may be empty - confirm the intended order.
plt.show()
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
'.png', format="PNG")
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
# summary over all median-set sizes.
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)
###############################################################################

# Script entry point: runs test_iam_median_nb() when this file is executed
# directly.
if __name__ == '__main__':
###############################################################################
# tests on different numbers of median-sets.
test_iam_median_nb()

preimage/run_gk_iam.py → preimage/test_others.py View File

@@ -15,6 +15,9 @@ import sys
sys.path.insert(0, "../") sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset from pygraph.utils.graphfiles import loadDataset
from median import draw_Letter_graph from median import draw_Letter_graph
from ged import GED, ged_median
from utils import get_same_item_indices, compute_kernel, gram2distances, \
dis_gstar, remove_edges




# --------------------------- These are tests --------------------------------# # --------------------------- These are tests --------------------------------#
@@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn):




def test_who_is_the_closest_in_GED_space(Gn): def test_who_is_the_closest_in_GED_space(Gn):
from iam import GED
idx_gi = [0, 6] idx_gi = [0, 6]
g1 = Gn[idx_gi[0]] g1 = Gn[idx_gi[0]]
g2 = Gn[idx_gi[1]] g2 = Gn[idx_gi[1]]
@@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn):
def test_the_simple_two(Gn, gkernel): def test_the_simple_two(Gn, gkernel):
from gk_iam import gk_iam_nearest_multi, compute_kernel
from gk_iam import gk_iam_nearest_multi
lmbda = 0.03 # termination probalility lmbda = 0.03 # termination probalility
r_max = 10 # recursions r_max = 10 # recursions
l = 500 l = 500
@@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel):
def test_remove_bests(Gn, gkernel): def test_remove_bests(Gn, gkernel):
from gk_iam import gk_iam_nearest_multi, compute_kernel
from gk_iam import gk_iam_nearest_multi
lmbda = 0.03 # termination probalility lmbda = 0.03 # termination probalility
r_max = 10 # recursions r_max = 10 # recursions
l = 500 l = 500
@@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel):
# Tests on dataset Letter-H. # Tests on dataset Letter-H.
def test_gkiam_letter_h(): def test_gkiam_letter_h():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb 'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -305,7 +306,7 @@ def test_gkiam_letter_h():
print(g.edges(data=True)) print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.) # compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER',
sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER',
ged_method='IPFP', saveGXL='gedlib-letter') ged_method='IPFP', saveGXL='gedlib-letter')
sod_gs_list.append(sod_tmp) sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp)) sod_gs_min_list.append(np.min(sod_tmp))
@@ -318,19 +319,6 @@ def test_gkiam_letter_h():
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
print('\nnumber of updates for each letter: ', nb_updated_list) print('\nnumber of updates for each letter: ', nb_updated_list)
print('\ntimes:', time_list) print('\ntimes:', time_list)
def get_same_item_indices(ls):
    """Get the indices of the same items in a list. Return a dict keyed by items.

    Parameters
    ----------
    ls : iterable of hashable items
        Sequence to group; items are used as dictionary keys.

    Returns
    -------
    dict
        Maps each distinct item to the list of positions (in increasing
        order) at which it occurs in ``ls``. Keys appear in order of first
        occurrence. An empty input yields an empty dict.
    """
    idx_dict = {}
    for idx, item in enumerate(ls):
        # setdefault creates the bucket on first sight of an item, so no
        # separate membership test is needed.
        idx_dict.setdefault(item, []).append(idx)
    return idx_dict



#def compute_letter_median_by_average(Gn): #def compute_letter_median_by_average(Gn):
# return g_median # return g_median
@@ -338,7 +326,6 @@ def get_same_item_indices(ls):


def test_iam_letter_h(): def test_iam_letter_h():
from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
from gk_iam import dis_gstar, compute_kernel
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb 'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -402,7 +389,7 @@ def test_iam_letter_h():
def test_random_preimage_letter_h(): def test_random_preimage_letter_h():
from preimage_random import preimage_random, compute_kernel
from preimage_random import preimage_random
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb 'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -463,7 +450,7 @@ def test_random_preimage_letter_h():
print(g.edges(data=True)) print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.) # compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_list.append(sod_tmp) sod_list.append(sod_tmp)
sod_min_list.append(np.min(sod_tmp)) sod_min_list.append(np.min(sod_tmp))
@@ -479,8 +466,7 @@ def test_random_preimage_letter_h():
def test_gkiam_mutag(): def test_gkiam_mutag():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb 'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', # ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -535,7 +521,7 @@ def test_gkiam_mutag():
print(g.edges(data=True)) print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.) # compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_gs_list.append(sod_tmp) sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp)) sod_gs_min_list.append(np.min(sod_tmp))
sod_ks_min_list.append(sod_ks) sod_ks_min_list.append(sod_ks)
@@ -553,9 +539,7 @@ def test_gkiam_mutag():
# Re-test. # Re-test.
def retest_the_simple_two(): def retest_the_simple_two():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from test_random_mutag import remove_edges
from gk_iam import gk_iam_nearest_multi
# The two simple graphs. # The two simple graphs.
# g1 = nx.Graph(name='haha') # g1 = nx.Graph(name='haha')
@@ -653,7 +637,7 @@ def retest_the_simple_two():
# compute the corresponding sod in graph space. # compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range): for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL) ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp) sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp)) sod_gs_min_list.append(np.min(sod_tmp))

preimage/test_random_mutag.py → preimage/test_preimage_iam.py View File

@@ -10,20 +10,23 @@ import numpy as np
import networkx as nx import networkx as nx
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import time import time
from tqdm import tqdm
import random
#from tqdm import tqdm


import os
#import os
import sys import sys
sys.path.insert(0, "../") sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset from pygraph.utils.graphfiles import loadDataset
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median

from preimage_iam import preimage_iam



############################################################################### ###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
# tests on different values on grid of median-sets and k.


def test_preimage_mix_2combination_all_pairs():
from preimage_iam import preimage_iam_random_mix, compute_kernel
from iam import median_distance
def test_preimage_iam_grid_k_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb 'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs():
gkernel = 'marginalizedkernel' gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probalility lmbda = 0.03 # termination probalility
r_max = 10 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
r_max = 5 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
epsilon = 1e-6 epsilon = 1e-6
InitIAMWithAllDk = True InitIAMWithAllDk = True
InitRandomWithAllDk = True
# parameters for GED function # parameters for GED function
ged_cost='CHEM_1' ged_cost='CHEM_1'
ged_method='IPFP' ged_method='IPFP'
@@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs():
removeNodes = True removeNodes = True
connected_iam = False connected_iam = False
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
# test on each pair of graphs.
# for idx1 in range(len(Gn) - 1, -1, -1):
# for idx2 in range(idx1, -1, -1):
for idx1 in range(187, 188):
for idx2 in range(167, 168):
g1 = Gn[idx1].copy()
g2 = Gn[idx2].copy()
# Gn[10] = []
# Gn[10] = []
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# number of nearest neighbors.
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for idx_nb, nb_median in enumerate(nb_median_range):
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
sod_gs_min_list.append([])
nb_updated_list.append([])
nb_updated_k_list.append([])
g_best.append([])
for k in k_range:
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
print('k =', k)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
preimage_iam(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list[idx_nb].append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list[idx_nb].append(dhat)
g_best[idx_nb].append(ghat_list)
print('\nnumber of updates of the best graph by IAM: ', nb_updated)
nb_updated_list[idx_nb].append(nb_updated)
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
nb_updated_k_list[idx_nb].append(nb_updated_k)
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
plt.show()
plt.clf()
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
plt.show()
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
'_k' + str(k) + '.png', format="PNG")
# plt.show()
plt.clf() plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list[idx_nb].append(sod_tmp)
sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs and k: ',
sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs and k: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)


###################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
#
# # write Gram matrix to file and read it.
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################
# # use only the two graphs in median set as candidates.
# Gn = [g1.copy(), g2.copy()]
# Gn_mix = Gn + [g1.copy(), g2.copy()]
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0

###############################################################################
# tests on different numbers of median-sets.

def test_preimage_iam_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
nb_updated_k_list_iam = []
nb_updated_k_list_random = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
nb_updated_k_iam, nb_updated_k_random = \
preimage_iam_random_mix(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
InitRandomWithAllDk=InitRandomWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list_iam.append(nb_updated_iam)
nb_updated_list_random.append(nb_updated_random)
nb_updated_k_list_iam.append(nb_updated_k_iam)
nb_updated_k_list_random.append(nb_updated_k_random)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
+ '_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(g_best[idx][0].nodes(data=True))
# print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
lmbda = 0.03 # termination probalility
r_max = 10 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
print('\nnumber of updates of the best graph for each alpha by random generation: ',
nb_updated_list_random)
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
nb_updated_k_list_iam)
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
nb_updated_k_list_random)
print('\ntimes:', time_list)
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
preimage_iam(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
content = file.read()
file.seek(0, 0)
file.write(str_fw + content)
time_total = time.time() - time0 + time_km
print('\ntime: ', time_total)
time_list.append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
print('\nnumber of updates of the best graph: ', nb_updated)
nb_updated_list.append(nb_updated)
print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
nb_updated_k_list.append(nb_updated_k)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
# plt.show()
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
'.png', format="PNG")
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)


def test_gkiam_2combination_all_pairs(): def test_gkiam_2combination_all_pairs():
from preimage_iam import preimage_iam, compute_kernel
from iam import median_distance
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb 'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs():
# compute the corresponding sod in graph space. # compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range): for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL) ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp) sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp)) sod_gs_min_list.append(np.min(sod_tmp))
@@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs():


def test_gkiam_2combination(): def test_gkiam_2combination():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb 'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -451,7 +578,7 @@ def test_gkiam_2combination():
# compute the corresponding sod in graph space. # compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range): for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL) ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp) sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp)) sod_gs_min_list.append(np.min(sod_tmp))
@@ -463,148 +590,6 @@ def test_gkiam_2combination():
print('\ntimes:', time_list) print('\ntimes:', time_list)
def test_random_preimage_2combination():
# from gk_iam import compute_kernel
from preimage_random import preimage_random
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:12]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
# print(dis_max, dis_min, dis_mean)
lmbda = 0.03 # termination probalility
r_max = 10 # iteration limit for pre-image.
l = 500
alpha_range = np.linspace(0, 1, 11)
k = 5 # k nearest neighbors
# randomly select two molecules
np.random.seed(1)
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
# plt.show()
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
# plt.show()
######################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
## g_tmp = iam([g1, g2])
## nx.draw_networkx(g_tmp)
## plt.show()
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
###################################################################
idx1 = idx_gi[0]
idx2 = idx_gi[1]
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################

time_list = []
nb_updated_list = []
g_best = []
dis_ks_min_list = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
range(len(Gn), len(Gn) + 2), km,
k, r_max, l, gkernel)
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
with_labels=True)
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
plt.clf()
print(g_best[idx].nodes(data=True))
print(g_best[idx].edges(data=True))
# # compute the corresponding sod in graph space. (alpha range not considered.)
# sod_tmp, _ = median_distance(g_best[0], Gn_let)
# sod_gs_list.append(sod_tmp)
# sod_gs_min_list.append(np.min(sod_tmp))
# sod_ks_min_list.append(sod_ks)
# nb_updated_list.append(nb_updated)
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)

###############################################################################
# help functions

def remove_edges(Gn):
    """Clear the attribute dict of every edge of every graph in Gn.

    The graphs are modified in place; the edges themselves are kept, only
    their attributes are emptied. Nothing is returned.

    Parameters
    ----------
    Gn : iterable of graphs
        Each graph must provide ``edges(data=True)`` yielding
        ``(u, v, attrs)`` triples where ``attrs`` is a mutable dict
        (as NetworkX graphs do).
    """
    for G in Gn:
        for _, _, attrs in G.edges(data=True):
            attrs.clear()
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute the pairwise distance matrix induced by a graph kernel.

    The distance between graphs i and j is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.

    Parameters
    ----------
    Gn : sequence of graphs
        Only ``len(Gn)`` is used when ``Kmatrix`` is supplied.
    Kmatrix : 2-D array indexable as ``Kmatrix[i, j]``, optional
        Precomputed Gram matrix. When ``None`` it is computed with
        ``compute_kernel(Gn, gkernel, True)``.
    gkernel : optional
        Kernel identifier forwarded to ``compute_kernel``; only used when
        ``Kmatrix`` is ``None``.

    Returns
    -------
    dis_mat : numpy.ndarray
        Symmetric ``len(Gn) x len(Gn)`` matrix of distances.
    dis_max : float
        Largest entry of ``dis_mat``.
    dis_min : float
        Smallest non-zero entry of ``dis_mat``.
    dis_mean : float
        Mean over all entries of ``dis_mat`` (zeros included).

    Raises
    ------
    ValueError
        If a squared distance is below ``-1e-10``. Also raised by NumPy when
        every distance is zero, since there is then no non-zero entry to
        take the minimum of.
    """
    # Identity test, not ``== None``: comparing a NumPy array with ``==``
    # is elementwise and makes the ``if`` raise for any matrix with more
    # than one element.
    if Kmatrix is None:
        # Imported only when the Gram matrix actually has to be computed.
        from gk_iam import compute_kernel
        Kmatrix = compute_kernel(Gn, gkernel, True)

    dis_mat = np.empty((len(Gn), len(Gn)))
    for i in range(len(Gn)):
        for j in range(i, len(Gn)):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # Tolerate tiny negative values caused by rounding errors.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]

    dis_max = np.max(dis_mat)
    dis_min = np.min(dis_mat[dis_mat != 0])
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean
############################################################################### ###############################################################################


@@ -612,7 +597,13 @@ if __name__ == '__main__':
############################################################################### ###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the # test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.) # random pre-image paper.)
# test_random_preimage_2combination()
# test_gkiam_2combination() # test_gkiam_2combination()
# test_gkiam_2combination_all_pairs() # test_gkiam_2combination_all_pairs()
test_preimage_mix_2combination_all_pairs()
###############################################################################
# tests on different numbers of median-sets.
test_preimage_iam_median_nb()
###############################################################################
# tests on different values on grid of median-sets and k.
# test_preimage_iam_grid_k_median_nb()

+ 542
- 0
preimage/test_preimage_mix.py View File

@@ -0,0 +1,542 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges
from preimage_iam import preimage_iam_random_mix

###############################################################################
# Tests over a grid of median-set sizes and numbers of nearest neighbors (k).

def test_preimage_mix_grid_k_median_nb():
    """Run the mixed (IAM + random generation) pre-image test on a grid.

    For every median-set size in ``nb_median_range`` and every number of
    nearest neighbors in ``k_range``, ``preimage_iam_random_mix`` is called
    on the selected MUTAG graphs. The first returned pre-image is drawn to
    ``results/preimage_mix/`` and compared with the median set through
    ``ged_median``. All statistics are printed; nothing is returned.

    The Gram matrix is read from a precomputed ``.npz`` file that must
    already exist (see the recipe in the body).
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation.
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function.
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function.
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False

    # numbers of graphs whose median we want to compute.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # numbers of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # keep only the graphs of the positive group (label 1).
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # The Gram matrix does not depend on the loop variables, so it is loaded
    # once; the ``with`` block closes the .npz archive (np.load keeps the file
    # open otherwise). The file was produced with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seed so that the chosen graphs are reproducible for each size.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the first len(Gn) rows/columns hold the
        # candidate graphs, the trailing nb_median rows/columns repeat the
        # entries of the graphs chosen as median set.
        nb_graphs = len(Gn)
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(nb_graphs):
            for j, idx in enumerate(idx_rdm):
                km[i, nb_graphs + j] = km[i, idx]
                km[nb_graphs + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[nb_graphs + i, nb_graphs + j] = km[idx1, idx2]

        # uniform weights over the median set.
        alpha_range = [1 / nb_median] * nb_median

        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list_iam.append([])
        nb_updated_list_random.append([])
        nb_updated_k_list_iam.append([])
        nb_updated_k_list_random.append([])
        g_best.append([])

        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                nb_updated_k_iam, nb_updated_k_random = \
                preimage_iam_random_mix(
                    Gn, Gn_median, alpha_range,
                    range(len(Gn), len(Gn) + nb_median), km, k, r_max,
                    l_max, gkernel, epsilon=epsilon,
                    InitIAMWithAllDk=InitIAMWithAllDk,
                    InitRandomWithAllDk=InitRandomWithAllDk,
                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                                'removeNodes': removeNodes,
                                'connected': connected_iam},
                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                                'saveGXL': saveGXL})

            # The reported time includes the Gram matrix computation time.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat_list)
            print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
            nb_updated_list_iam[idx_nb].append(nb_updated_iam)
            print('\nnumber of updates of the best graph by random generation: ',
                  nb_updated_random)
            nb_updated_list_random[idx_nb].append(nb_updated_random)
            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
            nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
            print('\nnumber of updates of k nearest graphs by random generation: ',
                  nb_updated_k_random)
            nb_updated_k_list_random[idx_nb].append(nb_updated_k_random)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
                                    ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    # summary over the whole grid.
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ',
          nb_updated_k_list_random)
    print('\ntimes:', time_list)


###############################################################################
# Tests with different sizes of the median set.

def test_preimage_mix_median_nb():
    """Run the mixed (IAM + random generation) pre-image test per median size.

    For every median-set size in ``nb_median_range`` (with ``k`` fixed to 5),
    ``preimage_iam_random_mix`` is called on the selected MUTAG graphs. The
    first returned pre-image is drawn to ``results/preimage_mix/`` and
    compared with the median set through ``ged_median``. All statistics are
    printed; nothing is returned.

    The Gram matrix is read from a precomputed ``.npz`` file that must
    already exist (see the recipe in the body).
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation.
    k = 5  # k nearest neighbors.
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function.
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function.
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False

    # numbers of graphs whose median we want to compute.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # keep only the graphs of the positive group (label 1).
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # The Gram matrix does not depend on the loop variable, so it is loaded
    # once; the ``with`` block closes the .npz archive (np.load keeps the file
    # open otherwise). The file was produced with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seed so that the chosen graphs are reproducible for each size.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the first len(Gn) rows/columns hold the
        # candidate graphs, the trailing nb_median rows/columns repeat the
        # entries of the graphs chosen as median set.
        nb_graphs = len(Gn)
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(nb_graphs):
            for j, idx in enumerate(idx_rdm):
                km[i, nb_graphs + j] = km[i, idx]
                km[nb_graphs + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[nb_graphs + i, nb_graphs + j] = km[idx1, idx2]

        # uniform weights over the median set.
        alpha_range = [1 / nb_median] * nb_median

        time0 = time.time()
        dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
            nb_updated_k_iam, nb_updated_k_random = \
            preimage_iam_random_mix(
                Gn, Gn_median, alpha_range,
                range(len(Gn), len(Gn) + nb_median), km, k, r_max,
                l_max, gkernel, epsilon=epsilon,
                InitIAMWithAllDk=InitIAMWithAllDk,
                InitRandomWithAllDk=InitRandomWithAllDk,
                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                            'removeNodes': removeNodes,
                            'connected': connected_iam},
                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                            'saveGXL': saveGXL})

        # The reported time includes the Gram matrix computation time.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat_list)
        print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
        nb_updated_list_iam.append(nb_updated_iam)
        print('\nnumber of updates of the best graph by random generation: ',
              nb_updated_random)
        nb_updated_list_random.append(nb_updated_random)
        print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
        nb_updated_k_list_iam.append(nb_updated_k_iam)
        print('\nnumber of updates of k nearest graphs by random generation: ',
              nb_updated_k_random)
        nb_updated_k_list_random.append(nb_updated_k_random)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    # summary over all median-set sizes.
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs by random generation: ',
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ',
          nb_updated_k_list_random)
    print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)

def test_preimage_mix_2combination_all_pairs():
    """Run the mixed pre-image test on a combination of two MUTAG graphs.

    For each selected pair ``(idx1, idx2)`` (currently only ``(187, 167)``)
    and each ``alpha`` in ``alpha_range``, ``preimage_iam_random_mix`` is
    called with the weights ``[alpha, 1 - alpha]``. The two source graphs and
    the first pre-image found are drawn to ``results/preimage_mix/``, the
    sum of distances in graph space is computed with ``ged_median``, and a
    summary line is prepended to ``results/preimage_mix/nb_updates.txt``.
    Statistics are printed; nothing is returned.

    The Gram matrix is read from ``results/gram_matrix_marg_itr10_pq0.03.gm.npz``,
    which must already exist and hold two extra rows/columns for the pair.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation.
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5  # k nearest neighbors.
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function.
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function.
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False

    nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
    nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)

    # test on each pair of graphs. To run on all pairs, use instead:
    #     for idx1 in range(len(Gn) - 1, -1, -1):
    #         for idx2 in range(idx1, -1, -1):
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()

            # Draw the two source graphs; file names follow the pair indices.
            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
            plt.savefig('results/preimage_mix/mutag' + str(idx1) + '.png', format="PNG")
            plt.show()
            plt.clf()
            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
            plt.savefig('results/preimage_mix/mutag' + str(idx2) + '.png', format="PNG")
            plt.show()
            plt.clf()

            # Load the precomputed Gram matrix; the ``with`` block closes the
            # .npz archive (np.load keeps the file open otherwise).
            with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
                km = gmfile['gm']
                time_km = gmfile['gmtime']

            # modify mixed gram matrix: rows/columns len(Gn) and len(Gn) + 1
            # repeat the entries of graphs idx1 and idx2.
            for i in range(len(Gn)):
                km[i, len(Gn)] = km[i, idx1]
                km[i, len(Gn) + 1] = km[i, idx2]
                km[len(Gn), i] = km[i, idx1]
                km[len(Gn) + 1, i] = km[i, idx2]
            km[len(Gn), len(Gn)] = km[idx1, idx1]
            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]

            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list_iam = []
            nb_updated_list_random = []
            nb_updated_k_list_iam = []
            nb_updated_k_list_random = []
            g_best = []

            # for each alpha
            for alpha in alpha_range:
                print('\n-------------------------------------------------------\n')
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
                    nb_updated_k_iam, nb_updated_k_random = \
                    preimage_iam_random_mix(
                        Gn, [g1, g2], [alpha, 1 - alpha],
                        range(len(Gn), len(Gn) + 2), km, k, r_max,
                        l_max, gkernel, epsilon=epsilon,
                        InitIAMWithAllDk=InitIAMWithAllDk,
                        InitRandomWithAllDk=InitRandomWithAllDk,
                        params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
                                    'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                                    'removeNodes': removeNodes,
                                    'connected': connected_iam},
                        params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                                    'saveGXL': saveGXL})

                # The reported time includes the Gram matrix computation time.
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list_iam.append(nb_updated_iam)
                nb_updated_list_random.append(nb_updated_random)
                nb_updated_k_list_iam.append(nb_updated_k_iam)
                nb_updated_k_list_random.append(nb_updated_k_random)

            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
                        with_labels=True)
                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
                            + '_alpha' + str(item) + '.png', format="PNG")
                plt.clf()

            # compute the corresponding sod in graph space. g_best[idx] is the
            # list of pre-images found for the idx-th alpha; as for the
            # drawing above, its first graph is the one evaluated.
            for idx in range(len(alpha_range)):
                sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2], ged_cost=ged_cost,
                                        ged_method=ged_method, saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))

            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
            print('\nnumber of updates of the best graph for each alpha by random generation: ',
                  nb_updated_list_random)
            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
                  nb_updated_k_list_iam)
            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
                  nb_updated_k_list_random)
            print('\ntimes:', time_list)

            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]

            # Prepend the result of this pair to the log file; the file is
            # created when it does not exist yet.
            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
            try:
                with open('results/preimage_mix/nb_updates.txt', 'r') as file:
                    content = file.read()
            except FileNotFoundError:
                content = ''
            with open('results/preimage_mix/nb_updates.txt', 'w') as file:
                file.write(str_fw + content)
###############################################################################

if __name__ == '__main__':
    # Script entry point: exactly one of the experiments below is enabled;
    # uncomment another call to run it instead.
    ###############################################################################
    # Test on the combination of two chosen graphs (the same setting as in the
    # random pre-image paper).
    # test_preimage_mix_2combination_all_pairs()
    ###############################################################################
    # Tests with different sizes of the median set.
    # test_preimage_mix_median_nb()
    ###############################################################################
    # Tests over a grid of median-set sizes and numbers of nearest neighbors k.
    test_preimage_mix_grid_k_median_nb()

+ 402
- 0
preimage/test_preimage_random.py View File

@@ -0,0 +1,402 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset

from preimage_random import preimage_random
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges


###############################################################################
# Tests over a grid of median-set sizes and numbers of nearest neighbors (k).

def test_preimage_random_grid_k_median_nb():
    """Run the random-generation pre-image test on a grid.

    For every median-set size in ``nb_median_range`` and every number of
    nearest neighbors in ``k_range``, ``preimage_random`` is called on the
    selected MUTAG graphs. The returned pre-image is drawn to
    ``results/preimage_random/`` and compared with the median set through
    ``ged_median``. All statistics are printed; nothing is returned.

    The Gram matrix is read from a precomputed ``.npz`` file that must
    already exist (see the recipe in the body).
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation.
    # parameters for GED function.
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # numbers of graphs whose median we want to compute.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # numbers of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # keep only the graphs of the positive group (label 1).
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # The Gram matrix does not depend on the loop variables, so it is loaded
    # once; the ``with`` block closes the .npz archive (np.load keeps the file
    # open otherwise). The file was produced with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for idx_nb, nb_median in enumerate(nb_median_range):
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seed so that the chosen graphs are reproducible for each size.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the first len(Gn) rows/columns hold the
        # candidate graphs, the trailing nb_median rows/columns repeat the
        # entries of the graphs chosen as median set.
        nb_graphs = len(Gn)
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(nb_graphs):
            for j, idx in enumerate(idx_rdm):
                km[i, nb_graphs + j] = km[i, idx]
                km[nb_graphs + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[nb_graphs + i, nb_graphs + j] = km[idx1, idx2]

        # uniform weights over the median set.
        alpha_range = [1 / nb_median] * nb_median

        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list.append([])
        g_best.append([])

        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat, nb_updated = preimage_random(
                Gn, Gn_median, alpha_range,
                range(len(Gn), len(Gn) + nb_median), km, k, r_max, l_max, gkernel)

            # The reported time includes the Gram matrix computation time.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[idx_nb].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_ks_min_list[idx_nb].append(dhat)
            g_best[idx_nb].append(ghat)
            print('\nnumber of updates of the best graph: ', nb_updated)
            nb_updated_list[idx_nb].append(nb_updated)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                    ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[idx_nb].append(sod_tmp)
            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    # summary over the whole grid.
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    # This test only runs the random generation method, so the message no
    # longer attributes the updates to IAM.
    print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
          nb_updated_list)
    print('\ntimes:', time_list)



###############################################################################
# Tests with different sizes of the median set.

def test_preimage_random_median_nb():
    """Run the random-generation pre-image test per median-set size.

    For every median-set size in ``nb_median_range`` (with ``k`` fixed to 5),
    ``preimage_random`` is called on the selected MUTAG graphs. The returned
    pre-image is drawn to ``results/preimage_random/`` and compared with the
    median set through ``ged_median``. All statistics are printed; nothing is
    returned.

    The Gram matrix is read from a precomputed ``.npz`` file that must
    already exist (see the recipe in the body).
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation.
    k = 5  # k nearest neighbors.
    # parameters for GED function.
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # numbers of graphs whose median we want to compute.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # keep only the graphs of the positive group (label 1).
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    # The Gram matrix does not depend on the loop variable, so it is loaded
    # once; the ``with`` block closes the .npz archive (np.load keeps the file
    # open otherwise). The file was produced with:
    #     time0 = time.time()
    #     km = compute_kernel(Gn, gkernel, True)
    #     time_km = time.time() - time0
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        # Re-seed so that the chosen graphs are reproducible for each size.
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the first len(Gn) rows/columns hold the
        # candidate graphs, the trailing nb_median rows/columns repeat the
        # entries of the graphs chosen as median set.
        nb_graphs = len(Gn)
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        for i in range(nb_graphs):
            for j in range(i, nb_graphs):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(nb_graphs):
            for j, idx in enumerate(idx_rdm):
                km[i, nb_graphs + j] = km[i, idx]
                km[nb_graphs + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[nb_graphs + i, nb_graphs + j] = km[idx1, idx2]

        # uniform weights over the median set.
        alpha_range = [1 / nb_median] * nb_median

        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, Gn_median, alpha_range,
            range(len(Gn), len(Gn) + nb_median), km, k, r_max, l_max, gkernel)

        # The reported time includes the Gram matrix computation time.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    # summary over all median-set sizes.
    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs: ',
          nb_updated_list)
    print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
def test_random_preimage_2combination():
    """Run the random-generation pre-image test on two MUTAG graphs.

    For each ``alpha`` in ``np.linspace(0, 1, 11)``, ``preimage_random`` is
    called for graphs 187 and 167 with the weights ``[alpha, 1 - alpha]``.
    The pre-image found for each alpha is drawn, saved under
    ``results/random_preimage/`` and displayed. Statistics are printed;
    nothing is returned.

    The Gram matrix is read from ``results/gram_matrix_marg_itr10_pq0.03.gm.npz``,
    which must already exist and hold two extra rows/columns for the pair.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation.
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors.

    # The two molecules are fixed; the NumPy seed is still set because it is
    # global state that code called below may rely on.
    np.random.seed(1)
    idx_gi = [187, 167]  # np.random.randint(0, len(Gn), 2)
    idx1 = idx_gi[0]
    idx2 = idx_gi[1]
    g1 = Gn[idx1].copy()
    g2 = Gn[idx2].copy()

    # Load the precomputed Gram matrix; the ``with`` block closes the .npz
    # archive (np.load keeps the file open otherwise). It can be recomputed
    # with:
    #     Gn_mix = [g.copy() for g in Gn]
    #     Gn_mix.append(g1.copy())
    #     Gn_mix.append(g2.copy())
    #     time0 = time.time()
    #     km = compute_kernel(Gn_mix, gkernel, True)
    #     time_km = time.time() - time0
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']

    # modify mixed gram matrix: rows/columns len(Gn) and len(Gn) + 1 repeat
    # the entries of graphs idx1 and idx2.
    for i in range(len(Gn)):
        km[i, len(Gn)] = km[i, idx1]
        km[i, len(Gn) + 1] = km[i, idx2]
        km[len(Gn), i] = km[i, idx1]
        km[len(Gn) + 1, i] = km[i, idx2]
    km[len(Gn), len(Gn)] = km[idx1, idx1]
    km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
    km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
    km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
                                                 range(len(Gn), len(Gn) + 2), km,
                                                 k, r_max, l_max, gkernel)
        # The reported time includes the Gram matrix computation time.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for idx, item in enumerate(alpha_range):
        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
        print('one of the possible corresponding pre-images is')
        nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
                with_labels=True)
        # Save before showing: once plt.show() returns, the figure may
        # already have been released and savefig would write an empty image.
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
        plt.show()
        plt.clf()
        print(g_best[idx].nodes(data=True))
        print(g_best[idx].edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
###############################################################################

if __name__ == '__main__':
    # Script entry point: exactly one of the experiments below is enabled;
    # uncomment another call to run it instead.
    ###############################################################################
    # Test on the combination of two chosen graphs (the same setting as in the
    # random pre-image paper).
    # test_random_preimage_2combination()
    ###############################################################################
    # Test of the random pre-image method with different sizes of the median set.
    test_preimage_random_median_nb()
    ###############################################################################
    # Test of the random pre-image method over a grid of median-set sizes and
    # numbers of nearest neighbors k.
    # test_preimage_random_grid_k_median_nb()

+ 109
- 0
preimage/utils.py View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 19:05:07 2019

Useful functions.
@author: ljia
"""
#import networkx as nx

import multiprocessing
import numpy as np

import sys
sys.path.insert(0, "../")
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.kernels.spKernel import spkernel
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel


def remove_edges(Gn):
    """Clear the attribute dict of every edge of every graph in ``Gn``.

    Despite the name, no edge is removed: each graph keeps its edges and
    only the per-edge attribute dictionaries yielded by
    ``G.edges(data=True)`` are emptied, in place. Returns ``None``.
    """
    # Lazily walk all (u, v, attrs) triples of all graphs, in graph order.
    edge_attr_dicts = (
        edge_attrs
        for graph in Gn
        for _u, _v, edge_attrs in graph.edges(data=True)
    )
    for edge_attrs in edge_attr_dicts:
        edge_attrs.clear()
def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Distance in kernel space between one graph and a weighted combination.

    Computes ``sqrt(term1 - term2 + term3)`` where

    * ``term1 = Kmatrix[idx_g, idx_g]``
    * ``term2 = 2 * sum_i alpha[i] * Kmatrix[idx_g, idx_gi[i]]``
    * ``term3`` is either taken as given or completed with
      ``sum_{i,j} alpha[i] * alpha[j] * Kmatrix[idx_gi[i], idx_gi[j]]``.

    Parameters
    ----------
    idx_g : index into ``Kmatrix`` of the graph being measured.
    idx_gi : indexable sequence of indices into ``Kmatrix``, one per
        entry of ``alpha``.
    alpha : sequence of weights.
    Kmatrix : Gram matrix indexed as ``Kmatrix[i, j]``.
    term3 : starting value of the third term (default 0).
    withterm3 : when True (default) ``term3`` is used exactly as passed,
        i.e. the caller supplies the precomputed value; when False the
        double sum over ``alpha`` is added to ``term3`` here.

    Returns
    -------
    The square root of the squared distance. A squared distance that is
    negative only by round-off (greater than ``-1e-10``) is treated as 0
    instead of yielding NaN; this is the same tolerance used by
    ``kernel_distance_matrix``. More negative values are passed to
    ``np.sqrt`` unchanged.
    """
    term1 = Kmatrix[idx_g, idx_g]
    term2 = 0
    for i, a in enumerate(alpha):
        term2 += a * Kmatrix[idx_g, idx_gi[i]]
    term2 *= 2
    if not withterm3:
        # The caller did not supply term3: accumulate the pairwise
        # similarities of the combination itself.
        for i1, a1 in enumerate(alpha):
            for i2, a2 in enumerate(alpha):
                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    dis_sq = term1 - term2 + term3
    # Floating-point cancellation can leave a tiny negative value when the
    # true distance is zero; clamp it so the result is 0 rather than NaN.
    if -1e-10 < dis_sq < 0:
        dis_sq = 0
    return np.sqrt(dis_sq)


def compute_kernel(Gn, graph_kernel, verbose):
    """Compute a normalized Gram matrix of ``Gn`` with the named graph kernel.

    Parameters
    ----------
    Gn : collection of graphs, handed unchanged to the selected kernel
        function (presumably networkx graphs with an 'atom' node label --
        confirm against the callers).
    graph_kernel : str
        One of 'marginalizedkernel', 'untilhpathkernel', 'spkernel' or
        'structuralspkernel'.
    verbose : forwarded to the kernel function's ``verbose`` argument.

    Returns
    -------
    Kmatrix : the Gram matrix returned by the kernel function, normalized
        in place so that entry (i, j) is divided by
        ``sqrt(K[i, i] * K[j, j])`` (using the diagonal before
        normalization).

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names.
    """
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10, remove_totters=False,
                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax', compute_method='trie',
                                      n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        # spkernel returns three values, unlike the other kernels here.
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
                                 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                                 n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
                                        {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
    else:
        # Previously an unknown name fell through and failed later with an
        # UnboundLocalError on Kmatrix; fail early with a clear message.
        raise ValueError('Unknown graph kernel: {!r}.'.format(graph_kernel))

    # normalization
    # The diagonal is copied first because the loop overwrites it.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            # Mirror the upper triangle into the lower one.
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix

def gram2distances(Kmatrix):
    """Turn a Gram matrix into the matrix of pairwise kernel-space distances.

    Entry (i, j) of the result is
    ``sqrt(Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j])``.
    No clamping is applied, so a negative squared distance produces NaN.
    """
    size = len(Kmatrix)
    self_sim = np.diagonal(Kmatrix)
    squared = np.zeros((size, size))
    # Broadcasting evaluates K[i, i] + K[j, j] - 2 * K[i, j] for every pair.
    squared[:, :] = self_sim[:, np.newaxis] + self_sim[np.newaxis, :] - 2 * Kmatrix
    return np.sqrt(squared)


def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Pairwise kernel-space distances between the graphs in ``Gn``.

    Parameters
    ----------
    Gn : sequence of graphs; only ``len(Gn)`` is used here unless the Gram
        matrix has to be computed.
    Kmatrix : optional precomputed Gram matrix indexed as ``Kmatrix[i, j]``.
        When None it is computed with ``compute_kernel(Gn, gkernel, True)``.
    gkernel : kernel name forwarded to ``compute_kernel``; only used when
        ``Kmatrix`` is None.

    Returns
    -------
    dis_mat : symmetric ``len(Gn) x len(Gn)`` array of distances
        ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.
    dis_max : largest entry of ``dis_mat``.
    dis_min : smallest non-zero entry of ``dis_mat``.
    dis_mean : mean over all entries of ``dis_mat`` (zeros included).

    Raises
    ------
    ValueError
        If a squared distance is below ``-1e-10``, or (from NumPy) if
        ``dis_mat`` has no non-zero entry to take the minimum of.
    """
    dis_mat = np.empty((len(Gn), len(Gn)))
    # Identity test: `Kmatrix == None` on a NumPy array compares
    # element-wise and makes the `if` raise "truth value is ambiguous".
    if Kmatrix is None:
        Kmatrix = compute_kernel(Gn, gkernel, True)
    for i in range(len(Gn)):
        for j in range(i, len(Gn)):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # Tolerate round-off noise, reject real negatives.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]
    dis_max = np.max(dis_mat)
    # Zeros (the diagonal and identical graphs) are excluded from the minimum.
    dis_min = np.min(dis_mat[dis_mat != 0])
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean


def get_same_item_indices(ls):
    """Group the positions of equal items in ``ls``.

    Returns a dict that maps each distinct item to the list of indices at
    which it occurs, in increasing order. Keys appear in order of first
    occurrence. Items must be hashable.
    """
    positions = {}
    for position, value in enumerate(ls):
        # setdefault creates the index list the first time a value is seen.
        positions.setdefault(value, []).append(position)
    return positions

Loading…
Cancel
Save