Browse Source

Update pre-image.

v0.1
jajupmochi 5 years ago
parent
commit
dd810b92da
12 changed files with 1856 additions and 539 deletions
  1. +103
    -0
      preimage/fitDistance.py
  2. +197
    -0
      preimage/ged.py
  3. +3
    -93
      preimage/iam.py
  4. +19
    -79
      preimage/preimage_iam.py
  5. +11
    -40
      preimage/preimage_random.py
  6. +1
    -0
      preimage/test.py
  7. +167
    -0
      preimage/test_iam.py
  8. +13
    -29
      preimage/test_others.py
  9. +289
    -298
      preimage/test_preimage_iam.py
  10. +542
    -0
      preimage/test_preimage_mix.py
  11. +402
    -0
      preimage/test_preimage_random.py
  12. +109
    -0
      preimage/utils.py

+ 103
- 0
preimage/fitDistance.py View File

@@ -0,0 +1,103 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 16 14:20:06 2019

@author: ljia
"""
import numpy as np
from tqdm import tqdm

import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from ged import GED, get_nb_edit_operations
from utils import kernel_distance_matrix

def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    """Iteratively tune constant edit costs so that GEDs match kernel distances.

    All six edit costs start at 1. In every iteration the GED and the number
    of each kind of edit operation are computed for all pairs ``(i, j)`` with
    ``i <= j``; the Euclidean norm of the gap between those GEDs and the
    distances in feature space is recorded; then the costs are re-estimated by
    a least-squares fit of the operation counts to the feature-space distances.

    Parameters
    ----------
    Gn : list
        Graphs to compare.
    gkernel : str
        Kernel name, forwarded to ``kernel_distance_matrix``.
    itr_max : int
        Number of fitting iterations.

    Returns
    -------
    tuple
        ``(c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list)``
        where the first six entries are the costs after the last fit,
        ``residual_list`` holds one residual per iteration and
        ``edit_cost_list`` holds the cost vector used in each iteration.
    """
    c_vi = c_vr = c_vs = c_ei = c_er = c_es = 1

    # compute distances in feature space, flattened over pairs (i, j), i <= j.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
    nb_rows = len(dis_k_mat)
    dis_k_vec = np.array([dis_k_mat[row, col]
                          for row in range(nb_rows)
                          for col in range(row, nb_rows)])

    residual_list = []
    edit_cost_list = []
    for itr in range(itr_max):
        print('iteration', itr)
        edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
        edit_cost_list.append(edit_cost_constant)

        # compute GEDs and numbers of edit operations; each row of
        # nb_ops_all is (n_vi, n_vr, n_vs, n_ei, n_er, n_es) for one pair.
        ged_all = []
        nb_ops_all = []
        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
            for j in range(i, len(Gn)):
                dis, pi_forward, pi_backward = GED(
                    Gn[i], Gn[j], lib='gedlibpy', cost='CONSTANT',
                    method='IPFP', edit_cost_constant=edit_cost_constant,
                    stabilizer='min', repeat=30)
                ged_all.append(dis)
                nb_ops_all.append(get_nb_edit_operations(
                    Gn[i], Gn[j], pi_forward, pi_backward))

        gap = np.array(ged_all) - dis_k_vec
        residual_list.append(np.sqrt(np.sum(np.square(gap))))

        # "fit" geds to distances in feature space by tuning edit costs using
        # the Least Squares Method.
        nb_cost_mat = np.array(nb_ops_all).reshape(-1, 6)
        edit_costs = np.linalg.lstsq(nb_cost_mat, dis_k_vec, rcond=None)[0]
        # slightly negative costs are treated as zero; costs below -1e-3 are
        # left untouched.
        edit_costs[(edit_costs < 0) & (edit_costs > -1e-3)] = 0
        c_vi, c_vr, c_vs, c_ei, c_er, c_es = edit_costs

    return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list



if __name__ == '__main__':
    from utils import remove_edges

    # Demo run: fit the edit costs on the first ten graphs of MUTAG.
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    Gn = Gn[:10]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    itr_max = 10
    (c_vi, c_vr, c_vs, c_ei, c_er, c_es,
     residual_list, edit_cost_list) = fit_GED_to_kernel_distance(
         Gn, gkernel, itr_max)

+ 197
- 0
preimage/ged.py View File

@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 18:44:59 2019

@author: ljia
"""
import numpy as np
import networkx as nx
from tqdm import tqdm
import sys

from gedlibpy import librariesImport, gedlibpy

def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
        edit_cost_constant=None, saveGXL='benoit', stabilizer='min', repeat=50):
    """Compute GED for 2 graphs.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Graphs to compare. Every node must carry an ``'atom'`` attribute;
        edge attributes are not forwarded to the library.
    lib : str
        GED backend. Only ``'gedlibpy'`` is supported.
    cost : str
        Edit cost name passed to ``gedlibpy.set_edit_cost``.
    method : str
        Method name passed to ``gedlibpy.set_method``.
    edit_cost_constant : list, optional
        Constants passed to ``gedlibpy.set_edit_cost``; ``None`` means ``[]``.
    saveGXL : str
        Not used by this function; kept for interface compatibility.
    stabilizer : {'min', None}
        ``None`` runs the method once. ``'min'`` runs it up to ``repeat``
        times and keeps the run with the smallest upper bound, stopping early
        when an upper bound of 0 is reached.
    repeat : int
        Maximum number of runs when ``stabilizer == 'min'``.

    Returns
    -------
    dis : float
        The upper bound reported by gedlibpy for the retained run.
    pi_forward : list
        For each entry of the library's forward map, the matching node of
        ``g2``, or ``np.inf`` when the mapped index is not a node of ``g2``.
    pi_backward : list
        Same as ``pi_forward`` for the backward map, with nodes of ``g1``.

    Raises
    ------
    ValueError
        If ``lib`` or ``stabilizer`` is not supported, or if ``repeat`` is
        smaller than 1 while ``stabilizer == 'min'``.
    """
    # Validate up front: these cases previously fell through and failed later
    # with an unbound local variable.
    if lib != 'gedlibpy':
        raise ValueError('Unsupported GED library: %r.' % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError('Unsupported stabilizer: %r.' % (stabilizer,))
    if stabilizer == 'min' and repeat < 1:
        raise ValueError('repeat must be at least 1 when stabilizer is "min".')
    if edit_cost_constant is None:
        edit_cost_constant = []

    def convertGraph(G):
        """Convert a graph to the proper NetworkX format that can be
        recognized by library gedlibpy.
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2, attrs in G.edges(data=True):
            # G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")

    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()

    g = listID[0]
    h = listID[1]
    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
        lower = gedlibpy.get_lower_bound(g, h)  # fetched but currently unused.
    else:  # stabilizer == 'min'
        upper = np.inf
        for _ in range(repeat):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            if upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
                lower = gedlibpy.get_lower_bound(g, h)  # currently unused.
            if upper == 0:
                break
    dis = upper

    # make the map label correct (label remove map as np.inf)
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
    return dis, pi_forward, pi_backward


def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
          edit_cost_constant=None, stabilizer='min', repeat=50):
    """Compute GEDs for a group of graphs.

    The GED of every unordered pair ``(Gn[i], Gn[j])`` with ``i < j`` is
    computed by calling ``GED`` with the given settings.

    NOTE(review): the previous body was a copy of ``GED`` that referenced the
    undefined names ``g1`` and ``g2`` and therefore could never run; the
    all-pairs result layout below is a proposal to be confirmed.

    Parameters
    ----------
    Gn : list
        Graphs to compare pairwise.
    lib, cost, method, edit_cost_constant, stabilizer, repeat
        Forwarded unchanged to ``GED`` (``edit_cost_constant=None`` is
        forwarded as ``[]``).

    Returns
    -------
    dis_mat : numpy.ndarray
        Symmetric ``(len(Gn), len(Gn))`` array of GEDs with a zero diagonal.
    pi_forward : dict
        Maps ``(i, j)``, ``i < j``, to the forward map returned by ``GED``.
    pi_backward : dict
        Maps ``(i, j)``, ``i < j``, to the backward map returned by ``GED``.

    Raises
    ------
    ValueError
        If ``lib`` is not ``'gedlibpy'``.
    """
    if lib != 'gedlibpy':
        raise ValueError('Unsupported GED library: %r.' % (lib,))
    if edit_cost_constant is None:
        edit_cost_constant = []

    nb_graphs = len(Gn)
    dis_mat = np.zeros((nb_graphs, nb_graphs))
    pi_forward = {}
    pi_backward = {}
    for i in range(nb_graphs):
        for j in range(i + 1, nb_graphs):
            dis, pi_f, pi_b = GED(Gn[i], Gn[j], lib=lib, cost=cost,
                                  method=method,
                                  edit_cost_constant=edit_cost_constant,
                                  stabilizer=stabilizer, repeat=repeat)
            dis_mat[i, j] = dis
            dis_mat[j, i] = dis
            pi_forward[(i, j)] = pi_f
            pi_backward[(i, j)] = pi_b
    return dis_mat, pi_forward, pi_backward


def ged_median(Gn, Gn_median, measure='ged', verbose=False,
               ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for each graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is accepted but not used. When ``verbose`` is true the outer
    loop is wrapped in a ``tqdm`` progress bar written to stdout.

    Returns ``(dis_list, pi_forward_list)``: ``dis_list[k]`` is the sum of the
    distances returned by ``GED`` between ``Gn[k]`` and every median graph,
    and ``pi_forward_list[k]`` lists the corresponding forward maps in the
    order of ``Gn_median``.
    """
    candidates = enumerate(Gn)
    if verbose:
        candidates = tqdm(candidates, desc='computing median distances',
                          file=sys.stdout)

    dis_list = []
    pi_forward_list = []
    for _, G in candidates:
        maps_to_medians = []
        pi_forward_list.append(maps_to_medians)
        total = 0
        for G_p in Gn_median:
            dis_tmp, pi_tmp_forward, _ = GED(G, G_p, cost=ged_cost,
                                             method=ged_method,
                                             saveGXL=saveGXL)
            maps_to_medians.append(pi_tmp_forward)
            total += dis_tmp
        dis_list.append(total)
    return dis_list, pi_forward_list


def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Compute the number of each edit operations.

    Parameters
    ----------
    g1, g2 : networkx graphs
        Source and target graphs; nodes carry an ``'atom'`` attribute.
    forward_map : list
        ``forward_map[i]`` is the node of ``g2`` matched to the i-th node of
        ``g1``, or ``np.inf`` when that node is removed.
    backward_map : list
        Entries equal to ``np.inf`` mark nodes of ``g2`` that are inserted.

    Returns
    -------
    tuple
        ``(n_vi, n_vr, n_vs, n_ei, n_er, n_es)``: numbers of node insertions,
        removals and substitutions, then edge insertions, removals and
        substitutions. Edge labels are not considered, so ``n_es`` is
        always 0.
    """
    n_vi = 0
    n_vr = 0
    n_vs = 0
    n_er = 0
    n_es = 0  # edge labels are ignored, hence no edge substitution.

    nodes1 = list(g1.nodes())
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        # ``G.nodes[...]`` replaces ``G.node[...]``, which NetworkX 2.4 removed.
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1
    for map_i in backward_map:
        if map_i == np.inf:
            n_vi += 1

    # position of every node of g1, built once instead of calling
    # ``nodes1.index`` for both ends of every edge.
    position = {node: idx for idx, node in enumerate(nodes1)}
    nb_edges2_cnted = 0
    for n1, n2 in g1.edges():
        image1 = forward_map[position[n1]]
        image2 = forward_map[position[n2]]
        # one of the nodes is removed, thus the edge is removed.
        if image1 == np.inf or image2 == np.inf:
            n_er += 1
        # corresponding edge is in g2. Edge label is not considered.
        elif g2.has_edge(image1, image2) or g2.has_edge(image2, image1):
            nb_edges2_cnted += 1
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    # edges of g2 that no edge of g1 was matched to must be inserted.
    n_ei = g2.number_of_edges() - nb_edges2_cnted
    return n_vi, n_vr, n_vs, n_ei, n_er, n_es

+ 3
- 93
preimage/iam.py View File

@@ -12,10 +12,10 @@ import networkx as nx
from tqdm import tqdm

import sys
from gedlibpy import librariesImport, gedlibpy
sys.path.insert(0, "../")
from pygraph.utils.graphdataset import get_dataset_attributes
from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
from ged import GED, ged_median


def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
@@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
# # find the best graph generated in this iteration and update pi_p.
# @todo: should we update all graphs generated or just the best ones?
dis_list, pi_forward_list = median_distance(G_new_list, Gn_median,
dis_list, pi_forward_list = ged_median(G_new_list, Gn_median,
**params_ged)
# @todo: should we remove the identical and connectivity check?
# Don't know which is faster.
@@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
# phase 1: initialize.
# compute set-median.
dis_min = np.inf
dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median,
dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
**params_ged)
# find all smallest distances.
if allBestInit: # try all best init graphs.
@@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,










###############################################################################
# Useful functions.

def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit',
        stabilizer='min'):
    """Compute GED.

    ``g1`` and ``g2`` are networkx graphs whose nodes carry an ``'atom'``
    attribute; edge attributes are not forwarded to gedlibpy. ``cost`` and
    ``method`` are passed to ``gedlibpy.set_edit_cost`` and
    ``gedlibpy.set_method``. ``saveGXL`` is not used by this function.
    With ``stabilizer=None`` the method is run once; with ``'min'`` it is run
    up to 50 times and the run with the smallest upper bound is kept
    (stopping early at an upper bound of 0).

    Returns ``(dis, pi_forward, pi_backward)``: the retained upper bound and
    the forward/backward maps expressed with the nodes of ``g2``/``g1``, where
    ``np.inf`` marks an index that is not a node of the other graph.

    Raises ``ValueError`` when ``lib`` or ``stabilizer`` is not supported.
    """
    # Validate up front: these cases previously fell through and failed later
    # with an unbound local variable.
    if lib != 'gedlibpy':
        raise ValueError('Unsupported GED library: %r.' % (lib,))
    if stabilizer is not None and stabilizer != 'min':
        raise ValueError('Unsupported stabilizer: %r.' % (stabilizer,))

    def convertGraph(G):
        """Convert a graph to the proper NetworkX format that can be
        recognized by library gedlibpy.
        """
        G_new = nx.Graph()
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd), chem=attrs['atom'])
        for nd1, nd2, attrs in G.edges(data=True):
            # G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
            G_new.add_edge(str(nd1), str(nd2))
        return G_new

    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1), "")
    gedlibpy.add_nx_graph(convertGraph(g2), "")

    listID = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost)
    gedlibpy.init()
    gedlibpy.set_method(method, "")
    gedlibpy.init_method()

    g = listID[0]
    h = listID[1]
    if stabilizer is None:
        gedlibpy.run_method(g, h)
        pi_forward = gedlibpy.get_forward_map(g, h)
        pi_backward = gedlibpy.get_backward_map(g, h)
        upper = gedlibpy.get_upper_bound(g, h)
        lower = gedlibpy.get_lower_bound(g, h)  # fetched but currently unused.
    else:  # stabilizer == 'min'
        upper = np.inf
        for _ in range(50):
            gedlibpy.run_method(g, h)
            upper_tmp = gedlibpy.get_upper_bound(g, h)
            if upper_tmp < upper:
                upper = upper_tmp
                pi_forward = gedlibpy.get_forward_map(g, h)
                pi_backward = gedlibpy.get_backward_map(g, h)
                lower = gedlibpy.get_lower_bound(g, h)  # currently unused.
            if upper == 0:
                break
    dis = upper

    # make the map label correct (label remove map as np.inf)
    nodes1 = list(g1.nodes())
    nodes2 = list(g2.nodes())
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
    return dis, pi_forward, pi_backward


def median_distance(Gn, Gn_median, measure='ged', verbose=False,
                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Sum, for each graph in ``Gn``, its GEDs to all graphs in ``Gn_median``.

    ``measure`` is accepted but not used. When ``verbose`` is true the outer
    loop is wrapped in a ``tqdm`` progress bar written to stdout.

    Returns ``(dis_list, pi_forward_list)``: ``dis_list[k]`` is the sum of the
    distances returned by ``GED`` between ``Gn[k]`` and every median graph,
    and ``pi_forward_list[k]`` lists the corresponding forward maps in the
    order of ``Gn_median``.
    """
    candidates = enumerate(Gn)
    if verbose:
        candidates = tqdm(candidates, desc='computing median distances',
                          file=sys.stdout)

    dis_list = []
    pi_forward_list = []
    for _, G in candidates:
        maps_to_medians = []
        pi_forward_list.append(maps_to_medians)
        total = 0
        for G_p in Gn_median:
            dis_tmp, pi_tmp_forward, _ = GED(G, G_p, cost=ged_cost,
                                             method=ged_method,
                                             saveGXL=saveGXL)
            maps_to_medians.append(pi_tmp_forward)
            total += dis_tmp
        dis_list.append(total)
    return dis_list, pi_forward_list


###############################################################################
# Old implementations.


+ 19
- 79
preimage/preimage_iam.py View File

@@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2].
"""
import sys
import numpy as np
import multiprocessing
from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt
import random

from iam import iam_upgraded
sys.path.insert(0, "../")
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.kernels.spKernel import spkernel
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel
from utils import dis_gstar, compute_kernel


def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
@@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
for gi in Gk:
nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
# nx.draw_networkx(gi)
plt.show()
# draw_Letter_graph(g)
print(gi.nodes(data=True))
print(gi.edges(data=True))
# for gi in Gk:
# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
## nx.draw_networkx(gi)
# plt.show()
## draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
# i = 1
r = 0
@@ -173,7 +166,7 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
print('\nthe k shortest distances are', dis_k)
print('the shortest distances for previous iterations are', dis_of_each_itr)
print('\nthe graph is updated', nb_updated, 'times.')
print('\n\nthe graph is updated', nb_updated, 'times.')
print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
print('distances in kernel space:', dis_of_each_itr, '\n')
@@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
# print(g.nodes(data=True))
# print(g.edges(data=True))
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
for gi in Gk:
nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
# nx.draw_networkx(gi)
plt.show()
# draw_Letter_graph(g)
print(gi.nodes(data=True))
print(gi.edges(data=True))
# for gi in Gk:
# nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
## nx.draw_networkx(gi)
# plt.show()
## draw_Letter_graph(g)
# print(gi.nodes(data=True))
# print(gi.edges(data=True))
r = 0
itr_total = 0
@@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
# compute distance between \psi and the new generated graph.
knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
alpha, knew, withterm3=False)
# @todo: the new distance is smaller or also equal?
if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
# check if the new distance is the same as one in D_k.
@@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
print('\nthe k shortest distances are', dis_k)
print('the shortest distances for previous iterations are', dis_of_each_itr)
print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
nb_updated_random, 'times.')
print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
'times, and by random generation', nb_updated_k_random, 'times.')
@@ -459,60 +453,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max


###############################################################################
# useful functions.

def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Distance between graph ``idx_g`` and the ``alpha``-weighted combination
    of the graphs ``idx_gi``, computed from the Gram matrix ``Kmatrix``.

    The result is ``sqrt(term1 - term2 + term3)`` with
    ``term1 = K[g, g]``, ``term2 = 2 * sum_i alpha_i * K[g, gi_i]`` and
    ``term3 = sum_ij alpha_i * alpha_j * K[gi_i, gi_j]``. When ``withterm3``
    equals ``False`` the double sum is added to the given ``term3``;
    otherwise the given ``term3`` is used as is.
    """
    self_term = Kmatrix[idx_g, idx_g]
    weighted = [(weight, idx_gi[pos]) for pos, weight in enumerate(alpha)]

    cross_term = 2 * sum(weight * Kmatrix[idx_g, idx]
                         for weight, idx in weighted)

    # deliberate equality test: only the value False triggers the double sum.
    if withterm3 == False:
        for w_row, idx_row in weighted:
            for w_col, idx_col in weighted:
                term3 += w_row * w_col * Kmatrix[idx_row, idx_col]
    return np.sqrt(self_term - cross_term + term3)


def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for the named kernel.

    Parameters
    ----------
    Gn : list
        Graphs; node labels are read from the ``'atom'`` attribute.
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry ``K[i][j]`` divided by
        ``sqrt(K[i][i] * K[j][j])``.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names (previously
        this surfaced as an unbound local variable).
    """
    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10,
                                        remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax',
                                      compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    else:
        raise ValueError('Unknown graph kernel: %r.' % (graph_kernel,))

    # normalization: the upper triangle is rescaled and mirrored, using a
    # copy of the diagonal taken before any entry is modified.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix

def gram2distances(Kmatrix):
    """Turn a Gram matrix into the matrix of kernel-induced distances.

    Entry ``(i, j)`` of the result is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.
    """
    size = len(Kmatrix)
    self_sims = np.array([Kmatrix[i, i] for i in range(size)])
    dmatrix = np.zeros((size, size))
    # broadcasting adds K[i, i] along rows and K[j, j] along columns.
    dmatrix[:, :] = (self_sims.reshape(size, 1) + self_sims.reshape(1, size)
                     - 2 * Kmatrix)
    return np.sqrt(dmatrix)


###############################################################################
# Old implementations.
#def gk_iam(Gn, alpha):


+ 11
- 40
preimage/preimage_random.py View File

@@ -10,51 +10,14 @@ pre-image
import sys
import numpy as np
import random
import multiprocessing
from tqdm import tqdm
import networkx as nx
import matplotlib.pyplot as plt


sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.kernels.spKernel import spkernel
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel

from gk_iam import dis_gstar


def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for the named kernel.

    Parameters
    ----------
    Gn : list
        Graphs; node labels are read from the ``'atom'`` attribute.
    graph_kernel : str
        One of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : bool
        Forwarded to the kernel function.

    Returns
    -------
    Kmatrix
        The Gram matrix with every entry ``K[i][j]`` divided by
        ``sqrt(K[i][i] * K[j][j])``.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names (previously
        this surfaced as an unbound local variable).
    """
    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10,
                                        remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax',
                                      compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    else:
        raise ValueError('Unknown graph kernel: %r.' % (graph_kernel,))

    # normalization: the upper triangle is rescaled and mirrored, using a
    # copy of the diagonal taken before any entry is modified.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix
from utils import compute_kernel, dis_gstar


def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
@@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
r = 0
# sod_list = [dhat]
# found = False
dis_of_each_itr = [dhat]
nb_updated = 0
g_best = []
while r < r_max:
@@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
# p_quit=lmbda, n_iteration=20, remove_totters=False,
# n_jobs=multiprocessing.cpu_count(), verbose=False)
knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
withterm3=False)
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
if dnew < dhat:
print('\nI am smaller!')
@@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
dihat_list = [dhat]
else:
r += 1
dis_of_each_itr.append(dhat)
print('the shortest distances for previous iterations are', dis_of_each_itr)
# dis_best.append(dhat)
g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
print('distances in kernel space:', dis_of_each_itr, '\n')
return dhat, g_best, nb_updated
# return 0, 0, 0


if __name__ == '__main__':
from pygraph.utils.graphfiles import loadDataset
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
# 'extra_params': {}} # node/edge symb


+ 1
- 0
preimage/test.py View File

@@ -80,5 +80,6 @@ def testNxGrapĥ():
print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))

#test()
init()
#testNxGrapĥ()

+ 167
- 0
preimage/test_iam.py View File

@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from iam import iam_upgraded
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median

###############################################################################
# tests on different numbers of median-sets.

def test_iam_median_nb():
    """Run IAM on MUTAG median sets of increasing size and print a summary.

    For every size in ``nb_median_range`` a fixed-seed random sample of the
    graphs whose label is ``1`` is used both as the median set and as the
    candidate set of ``iam_upgraded``. The running time, the distance returned
    by ``iam_upgraded`` and the sum of GEDs between the first generated graph
    and the median set are collected and printed, and the first generated
    graph is drawn and saved under ``results/preimage_iam/``.

    The function reads the dataset file and a pre-computed Gram matrix from
    disk and returns nothing.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)

    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'

    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn_median]

        ###################################################################
        # load the pre-computed Gram matrix of the positive graphs and
        # extend it with rows/columns for the chosen median graphs.
        # NOTE(review): km is built here but not consumed further below.
        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
        km_tmp = gmfile['gm']
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################

        time0 = time.time()
        ghat_new_list, dis_min = iam_upgraded(
            Gn_median, Gn_candidate,
            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
            epsilon=epsilon_iam, removeNodes=removeNodes,
            connected=connected_iam,
            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                        'saveGXL': saveGXL})
        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)

        # The values reported below are the ones iam_upgraded returns; the
        # previous code used dhat / ghat_list / nb_updated / nb_updated_k,
        # none of which is defined in this function.
        # NOTE(review): dis_min presumably is a GED-based distance rather
        # than a kernel-space one - confirm before trusting the labels.
        print('\nsmallest distance in kernel space: ', dis_min)
        dis_ks_min_list.append(dis_min)
        g_best.append(ghat_new_list)

        # show the best graph and save it to file.
        print('the shortest distance is', dis_min)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_new_list[0],
                labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
                with_labels=True)
        # save before showing: after plt.show() returns the figure may
        # already be closed, which would save an empty image.
        plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.show()
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_new_list[0]], Gn_median,
                                ged_cost=ged_cost, ged_method=ged_method,
                                saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\ntimes:', time_list)
###############################################################################

if __name__ == '__main__':
    # tests on different numbers of median-sets.
    test_iam_median_nb()

preimage/run_gk_iam.py → preimage/test_others.py View File

@@ -15,6 +15,9 @@ import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from median import draw_Letter_graph
from ged import GED, ged_median
from utils import get_same_item_indices, compute_kernel, gram2distances, \
dis_gstar, remove_edges


# --------------------------- These are tests --------------------------------#
@@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn):


def test_who_is_the_closest_in_GED_space(Gn):
from iam import GED
idx_gi = [0, 6]
g1 = Gn[idx_gi[0]]
g2 = Gn[idx_gi[1]]
@@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn):
def test_the_simple_two(Gn, gkernel):
from gk_iam import gk_iam_nearest_multi, compute_kernel
from gk_iam import gk_iam_nearest_multi
lmbda = 0.03 # termination probalility
r_max = 10 # recursions
l = 500
@@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel):
def test_remove_bests(Gn, gkernel):
from gk_iam import gk_iam_nearest_multi, compute_kernel
from gk_iam import gk_iam_nearest_multi
lmbda = 0.03 # termination probalility
r_max = 10 # recursions
l = 500
@@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel):
# Tests on dataset Letter-H.
def test_gkiam_letter_h():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -305,7 +306,7 @@ def test_gkiam_letter_h():
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER',
sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER',
ged_method='IPFP', saveGXL='gedlib-letter')
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
@@ -318,19 +319,6 @@ def test_gkiam_letter_h():
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
print('\nnumber of updates for each letter: ', nb_updated_list)
print('\ntimes:', time_list)
def get_same_item_indices(ls):
    """Group the positions of equal items of ``ls``.

    Returns a plain dict mapping every distinct item to the list of indices at
    which it occurs, in increasing order; keys appear in order of first
    occurrence.
    """
    idx_dict = {}
    for position, value in enumerate(ls):
        idx_dict.setdefault(value, []).append(position)
    return idx_dict


#def compute_letter_median_by_average(Gn):
# return g_median
@@ -338,7 +326,6 @@ def get_same_item_indices(ls):

def test_iam_letter_h():
from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
from gk_iam import dis_gstar, compute_kernel
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -402,7 +389,7 @@ def test_iam_letter_h():
def test_random_preimage_letter_h():
from preimage_random import preimage_random, compute_kernel
from preimage_random import preimage_random
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -463,7 +450,7 @@ def test_random_preimage_letter_h():
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_list.append(sod_tmp)
sod_min_list.append(np.min(sod_tmp))
@@ -479,8 +466,7 @@ def test_random_preimage_letter_h():
def test_gkiam_mutag():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
'extra_params': {}} # node nsymb
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
@@ -535,7 +521,7 @@ def test_gkiam_mutag():
print(g.edges(data=True))
# compute the corresponding sod in graph space. (alpha range not considered.)
sod_tmp, _ = median_distance(g_best[0], Gn_let)
sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
sod_ks_min_list.append(sod_ks)
@@ -553,9 +539,7 @@ def test_gkiam_mutag():
# Re-test.
def retest_the_simple_two():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from test_random_mutag import remove_edges
from gk_iam import gk_iam_nearest_multi
# The two simple graphs.
# g1 = nx.Graph(name='haha')
@@ -653,7 +637,7 @@ def retest_the_simple_two():
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))

preimage/test_random_mutag.py → preimage/test_preimage_iam.py View File

@@ -10,20 +10,23 @@ import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
import random
#from tqdm import tqdm

import os
#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median

from preimage_iam import preimage_iam


###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
# tests on different values on grid of median-sets and k.

def test_preimage_mix_2combination_all_pairs():
from preimage_iam import preimage_iam_random_mix, compute_kernel
from iam import median_distance
def test_preimage_iam_grid_k_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs():
gkernel = 'marginalizedkernel'
lmbda = 0.03 # termination probalility
r_max = 10 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
r_max = 5 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
InitRandomWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
@@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs():
removeNodes = True
connected_iam = False
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
# test on each pair of graphs.
# for idx1 in range(len(Gn) - 1, -1, -1):
# for idx2 in range(idx1, -1, -1):
for idx1 in range(187, 188):
for idx2 in range(167, 168):
g1 = Gn[idx1].copy()
g2 = Gn[idx2].copy()
# Gn[10] = []
# Gn[10] = []
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# number of nearest neighbors.
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for idx_nb, nb_median in enumerate(nb_median_range):
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
sod_gs_min_list.append([])
nb_updated_list.append([])
nb_updated_k_list.append([])
g_best.append([])
for k in k_range:
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
print('k =', k)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
preimage_iam(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list[idx_nb].append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list[idx_nb].append(dhat)
g_best[idx_nb].append(ghat_list)
print('\nnumber of updates of the best graph by IAM: ', nb_updated)
nb_updated_list[idx_nb].append(nb_updated)
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
nb_updated_k_list[idx_nb].append(nb_updated_k)
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
plt.show()
plt.clf()
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
plt.show()
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
'_k' + str(k) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list[idx_nb].append(sod_tmp)
sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs and k: ',
sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs and k: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)

###################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
#
# # write Gram matrix to file and read it.
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################
# # use only the two graphs in median set as candidates.
# Gn = [g1.copy(), g2.copy()]
# Gn_mix = Gn + [g1.copy(), g2.copy()]
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0

###############################################################################
# tests on different numbers of median-sets.

def test_preimage_iam_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
nb_updated_k_list_iam = []
nb_updated_k_list_random = []
g_best = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
nb_updated_k_iam, nb_updated_k_random = \
preimage_iam_random_mix(Gn, [g1, g2],
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
InitRandomWithAllDk=InitRandomWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
nb_updated_list_iam.append(nb_updated_iam)
nb_updated_list_random.append(nb_updated_random)
nb_updated_k_list_iam.append(nb_updated_k_iam)
nb_updated_k_list_random.append(nb_updated_k_random)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
+ '_alpha' + str(item) + '.png', format="PNG")
# plt.show()
plt.clf()
# print(g_best[idx][0].nodes(data=True))
# print(g_best[idx][0].edges(data=True))
# for g in g_best[idx]:
# draw_Letter_graph(g, savepath='results/gk_iam/')
## nx.draw_networkx(g)
## plt.show()
# print(g.nodes(data=True))
# print(g.edges(data=True))
lmbda = 0.03 # termination probalility
r_max = 10 # iteration limit for pre-image.
# alpha_range = np.linspace(0.5, 0.5, 1)
k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list = []
nb_updated_k_list = []
g_best = []
for nb_median in nb_median_range:
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
###################################################################
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
print('\nnumber of updates of the best graph for each alpha by random generation: ',
nb_updated_list_random)
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
nb_updated_k_list_iam)
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
nb_updated_k_list_random)
print('\ntimes:', time_list)
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
###################################################################
alpha_range = [1 / nb_median] * nb_median
time0 = time.time()
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
preimage_iam(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
content = file.read()
file.seek(0, 0)
file.write(str_fw + content)
time_total = time.time() - time0 + time_km
print('\ntime: ', time_total)
time_list.append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list.append(dhat)
g_best.append(ghat_list)
print('\nnumber of updates of the best graph: ', nb_updated)
nb_updated_list.append(nb_updated)
print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
nb_updated_k_list.append(nb_updated_k)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
# plt.show()
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
'.png', format="PNG")
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
nb_updated_list)
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
nb_updated_k_list)
print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)

def test_gkiam_2combination_all_pairs():
from preimage_iam import preimage_iam, compute_kernel
from iam import median_distance
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs():
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
@@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs():

def test_gkiam_2combination():
from gk_iam import gk_iam_nearest_multi, compute_kernel
from iam import median_distance
from gk_iam import gk_iam_nearest_multi
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -451,7 +578,7 @@ def test_gkiam_2combination():
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
@@ -463,148 +590,6 @@ def test_gkiam_2combination():
print('\ntimes:', time_list)
def test_random_preimage_2combination():
# Random pre-image experiment on MUTAG for one fixed pair of graphs.
# For every alpha in alpha_range it calls preimage_random on (g1, g2) with
# weights (alpha, 1 - alpha), then draws/saves each returned graph and prints
# the kernel-space distances, update counts and timings.
# Reads 'results/gram_matrix_marg_itr10_pq0.03.gm.npz' and writes PNG files
# under 'results/random_preimage/'. Takes no arguments; no return statement.
# from gk_iam import compute_kernel
from preimage_random import preimage_random
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:12]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
# print(dis_max, dis_min, dis_mean)
# NOTE(review): lmbda is assigned but not referenced again in this function.
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image.
# l is forwarded positionally to preimage_random (after r_max).
l = 500
# 11 evenly spaced weights from 0 to 1 inclusive.
alpha_range = np.linspace(0, 1, 11)
k = 5 # k nearest neighbors
# randomly select two molecules
# NOTE(review): the indices below are hard-coded (the random draw is commented
# out), so this seed does not influence which graphs are chosen here.
np.random.seed(1)
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
g1 = Gn[idx_gi[0]].copy()
g2 = Gn[idx_gi[1]].copy()
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag10.png", format="PNG")
# plt.show()
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
# plt.savefig("results/random_preimage/mutag11.png", format="PNG")
# plt.show()
######################################################################
# Gn_mix = [g.copy() for g in Gn]
# Gn_mix.append(g1.copy())
# Gn_mix.append(g2.copy())
#
## g_tmp = iam([g1, g2])
## nx.draw_networkx(g_tmp)
## plt.show()
#
# # compute
# time0 = time.time()
# km = compute_kernel(Gn_mix, gkernel, True)
# time_km = time.time() - time0
###################################################################
idx1 = idx_gi[0]
idx2 = idx_gi[1]
# Load a precomputed Gram matrix ('gm') and the time it took to compute ('gmtime').
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
km = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
# Rows/columns len(Gn) and len(Gn) + 1 are overwritten with copies of the
# entries of graphs idx1 and idx2, i.e. the two extra slots duplicate g1 and g2.
# assumes km has at least len(Gn) + 2 rows and columns — TODO confirm
for i in range(len(Gn)):
km[i, len(Gn)] = km[i, idx1]
km[i, len(Gn) + 1] = km[i, idx2]
km[len(Gn), i] = km[i, idx1]
km[len(Gn) + 1, i] = km[i, idx2]
km[len(Gn), len(Gn)] = km[idx1, idx1]
km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
###################################################################

# One entry per alpha is appended to each of these lists.
time_list = []
nb_updated_list = []
g_best = []
dis_ks_min_list = []
# for each alpha
for alpha in alpha_range:
print('\n-------------------------------------------------------\n')
print('alpha =', alpha)
time0 = time.time()
# range(len(Gn), len(Gn) + 2) presumably gives the positions of g1 and g2 in
# km; verify against preimage_random.
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha],
range(len(Gn), len(Gn) + 2), km,
k, r_max, l, gkernel)
# Reported time = this call's wall time plus the stored Gram-matrix time.
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list.append(time_total)
dis_ks_min_list.append(dhat)
g_best.append(ghat)
nb_updated_list.append(nb_updated)
# show best graphs and save them to file.
for idx, item in enumerate(alpha_range):
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
print('one of the possible corresponding pre-images is')
# Node labels are taken from the 'atom' node attribute.
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
with_labels=True)
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
# Clear the figure so the next drawing starts from an empty canvas.
plt.clf()
print(g_best[idx].nodes(data=True))
print(g_best[idx].edges(data=True))
# # compute the corresponding sod in graph space. (alpha range not considered.)
# sod_tmp, _ = median_distance(g_best[0], Gn_let)
# sod_gs_list.append(sod_tmp)
# sod_gs_min_list.append(np.min(sod_tmp))
# sod_ks_min_list.append(sod_ks)
# nb_updated_list.append(nb_updated)
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
print('\nnumber of updates for each alpha: ', nb_updated_list)
print('\ntimes:', time_list)

###############################################################################
# help functions

def remove_edges(Gn):
    """Clear the attributes of every edge of every graph in `Gn`, in place.

    Despite the name, no edge is deleted: each edge keeps its endpoints and
    only its attribute dictionary is emptied.

    Parameters
    ----------
    Gn : iterable
        Graphs exposing ``edges(data=True)`` that yields
        ``(u, v, attribute_dict)`` triples (as networkx graphs do).

    Returns
    -------
    None
    """
    for G in Gn:
        for _, _, attrs in G.edges(data=True):
            # attrs is the graph's own dict, so clearing it mutates the graph.
            attrs.clear()
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute the pairwise distances between graphs in kernel (feature) space.

    The distance between graphs i and j is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])`` where K is the Gram matrix.

    Parameters
    ----------
    Gn : list
        The graphs. When `Kmatrix` is given only ``len(Gn)`` is used.
    Kmatrix : 2-D array, optional
        Precomputed Gram matrix, indexed as ``Kmatrix[i, j]``. When None it is
        computed with ``gk_iam.compute_kernel(Gn, gkernel, True)``.
    gkernel : optional
        Kernel identifier forwarded to ``compute_kernel``; only used when
        `Kmatrix` is None.

    Returns
    -------
    dis_mat : numpy.ndarray
        Symmetric ``len(Gn) x len(Gn)`` matrix of distances.
    dis_max : float
        Largest entry of `dis_mat`.
    dis_min : float
        Smallest non-zero entry of `dis_mat`, or 0.0 when every entry is zero.
    dis_mean : float
        Mean of all entries of `dis_mat` (diagonal included).

    Raises
    ------
    ValueError
        If a squared distance is smaller than -1e-10.
    """
    # `is None` is required: `Kmatrix == None` on a numpy array compares
    # element-wise and cannot be used as a truth value.
    if Kmatrix is None:
        # Imported lazily so a caller supplying Kmatrix does not need gk_iam.
        from gk_iam import compute_kernel
        Kmatrix = compute_kernel(Gn, gkernel, True)

    n = len(Gn)
    dis_mat = np.empty((n, n))
    for i in range(n):
        for j in range(i, n):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # Tolerate tiny negative values caused by rounding errors.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]

    dis_max = np.max(dis_mat)
    nonzero = dis_mat[dis_mat != 0]
    # Guard the reduction: with no non-zero distance np.min would raise.
    dis_min = np.min(nonzero) if nonzero.size else 0.0
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean
###############################################################################

@@ -612,7 +597,13 @@ if __name__ == '__main__':
###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
# test_random_preimage_2combination()
# test_gkiam_2combination()
# test_gkiam_2combination_all_pairs()
test_preimage_mix_2combination_all_pairs()
###############################################################################
# tests on different numbers of median-sets.
test_preimage_iam_median_nb()
###############################################################################
# tests on different values on grid of median-sets and k.
# test_preimage_iam_grid_k_median_nb()

+ 542
- 0
preimage/test_preimage_mix.py View File

@@ -0,0 +1,542 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges
from preimage_iam import preimage_iam_random_mix

###############################################################################
# tests on different values on grid of median-sets and k.

def test_preimage_mix_grid_k_median_nb():
# Grid experiment on MUTAG for the mixed IAM / random-generation pre-image method.
# For every median-set size in nb_median_range and every k in k_range it calls
# preimage_iam_random_mix, saves a drawing of the first returned graph, and
# records the kernel-space distance, the graph-space SOD (via ged_median), the
# update counters and the run time.
# Reads 'results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz' and writes
# PNG files under 'results/preimage_mix/'. Takes no arguments; no return statement.
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
# Gn = Gn[0:50]
remove_edges(Gn)
gkernel = 'marginalizedkernel'
# NOTE(review): lmbda is assigned but not referenced again in this function.
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image.
l_max = 500 # update limit for random generation
# alpha_range = np.linspace(0.5, 0.5, 1)
# k = 5 # k nearest neighbors
epsilon = 1e-6
InitIAMWithAllDk = True
InitRandomWithAllDk = True
# parameters for GED function
ged_cost='CHEM_1'
ged_method='IPFP'
saveGXL='gedlib'
# parameters for IAM function
c_ei=1
c_er=1
c_es=1
ite_max_iam = 50
epsilon_iam = 0.001
removeNodes = True
connected_iam = False
# number of graphs; we want to compute the median of these graphs.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
# number of nearest neighbors.
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
# find out all the graphs classified to positive group 1.
# From here on Gn holds only the graphs whose label in y_all equals 1.
idx_dict = get_same_item_indices(y_all)
Gn = [Gn[i] for i in idx_dict[1]]
# # compute Gram matrix.
# time0 = time.time()
# km = compute_kernel(Gn, gkernel, True)
# time_km = time.time() - time0
# # write Gram matrix to file.
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
# Result tables: one inner list per median-set size (indexed by idx_nb), each
# receiving one entry per k.
time_list = []
dis_ks_min_list = []
sod_gs_list = []
sod_gs_min_list = []
nb_updated_list_iam = []
nb_updated_list_random = []
nb_updated_k_list_iam = []
nb_updated_k_list_random = []
g_best = []
for idx_nb, nb_median in enumerate(nb_median_range):
print('\n-------------------------------------------------------')
print('number of median graphs =', nb_median)
# Re-seeding before each draw makes the sample depend only on len(Gn) and nb_median.
random.seed(1)
idx_rdm = random.sample(range(len(Gn)), nb_median)
print('graphs chosen:', idx_rdm)
Gn_median = [Gn[idx].copy() for idx in idx_rdm]
# for g in Gn_median:
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
## plt.savefig("results/preimage_mix/mutag.png", format="PNG")
# plt.show()
# plt.clf()
###################################################################
# Load the precomputed Gram matrix ('gm') and its computation time ('gmtime').
# NOTE(review): this appears to be re-read for every nb_median although the
# path never changes.
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
km_tmp = gmfile['gm']
time_km = gmfile['gmtime']
# modify mixed gram matrix.
# km extends km_tmp by nb_median extra rows/columns: the leading
# len(Gn) x len(Gn) block is km_tmp (upper triangle mirrored), and extra index
# len(Gn) + j duplicates the entries of graph idx_rdm[j].
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
for i in range(len(Gn)):
for j in range(i, len(Gn)):
km[i, j] = km_tmp[i, j]
km[j, i] = km[i, j]
for i in range(len(Gn)):
for j, idx in enumerate(idx_rdm):
km[i, len(Gn) + j] = km[i, idx]
km[len(Gn) + j, i] = km[i, idx]
for i, idx1 in enumerate(idx_rdm):
for j, idx2 in enumerate(idx_rdm):
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
###################################################################
# Equal weight 1 / nb_median for every graph of the median set.
alpha_range = [1 / nb_median] * nb_median
# Open a new row in every result table for this median-set size.
time_list.append([])
dis_ks_min_list.append([])
sod_gs_list.append([])
sod_gs_min_list.append([])
nb_updated_list_iam.append([])
nb_updated_list_random.append([])
nb_updated_k_list_iam.append([])
nb_updated_k_list_random.append([])
g_best.append([])
for k in k_range:
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
print('k =', k)
time0 = time.time()
# range(len(Gn), len(Gn) + nb_median) presumably gives the positions of
# Gn_median in km; verify against preimage_iam_random_mix.
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
nb_updated_k_iam, nb_updated_k_random = \
preimage_iam_random_mix(Gn, Gn_median,
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
InitRandomWithAllDk=InitRandomWithAllDk,
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
'removeNodes': removeNodes, 'connected': connected_iam},
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
'saveGXL': saveGXL})
# Reported time = this call's wall time plus the stored Gram-matrix time.
time_total = time.time() - time0 + time_km
print('time: ', time_total)
time_list[idx_nb].append(time_total)
print('\nsmallest distance in kernel space: ', dhat)
dis_ks_min_list[idx_nb].append(dhat)
g_best[idx_nb].append(ghat_list)
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
nb_updated_list_iam[idx_nb].append(nb_updated_iam)
print('\nnumber of updates of the best graph by random generation: ',
nb_updated_random)
nb_updated_list_random[idx_nb].append(nb_updated_random)
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
print('\nnumber of updates of k nearest graphs by random generation: ',
nb_updated_k_random)
nb_updated_k_list_random[idx_nb].append(nb_updated_k_random)
# show the best graph and save it to file.
print('the shortest distance is', dhat)
print('one of the possible corresponding pre-images is')
# Only the first graph of ghat_list is drawn; node labels come from 'atom'.
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
with_labels=True)
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
'_k' + str(k) + '.png', format="PNG")
# plt.show()
# Clear the figure so the next drawing starts from an empty canvas.
plt.clf()
# print(ghat_list[0].nodes(data=True))
# print(ghat_list[0].edges(data=True))
# compute the corresponding sod in graph space.
# Only the first element of ged_median's result is kept; the second is discarded.
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list[idx_nb].append(sod_tmp)
sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
print('\nsmallest sod in graph space: ', np.min(sod_tmp))
# Summary of all collected result tables.
print('\nsods in graph space: ', sod_gs_list)
print('\nsmallest sod in graph space for each set of median graphs and k: ',
sod_gs_min_list)
print('\nsmallest distance in kernel space for each set of median graphs and k: ',
dis_ks_min_list)
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
nb_updated_list_iam)
print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ',
nb_updated_list_random)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
nb_updated_k_list_iam)
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ',
nb_updated_k_list_random)
print('\ntimes:', time_list)


###############################################################################
# tests on different numbers of median-sets.

def test_preimage_mix_median_nb():
    """Run the mixed IAM / random pre-image search for several median-set sizes.

    For each size in ``nb_median_range`` the function draws that many graphs
    (with a fixed seed) from the MUTAG graphs labelled 1, extends the
    pre-computed Gram matrix read from disk with one extra row/column per
    drawn graph, and calls ``preimage_iam_random_mix`` with equal weights
    ``1 / nb_median``.  The first returned graph is drawn to
    ``results/preimage_mix/`` and its SOD to the median set is computed with
    ``ged_median``.  All statistics are printed; nothing is returned.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # keep only the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The stored Gram matrix does not depend on the median set, so it is read
    # once.  np.load returns an NpzFile that keeps a file handle open; the
    # ``with`` block closes it after both arrays are in memory.
    # NOTE(review): the file was presumably produced by
    #     km = compute_kernel(Gn, gkernel, True)
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list_iam = []
    nb_updated_list_random = []
    nb_updated_k_list_iam = []
    nb_updated_k_list_random = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the top-left block is the stored
        # matrix (upper triangle mirrored), the extra rows/columns repeat the
        # entries of the graphs chosen for the median set.
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        rows, cols = np.triu_indices(nb_graphs)
        km[rows, cols] = km_tmp[rows, cols]
        km[cols, rows] = km_tmp[rows, cols]
        km[:nb_graphs, nb_graphs:] = km[:nb_graphs, idx_rdm]
        km[nb_graphs:, :nb_graphs] = km[:nb_graphs, idx_rdm].T
        km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_rdm, idx_rdm)]

        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat_list, _, nb_updated_iam, nb_updated_random, \
            nb_updated_k_iam, nb_updated_k_random = \
            preimage_iam_random_mix(
                Gn, Gn_median, alpha_range,
                range(nb_graphs, nb_graphs + nb_median), km, k, r_max,
                l_max, gkernel, epsilon=epsilon,
                InitIAMWithAllDk=InitIAMWithAllDk,
                InitRandomWithAllDk=InitRandomWithAllDk,
                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
                            'removeNodes': removeNodes,
                            'connected': connected_iam},
                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                            'saveGXL': saveGXL})
        # the reported time includes the time stored with the Gram matrix.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
        nb_updated_list_iam.append(nb_updated_iam)
        print('\nnumber of updates of the best graph by random generation: ',
              nb_updated_random)
        nb_updated_list_random.append(nb_updated_random)
        print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
        nb_updated_k_list_iam.append(nb_updated_k_iam)
        print('\nnumber of updates of k nearest graphs by random generation: ',
              nb_updated_k_random)
        nb_updated_k_list_random.append(nb_updated_k_random)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ',
          nb_updated_list_iam)
    print('\nnumber of updates of the best graph for each set of median graphs by random generation: ',
          nb_updated_list_random)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ',
          nb_updated_k_list_iam)
    print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ',
          nb_updated_k_list_random)
    print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)

def test_preimage_mix_2combination_all_pairs():
    """Run the mixed IAM / random pre-image search on pairs of MUTAG graphs.

    For every pair ``(idx1, idx2)`` visited by the two outer loops (currently
    restricted to graphs 187 and 167) the two graphs are drawn to
    ``results/preimage_mix/``, the stored Gram matrix is completed with two
    extra rows/columns that repeat the entries of the pair, and
    ``preimage_iam_random_mix`` is called for every ``alpha`` in
    ``alpha_range`` with weights ``[alpha, 1 - alpha]``.  Results are
    printed, the best graphs are saved as PNG files, and one summary line per
    pair is prepended to ``results/preimage_mix/nb_updates.txt``.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, _ = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    alpha_range = np.linspace(0.5, 0.5, 1)
    k = 5  # k nearest neighbors
    epsilon = 1e-6
    InitIAMWithAllDk = True
    InitRandomWithAllDk = True
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = True
    connected_iam = False

    nb_graphs = len(Gn)
    nb_update_mat_iam = np.full((nb_graphs, nb_graphs), np.inf)
    nb_update_mat_random = np.full((nb_graphs, nb_graphs), np.inf)
    # test on each pair of graphs.  To visit all pairs use
    #     for idx1 in range(len(Gn) - 1, -1, -1):
    #         for idx2 in range(idx1, -1, -1):
    for idx1 in range(187, 188):
        for idx2 in range(167, 168):
            g1 = Gn[idx1].copy()
            g2 = Gn[idx2].copy()
            # draw both input graphs; the file name follows the graph index
            # so that other pairs do not overwrite each other's pictures.
            for graph, idx_graph in ((g1, idx1), (g2, idx2)):
                nx.draw(graph, labels=nx.get_node_attributes(graph, 'atom'),
                        with_labels=True)
                plt.savefig("results/preimage_mix/mutag" + str(idx_graph)
                            + ".png", format="PNG")
                plt.show()
                plt.clf()

            # Read the stored Gram matrix; the ``with`` block closes the
            # NpzFile handle once both arrays are in memory.
            # NOTE(review): the stored matrix must already have
            # len(Gn) + 2 rows/columns -- confirm against the code that
            # wrote the file.
            with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
                km = gmfile['gm']
                time_km = gmfile['gmtime']
            # modify mixed gram matrix: the last two rows/columns repeat the
            # entries of graphs idx1 and idx2.
            pair = [idx1, idx2]
            km[:nb_graphs, nb_graphs:nb_graphs + 2] = km[:nb_graphs, pair]
            km[nb_graphs:nb_graphs + 2, :nb_graphs] = km[:nb_graphs, pair].T
            km[nb_graphs:nb_graphs + 2, nb_graphs:nb_graphs + 2] = \
                km[np.ix_(pair, pair)]

            time_list = []
            dis_ks_min_list = []
            sod_gs_list = []
            sod_gs_min_list = []
            nb_updated_list_iam = []
            nb_updated_list_random = []
            nb_updated_k_list_iam = []
            nb_updated_k_list_random = []
            g_best = []
            # for each alpha
            for alpha in alpha_range:
                print('\n-------------------------------------------------------\n')
                print('alpha =', alpha)
                time0 = time.time()
                dhat, ghat_list, _, nb_updated_iam, nb_updated_random, \
                    nb_updated_k_iam, nb_updated_k_random = \
                    preimage_iam_random_mix(
                        Gn, [g1, g2], [alpha, 1 - alpha],
                        range(nb_graphs, nb_graphs + 2), km, k, r_max,
                        l_max, gkernel, epsilon=epsilon,
                        InitIAMWithAllDk=InitIAMWithAllDk,
                        InitRandomWithAllDk=InitRandomWithAllDk,
                        params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
                                    'ite_max': ite_max_iam,
                                    'epsilon': epsilon_iam,
                                    'removeNodes': removeNodes,
                                    'connected': connected_iam},
                        params_ged={'ged_cost': ged_cost,
                                    'ged_method': ged_method,
                                    'saveGXL': saveGXL})
                time_total = time.time() - time0 + time_km
                print('time: ', time_total)
                time_list.append(time_total)
                dis_ks_min_list.append(dhat)
                g_best.append(ghat_list)
                nb_updated_list_iam.append(nb_updated_iam)
                nb_updated_list_random.append(nb_updated_random)
                nb_updated_k_list_iam.append(nb_updated_k_iam)
                nb_updated_k_list_random.append(nb_updated_k_random)

            # show best graphs and save them to file.
            for idx, item in enumerate(alpha_range):
                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
                print('one of the possible corresponding pre-images is')
                nx.draw(g_best[idx][0],
                        labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
                        with_labels=True)
                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
                            + '_alpha' + str(item) + '.png', format="PNG")
                plt.clf()

            # compute the corresponding sod in graph space.  g_best[idx] is
            # the list of graphs returned for the idx-th alpha; as in the
            # drawing loop above, its first graph is the one evaluated.
            for idx, item in enumerate(alpha_range):
                sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2],
                                        ged_cost=ged_cost,
                                        ged_method=ged_method, saveGXL=saveGXL)
                sod_gs_list.append(sod_tmp)
                sod_gs_min_list.append(np.min(sod_tmp))

            print('\nsods in graph space: ', sod_gs_list)
            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)
            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
            print('\nnumber of updates of the best graph for each alpha by random generation: ',
                  nb_updated_list_random)
            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ',
                  nb_updated_k_list_iam)
            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ',
                  nb_updated_k_list_random)
            print('\ntimes:', time_list)
            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]

            # prepend the summary of this pair to the log file; a missing
            # file is treated as empty instead of aborting the experiment.
            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
            log_path = 'results/preimage_mix/nb_updates.txt'
            try:
                with open(log_path, 'r') as file:
                    content = file.read()
            except FileNotFoundError:
                content = ''
            with open(log_path, 'w') as file:
                file.write(str_fw + content)
###############################################################################

if __name__ == '__main__':
    # Script entry point.  Only one experiment is enabled at a time; the
    # other calls are left commented out so they can be switched on by hand.
    ###############################################################################
    # test on the combination of the two randomly chosen graphs. (the same as in the
    # random pre-image paper.)
    # test_preimage_mix_2combination_all_pairs()
    ###############################################################################
    # tests on different numbers of median-sets.
    # test_preimage_mix_median_nb()
    ###############################################################################
    # tests on different values on grid of median-sets and k.
    test_preimage_mix_grid_k_median_nb()

+ 402
- 0
preimage/test_preimage_random.py View File

@@ -0,0 +1,402 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""

import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm

#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset

from preimage_random import preimage_random
from ged import ged_median
from utils import compute_kernel, get_same_item_indices, remove_edges


###############################################################################
# tests on different values on grid of median-sets and k.

def test_preimage_random_grid_k_median_nb():
    """Run the random pre-image search on a grid of median-set sizes and k.

    For each size in ``nb_median_range`` a set of graphs is drawn (with a
    fixed seed) from the MUTAG graphs labelled 1 and the pre-computed Gram
    matrix read from disk is extended with one row/column per drawn graph.
    ``preimage_random`` is then called for every ``k`` in ``k_range`` with
    equal weights ``1 / nb_median``.  The graph found is drawn to
    ``results/preimage_random/`` and its SOD to the median set is computed
    with ``ged_median``.  Per-(size, k) statistics are collected in nested
    lists and printed; nothing is returned.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
    # number of nearest neighbors.
    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]

    # keep only the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The stored Gram matrix does not depend on the median set or on k, so
    # it is read once; the ``with`` block closes the NpzFile handle.
    # NOTE(review): the file was presumably produced by
    #     km = compute_kernel(Gn, gkernel, True)
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the top-left block is the stored
        # matrix (upper triangle mirrored), the extra rows/columns repeat the
        # entries of the graphs chosen for the median set.
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        rows, cols = np.triu_indices(nb_graphs)
        km[rows, cols] = km_tmp[rows, cols]
        km[cols, rows] = km_tmp[rows, cols]
        km[:nb_graphs, nb_graphs:] = km[:nb_graphs, idx_rdm]
        km[nb_graphs:, :nb_graphs] = km[:nb_graphs, idx_rdm].T
        km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_rdm, idx_rdm)]

        alpha_range = [1 / nb_median] * nb_median
        # one inner list per median-set size, one entry per k.
        time_list.append([])
        dis_ks_min_list.append([])
        sod_gs_list.append([])
        sod_gs_min_list.append([])
        nb_updated_list.append([])
        g_best.append([])
        for k in k_range:
            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
            print('k =', k)
            time0 = time.time()
            dhat, ghat, nb_updated = preimage_random(
                Gn, Gn_median, alpha_range,
                range(nb_graphs, nb_graphs + nb_median), km, k, r_max, l_max,
                gkernel)
            # the reported time includes the time stored with the Gram matrix.
            time_total = time.time() - time0 + time_km
            print('time: ', time_total)
            time_list[-1].append(time_total)
            print('\nsmallest distance in kernel space: ', dhat)
            dis_ks_min_list[-1].append(dhat)
            g_best[-1].append(ghat)
            print('\nnumber of updates of the best graph: ', nb_updated)
            nb_updated_list[-1].append(nb_updated)

            # show the best graph and save it to file.
            print('the shortest distance is', dhat)
            print('one of the possible corresponding pre-images is')
            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                    with_labels=True)
            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                        '_k' + str(k) + '.png', format="PNG")
            plt.clf()

            # compute the corresponding sod in graph space.
            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                    ged_method=ged_method, saveGXL=saveGXL)
            sod_gs_list[-1].append(sod_tmp)
            sod_gs_min_list[-1].append(np.min(sod_tmp))
            print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs and k: ',
          sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
          dis_ks_min_list)
    # only random generation is run here, so the label no longer says "by IAM".
    print('\nnumber of updates of the best graph for each set of median graphs and k: ',
          nb_updated_list)
    print('\ntimes:', time_list)



###############################################################################
# tests on different numbers of median-sets.

def test_preimage_random_median_nb():
    """Run the random pre-image search for several median-set sizes.

    For each size in ``nb_median_range`` a set of graphs is drawn (with a
    fixed seed) from the MUTAG graphs labelled 1, the pre-computed Gram
    matrix read from disk is extended with one row/column per drawn graph,
    and ``preimage_random`` is called with ``k = 5`` and equal weights
    ``1 / nb_median``.  The graph found is drawn to
    ``results/preimage_random/`` and its SOD to the median set is computed
    with ``ged_median``.  All statistics are printed; nothing is returned.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 5  # iteration limit for pre-image.
    l_max = 500  # update limit for random generation
    k = 5  # k nearest neighbors
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # keep only the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]
    nb_graphs = len(Gn)

    # The stored Gram matrix does not depend on the median set, so it is read
    # once; the ``with`` block closes the NpzFile handle after both arrays
    # are in memory.
    # NOTE(review): the file was presumably produced by
    #     km = compute_kernel(Gn, gkernel, True)
    #     np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm',
    #              gm=km, gmtime=time_km)
    with np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') as gmfile:
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']

    time_list = []
    dis_ks_min_list = []
    sod_gs_list = []
    sod_gs_min_list = []
    nb_updated_list = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(nb_graphs), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]

        # Build the mixed Gram matrix: the top-left block is the stored
        # matrix (upper triangle mirrored), the extra rows/columns repeat the
        # entries of the graphs chosen for the median set.
        km = np.zeros((nb_graphs + nb_median, nb_graphs + nb_median))
        rows, cols = np.triu_indices(nb_graphs)
        km[rows, cols] = km_tmp[rows, cols]
        km[cols, rows] = km_tmp[rows, cols]
        km[:nb_graphs, nb_graphs:] = km[:nb_graphs, idx_rdm]
        km[nb_graphs:, :nb_graphs] = km[:nb_graphs, idx_rdm].T
        km[nb_graphs:, nb_graphs:] = km[np.ix_(idx_rdm, idx_rdm)]

        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, Gn_median, alpha_range,
            range(nb_graphs, nb_graphs + nb_median), km, k, r_max, l_max,
            gkernel)
        # the reported time includes the time stored with the Gram matrix.
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        print('\nsmallest distance in kernel space: ', dhat)
        dis_ks_min_list.append(dhat)
        print('\nnumber of updates of the best graph: ', nb_updated)
        nb_updated_list.append(nb_updated)

        # show the best graph and save it to file.
        print('the shortest distance is', dhat)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.clf()

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)
    print('\nsmallest distance in kernel space for each set of median graphs: ',
          dis_ks_min_list)
    print('\nnumber of updates of the best graph for each set of median graphs: ',
          nb_updated_list)
    print('\ntimes:', time_list)

###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
def test_random_preimage_2combination():
    """Run the random pre-image search on a weighted pair of MUTAG graphs.

    Graphs 187 and 167 of the dataset are combined with weights
    ``[alpha, 1 - alpha]`` for eleven values of ``alpha`` between 0 and 1.
    The stored Gram matrix is completed with two extra rows/columns that
    repeat the entries of the two graphs, ``preimage_random`` is called for
    every ``alpha``, and each resulting graph is saved to
    ``results/random_preimage/`` and displayed.  Distances, update counts
    and run times are printed; nothing is returned.
    """
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, _ = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    r_max = 10  # iteration limit for pre-image.
    l_max = 500  # passed to preimage_random as its update limit.
    alpha_range = np.linspace(0, 1, 11)
    k = 5  # k nearest neighbors
    # the pair is fixed; the seed is kept because code called below may
    # draw from numpy's global random state (NOTE(review): confirm).
    np.random.seed(1)
    idx_gi = [187, 167]  # np.random.randint(0, len(Gn), 2)
    idx1, idx2 = idx_gi
    g1 = Gn[idx1].copy()
    g2 = Gn[idx2].copy()
    nb_graphs = len(Gn)

    # Read the stored Gram matrix; the ``with`` block closes the NpzFile
    # handle once both arrays are in memory.
    # NOTE(review): the stored matrix must already have len(Gn) + 2
    # rows/columns -- confirm against the code that wrote the file.
    with np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') as gmfile:
        km = gmfile['gm']
        time_km = gmfile['gmtime']
    # modify mixed gram matrix: the last two rows/columns repeat the entries
    # of graphs idx1 and idx2.
    km[:nb_graphs, nb_graphs:nb_graphs + 2] = km[:nb_graphs, idx_gi]
    km[nb_graphs:nb_graphs + 2, :nb_graphs] = km[:nb_graphs, idx_gi].T
    km[nb_graphs:nb_graphs + 2, nb_graphs:nb_graphs + 2] = \
        km[np.ix_(idx_gi, idx_gi)]

    time_list = []
    nb_updated_list = []
    g_best = []
    dis_ks_min_list = []
    # for each alpha
    for alpha in alpha_range:
        print('\n-------------------------------------------------------\n')
        print('alpha =', alpha)
        time0 = time.time()
        dhat, ghat, nb_updated = preimage_random(
            Gn, [g1, g2], [alpha, 1 - alpha],
            range(nb_graphs, nb_graphs + 2), km, k, r_max, l_max, gkernel)
        time_total = time.time() - time0 + time_km
        print('time: ', time_total)
        time_list.append(time_total)
        dis_ks_min_list.append(dhat)
        g_best.append(ghat)
        nb_updated_list.append(nb_updated)

    # show best graphs and save them to file.
    for idx, item in enumerate(alpha_range):
        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
        print('one of the possible corresponding pre-images is')
        nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'),
                with_labels=True)
        # save before showing: once plt.show() returns the figure may already
        # have been released, and savefig would then write an empty picture.
        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
        plt.show()
        plt.clf()
        print(g_best[idx].nodes(data=True))
        print(g_best[idx].edges(data=True))

    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list)
    print('\nnumber of updates for each alpha: ', nb_updated_list)
    print('\ntimes:', time_list)
###############################################################################

if __name__ == '__main__':
    # Script entry point.  Only one experiment is enabled at a time; the
    # other calls are left commented out so they can be switched on by hand.
    ###############################################################################
    # test on the combination of the two randomly chosen graphs. (the same as in the
    # random pre-image paper.)
    # test_random_preimage_2combination()
    ###############################################################################
    # tests the random pre-image method on different numbers of median-sets.
    test_preimage_random_median_nb()
    ###############################################################################
    # tests the random pre-image method on different values on grid of
    # median-sets and k.
    # test_preimage_random_grid_k_median_nb()

+ 109
- 0
preimage/utils.py View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 19:05:07 2019

Useful functions.
@author: ljia
"""
#import networkx as nx

import multiprocessing
import numpy as np

import sys
sys.path.insert(0, "../")
from pygraph.kernels.marginalizedKernel import marginalizedkernel
from pygraph.kernels.untilHPathKernel import untilhpathkernel
from pygraph.kernels.spKernel import spkernel
import functools
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
from pygraph.kernels.structuralspKernel import structuralspkernel


def remove_edges(Gn):
    """Empty the attribute dictionary of every edge of every graph in ``Gn``.

    The graphs are modified in place.  Despite the name, the edges
    themselves are kept; only their attributes are removed.
    """
    edge_attributes = (attrs
                       for graph in Gn
                       for _, _, attrs in graph.edges(data=True))
    for attrs in edge_attributes:
        attrs.clear()
def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
    """Return the kernel-space distance between a graph and a weighted set.

    The value is ``sqrt(K[g, g] - 2 * sum_i a_i * K[g, g_i] + term3)``.

    Parameters
    ----------
    idx_g : index of the graph in ``Kmatrix``.
    idx_gi : indices (in ``Kmatrix``) of the graphs of the weighted set.
    alpha : weights, one per entry of ``idx_gi``.
    Kmatrix : Gram matrix, indexed as ``Kmatrix[i, j]``.
    term3 : value of ``sum_ij a_i * a_j * K[g_i, g_j]`` when it is already
        known; used as the starting value of the sum otherwise.
    withterm3 : when true, ``term3`` is used as given; when false, the
        double sum is computed here and added to ``term3``.
    """
    term1 = Kmatrix[idx_g, idx_g]
    term2 = 0
    for i, a in enumerate(alpha):
        term2 += a * Kmatrix[idx_g, idx_gi[i]]
    term2 *= 2
    if not withterm3:
        for i1, a1 in enumerate(alpha):
            for i2, a2 in enumerate(alpha):
                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
    squared = term1 - term2 + term3
    # Rounding can push an exact zero slightly below zero, which would turn
    # the distance into NaN; such values are clamped with the same tolerance
    # kernel_distance_matrix uses.  Larger negative values are left alone.
    if -1e-10 < squared < 0:
        squared = 0
    return np.sqrt(squared)


def compute_kernel(Gn, graph_kernel, verbose):
    """Compute the normalized Gram matrix of ``Gn`` for the chosen kernel.

    Parameters
    ----------
    Gn : list of graphs handed to the kernel function.
    graph_kernel : one of ``'marginalizedkernel'``, ``'untilhpathkernel'``,
        ``'spkernel'`` or ``'structuralspkernel'``.
    verbose : forwarded to the kernel function.

    Returns
    -------
    The Gram matrix with every entry divided by
    ``sqrt(K[i, i] * K[j, j])``.  The upper triangle is normalized and
    mirrored onto the lower one.

    Raises
    ------
    ValueError
        If ``graph_kernel`` is not one of the supported names.
    """
    n_jobs = multiprocessing.cpu_count()
    if graph_kernel == 'marginalizedkernel':
        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
                                        p_quit=0.03, n_iteration=10,
                                        remove_totters=False,
                                        n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'untilhpathkernel':
        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
                                      depth=10, k_func='MinMax',
                                      compute_method='trie',
                                      n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'spkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    elif graph_kernel == 'structuralspkernel':
        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels={
            'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
            n_jobs=n_jobs, verbose=verbose)
    else:
        # without this branch an unknown name only surfaced later as an
        # UnboundLocalError on Kmatrix.
        raise ValueError('Unknown graph kernel: %r.' % (graph_kernel,))

    # normalization; the diagonal is copied first because the loop
    # overwrites it.
    Kmatrix_diag = Kmatrix.diagonal().copy()
    for i in range(len(Kmatrix)):
        for j in range(i, len(Kmatrix)):
            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
            Kmatrix[j][i] = Kmatrix[i][j]
    return Kmatrix

def gram2distances(Kmatrix):
    """Convert a Gram matrix into the matrix of kernel-space distances.

    Entry ``(i, j)`` of the result is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``.
    """
    size = len(Kmatrix)
    squared = np.zeros((size, size))
    for row in range(size):
        for col in range(size):
            squared[row, col] = (Kmatrix[row, row] + Kmatrix[col, col]
                                 - 2 * Kmatrix[row, col])
    return np.sqrt(squared)


def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None):
    """Compute pairwise kernel-space distances between the graphs of ``Gn``.

    Parameters
    ----------
    Gn : list of graphs; only its length is used when ``Kmatrix`` is given.
    Kmatrix : optional Gram matrix indexed as ``Kmatrix[i, j]``.  When it is
        ``None`` the matrix is computed with
        ``compute_kernel(Gn, gkernel, True)``.
    gkernel : kernel name forwarded to ``compute_kernel``.

    Returns
    -------
    ``(dis_mat, dis_max, dis_min, dis_mean)`` where ``dis_mat[i, j]`` is
    ``sqrt(K[i, i] + K[j, j] - 2 * K[i, j])``, ``dis_max`` and ``dis_mean``
    are taken over the whole matrix, and ``dis_min`` is the smallest
    non-zero distance (``0.0`` when every distance is zero).

    Raises
    ------
    ValueError
        If a squared distance is below ``-1e-10``.
    """
    dis_mat = np.empty((len(Gn), len(Gn)))
    # ``is None`` is required: comparing an ndarray with ``== None`` is done
    # element-wise and cannot be used as a condition.
    if Kmatrix is None:
        Kmatrix = compute_kernel(Gn, gkernel, True)
    for i in range(len(Gn)):
        for j in range(i, len(Gn)):
            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
            if dis < 0:
                # tolerate rounding noise, reject really negative values.
                if dis > -1e-10:
                    dis = 0
                else:
                    raise ValueError('The distance is negative.')
            dis_mat[i, j] = np.sqrt(dis)
            dis_mat[j, i] = dis_mat[i, j]
    dis_max = np.max(dis_mat)
    nonzero = dis_mat[dis_mat != 0]
    # np.min fails on an empty selection (e.g. a single graph).
    dis_min = np.min(nonzero) if nonzero.size else 0.0
    dis_mean = np.mean(dis_mat)
    return dis_mat, dis_max, dis_min, dis_mean


def get_same_item_indices(ls):
    """Group the positions of ``ls`` by the item found at each position.

    Returns a dict that maps every distinct item to the list of indices at
    which it occurs, in increasing order.
    """
    positions = {}
    for position, value in enumerate(ls):
        positions.setdefault(value, []).append(position)
    return positions

Loading…
Cancel
Save