@@ -0,0 +1,103 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 16 14:20:06 2019

@author: ljia
"""
import numpy as np
from tqdm import tqdm
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from ged import GED, get_nb_edit_operations
from utils import kernel_distance_matrix

def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    # initial edit costs.
    c_vi = 1
    c_vr = 1
    c_vs = 1
    c_ei = 1
    c_er = 1
    c_es = 1

    # compute distances in feature space.
    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel)
    dis_k_vec = []
    for i in range(len(dis_k_mat)):
        for j in range(i, len(dis_k_mat)):
            dis_k_vec.append(dis_k_mat[i, j])
    dis_k_vec = np.array(dis_k_vec)

    residual_list = []
    edit_cost_list = []

    for itr in range(itr_max):
        print('iteration', itr)
        ged_all = []
        n_vi_all = []
        n_vr_all = []
        n_vs_all = []
        n_ei_all = []
        n_er_all = []
        n_es_all = []

        # compute GEDs and numbers of edit operations.
        edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
        edit_cost_list.append(edit_cost_constant)
        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
#        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy',
                    cost='CONSTANT', method='IPFP',
                    edit_cost_constant=edit_cost_constant, stabilizer='min',
                    repeat=30)
                ged_all.append(dis)
                n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(Gn[i],
                    Gn[j], pi_forward, pi_backward)
                n_vi_all.append(n_vi)
                n_vr_all.append(n_vr)
                n_vs_all.append(n_vs)
                n_ei_all.append(n_ei)
                n_er_all.append(n_er)
                n_es_all.append(n_es)
        residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec)))
        residual_list.append(residual)

        # "fit" GEDs to distances in feature space by tuning the edit costs
        # with least squares.
        nb_cost_mat = np.column_stack((np.array(n_vi_all), np.array(n_vr_all),
                                       np.array(n_vs_all), np.array(n_ei_all),
                                       np.array(n_er_all), np.array(n_es_all)))
        edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
                                                     rcond=None)
        for i in range(len(edit_costs)):
            if edit_costs[i] < 0:
                if edit_costs[i] > -1e-3:
                    # clamp tiny negative values (numerical noise) to zero.
                    edit_costs[i] = 0
#                else:
#                    raise ValueError('The edit cost is negative.')

        c_vi = edit_costs[0]
        c_vr = edit_costs[1]
        c_vs = edit_costs[2]
        c_ei = edit_costs[3]
        c_er = edit_costs[4]
        c_es = edit_costs[5]

    return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list


if __name__ == '__main__':
    from utils import remove_edges

    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
    Gn = Gn[0:10]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'
    itr_max = 10
    c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = \
        fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
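    # inspect the fit: the residual should (ideally) shrink as the edit costs
    # are re-fitted; all names below come from the return values above.
    print('fitted edit costs:', [c_vi, c_vr, c_vs, c_ei, c_er, c_es])
    print('residuals of successive iterations:', residual_list)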


@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 17 18:44:59 2019

@author: ljia
"""
import numpy as np
import networkx as nx
from tqdm import tqdm
import sys

from gedlibpy import librariesImport, gedlibpy

def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP',
        edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50):
    """
    Compute GED for 2 graphs.
    """
    if lib == 'gedlibpy':
        def convertGraph(G):
            """Convert a graph to the proper NetworkX format that can be
            recognized by library gedlibpy.
            """
            G_new = nx.Graph()
            for nd, attrs in G.nodes(data=True):
                G_new.add_node(str(nd), chem=attrs['atom'])
            for nd1, nd2, attrs in G.edges(data=True):
#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
                G_new.add_edge(str(nd1), str(nd2))
            return G_new

        gedlibpy.restart_env()
        gedlibpy.add_nx_graph(convertGraph(g1), "")
        gedlibpy.add_nx_graph(convertGraph(g2), "")
        listID = gedlibpy.get_all_graph_ids()
        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
        gedlibpy.init()
        gedlibpy.set_method(method, "")
        gedlibpy.init_method()

        g = listID[0]
        h = listID[1]
        if stabilizer is None:
            gedlibpy.run_method(g, h)
            pi_forward = gedlibpy.get_forward_map(g, h)
            pi_backward = gedlibpy.get_backward_map(g, h)
            upper = gedlibpy.get_upper_bound(g, h)
            lower = gedlibpy.get_lower_bound(g, h)
        elif stabilizer == 'min':
            # run the method several times and keep the smallest upper bound
            # and its maps.
            upper = np.inf
            for itr in range(repeat):
                gedlibpy.run_method(g, h)
                upper_tmp = gedlibpy.get_upper_bound(g, h)
                if upper_tmp < upper:
                    upper = upper_tmp
                    pi_forward = gedlibpy.get_forward_map(g, h)
                    pi_backward = gedlibpy.get_backward_map(g, h)
                    lower = gedlibpy.get_lower_bound(g, h)
                if upper == 0:
                    break
        dis = upper

    # convert map indices to node labels (mark removed nodes as np.inf).
    nodes1 = [n for n in g1.nodes()]
    nodes2 = [n for n in g2.nodes()]
    nb1 = nx.number_of_nodes(g1)
    nb2 = nx.number_of_nodes(g2)
    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]

    return dis, pi_forward, pi_backward
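
# A minimal usage sketch (assumes gedlibpy is importable and that nodes carry
# an 'atom' attribute, as convertGraph() above expects):
#   g1 = nx.Graph(); g1.add_node(0, atom='C'); g1.add_node(1, atom='O')
#   g1.add_edge(0, 1)
#   g2 = nx.Graph(); g2.add_node(0, atom='C')
#   dis, pi_f, pi_b = GED(g1, g2, cost='CONSTANT',
#                         edit_cost_constant=[1, 1, 1, 1, 1, 1])
#   # dis is the smallest upper bound found over `repeat` IPFP runs.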


def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP',
          edit_cost_constant=[], stabilizer='min', repeat=50):
    """
    Compute GEDs for a group of graphs, here taken to mean all pairs in Gn
    (the graphs are added to a single gedlibpy environment, so the edit costs
    and the method are initialized only once).
    """
    dis_list = []
    pi_forward_list = []
    pi_backward_list = []
    if lib == 'gedlibpy':
        def convertGraph(G):
            """Convert a graph to the proper NetworkX format that can be
            recognized by library gedlibpy.
            """
            G_new = nx.Graph()
            for nd, attrs in G.nodes(data=True):
                G_new.add_node(str(nd), chem=attrs['atom'])
            for nd1, nd2, attrs in G.edges(data=True):
#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
                G_new.add_edge(str(nd1), str(nd2))
            return G_new

        gedlibpy.restart_env()
        for G in Gn:
            gedlibpy.add_nx_graph(convertGraph(G), "")
        listID = gedlibpy.get_all_graph_ids()
        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
        gedlibpy.init()
        gedlibpy.set_method(method, "")
        gedlibpy.init_method()

        for i1 in range(len(listID)):
            for i2 in range(i1, len(listID)):
                g = listID[i1]
                h = listID[i2]
                if stabilizer is None:
                    gedlibpy.run_method(g, h)
                    pi_forward = gedlibpy.get_forward_map(g, h)
                    pi_backward = gedlibpy.get_backward_map(g, h)
                    upper = gedlibpy.get_upper_bound(g, h)
                elif stabilizer == 'min':
                    # run the method several times and keep the smallest upper
                    # bound and its maps.
                    upper = np.inf
                    for itr in range(repeat):
                        gedlibpy.run_method(g, h)
                        upper_tmp = gedlibpy.get_upper_bound(g, h)
                        if upper_tmp < upper:
                            upper = upper_tmp
                            pi_forward = gedlibpy.get_forward_map(g, h)
                            pi_backward = gedlibpy.get_backward_map(g, h)
                        if upper == 0:
                            break

                # convert map indices to node labels (mark removed nodes as
                # np.inf).
                nodes1 = [n for n in Gn[i1].nodes()]
                nodes2 = [n for n in Gn[i2].nodes()]
                nb1 = nx.number_of_nodes(Gn[i1])
                nb2 = nx.number_of_nodes(Gn[i2])
                pi_forward = [nodes2[pi] if pi < nb2 else np.inf
                              for pi in pi_forward]
                pi_backward = [nodes1[pi] if pi < nb1 else np.inf
                               for pi in pi_backward]

                dis_list.append(upper)
                pi_forward_list.append(pi_forward)
                pi_backward_list.append(pi_backward)

    return dis_list, pi_forward_list, pi_backward_list
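
# Usage sketch (the all-pairs semantics of GED_n() is an assumption documented
# in its docstring):
#   dis_list, pi_f_list, pi_b_list = GED_n(Gn, cost='CONSTANT',
#                                          edit_cost_constant=[1] * 6)
#   # dis_list[0] is the GED between Gn[0] and itself (should be 0).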


def ged_median(Gn, Gn_median, measure='ged', verbose=False,
               ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Compute, for each graph in Gn, the sum of its GEDs to all graphs in
    Gn_median.
    """
    dis_list = []
    pi_forward_list = []
    for idx, G in (tqdm(enumerate(Gn), desc='computing median distances',
                        file=sys.stdout) if verbose else enumerate(Gn)):
        dis_sum = 0
        pi_forward_list.append([])
        for G_p in Gn_median:
            dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
                cost=ged_cost, method=ged_method, saveGXL=saveGXL)
            pi_forward_list[idx].append(pi_tmp_forward)
            dis_sum += dis_tmp
        dis_list.append(dis_sum)
    return dis_list, pi_forward_list
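
# Usage sketch: for a candidate median g and a median set Gn_median,
#   sod, _ = ged_median([g], Gn_median)
# returns a one-element list whose entry is sum_{G_p in Gn_median} GED(g, G_p);
# the set-median of Gn_median is the candidate minimizing this sum.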


def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Compute the number of each type of edit operation induced by the maps.
    """
    n_vi = 0  # node insertions
    n_vr = 0  # node removals
    n_vs = 0  # node substitutions
    n_ei = 0  # edge insertions
    n_er = 0  # edge removals
    n_es = 0  # edge substitutions (never counted: edge labels are ignored)

    nodes1 = [n for n in g1.nodes()]
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1
    for map_i in backward_map:
        if map_i == np.inf:
            n_vi += 1

#    idx_nodes1 = range(0, len(node1))

    edges1 = [e for e in g1.edges()]
    nb_edges2_cnted = 0
    for n1, n2 in edges1:
        idx1 = nodes1.index(n1)
        idx2 = nodes1.index(n2)
        # one of the nodes is removed, thus the edge is removed.
        if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
            n_er += 1
        # corresponding edge is in g2. Edge label is not considered.
        elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
                (forward_map[idx2], forward_map[idx1]) in g2.edges():
            nb_edges2_cnted += 1
        # corresponding nodes are in g2, however the edge is removed.
        else:
            n_er += 1
    n_ei = nx.number_of_edges(g2) - nb_edges2_cnted

    return n_vi, n_vr, n_vs, n_ei, n_er, n_es
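
# Sanity-check sketch: with cost='CONSTANT', the distance returned by GED()
# should decompose linearly into the operation counts computed above (this is
# the premise of the least-squares cost fitting in the script above); it is
# stated here as an assumption, not a guarantee of the library:
#   costs = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
#   dis, pi_f, pi_b = GED(g1, g2, cost='CONSTANT', edit_cost_constant=costs)
#   n_ops = get_nb_edit_operations(g1, g2, pi_f, pi_b)
#   assert np.isclose(dis, np.dot(n_ops, costs))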


@@ -12,10 +12,10 @@ import networkx as nx
 from tqdm import tqdm
 import sys
-from gedlibpy import librariesImport, gedlibpy
 sys.path.insert(0, "../")
 from pygraph.utils.graphdataset import get_dataset_attributes
 from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
+from ged import GED, ged_median


 def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,

@@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
 #        # find the best graph generated in this iteration and update pi_p.
         # @todo: should we update all graphs generated or just the best ones?
-        dis_list, pi_forward_list = median_distance(G_new_list, Gn_median,
+        dis_list, pi_forward_list = ged_median(G_new_list, Gn_median,
                                                    **params_ged)
         # @todo: should we remove the identical and connectivity check?
         # Don't know which is faster.

@@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
     # phase 1: initilize.
     # compute set-median.
     dis_min = np.inf
-    dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median,
+    dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
                                               **params_ged)
     # find all smallest distances.
     if allBestInit:  # try all best init graphs.

@@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50,
-###############################################################################
-# Useful functions.
-def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit',
-        stabilizer='min'):
-    """
-    Compute GED.
-    """
-    if lib == 'gedlibpy':
-        def convertGraph(G):
-            """Convert a graph to the proper NetworkX format that can be
-            recognized by library gedlibpy.
-            """
-            G_new = nx.Graph()
-            for nd, attrs in G.nodes(data=True):
-                G_new.add_node(str(nd), chem=attrs['atom'])
-            for nd1, nd2, attrs in G.edges(data=True):
-#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-                G_new.add_edge(str(nd1), str(nd2))
-            return G_new
-
-        gedlibpy.restart_env()
-        gedlibpy.add_nx_graph(convertGraph(g1), "")
-        gedlibpy.add_nx_graph(convertGraph(g2), "")
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost(cost)
-        gedlibpy.init()
-        gedlibpy.set_method(method, "")
-        gedlibpy.init_method()
-
-        g = listID[0]
-        h = listID[1]
-        if stabilizer == None:
-            gedlibpy.run_method(g, h)
-            pi_forward = gedlibpy.get_forward_map(g, h)
-            pi_backward = gedlibpy.get_backward_map(g, h)
-            upper = gedlibpy.get_upper_bound(g, h)
-            lower = gedlibpy.get_lower_bound(g, h)
-        elif stabilizer == 'min':
-            upper = np.inf
-            for itr in range(50):
-                gedlibpy.run_method(g, h)
-                upper_tmp = gedlibpy.get_upper_bound(g, h)
-                if upper_tmp < upper:
-                    upper = upper_tmp
-                    pi_forward = gedlibpy.get_forward_map(g, h)
-                    pi_backward = gedlibpy.get_backward_map(g, h)
-                    lower = gedlibpy.get_lower_bound(g, h)
-                if upper == 0:
-                    break
-        dis = upper
-
-        # make the map label correct (label remove map as np.inf)
-        nodes1 = [n for n in g1.nodes()]
-        nodes2 = [n for n in g2.nodes()]
-        nb1 = nx.number_of_nodes(g1)
-        nb2 = nx.number_of_nodes(g2)
-        pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
-        pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-
-    return dis, pi_forward, pi_backward
-
-
-def median_distance(Gn, Gn_median, measure='ged', verbose=False,
-                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
-    dis_list = []
-    pi_forward_list = []
-    for idx, G in tqdm(enumerate(Gn), desc='computing median distances',
-                       file=sys.stdout) if verbose else enumerate(Gn):
-        dis_sum = 0
-        pi_forward_list.append([])
-        for G_p in Gn_median:
-            dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p,
-                cost=ged_cost, method=ged_method, saveGXL=saveGXL)
-            pi_forward_list[idx].append(pi_tmp_forward)
-            dis_sum += dis_tmp
-        dis_list.append(dis_sum)
-    return dis_list, pi_forward_list
-
-
 ###############################################################################
 # Old implementations.

@@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2].
 """
 import sys
 import numpy as np
-import multiprocessing
 from tqdm import tqdm
 import networkx as nx
 import matplotlib.pyplot as plt
 import random
 from iam import iam_upgraded
-sys.path.insert(0, "../")
-from pygraph.kernels.marginalizedKernel import marginalizedkernel
-from pygraph.kernels.untilHPathKernel import untilhpathkernel
-from pygraph.kernels.spKernel import spkernel
-import functools
-from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
-from pygraph.kernels.structuralspKernel import structuralspkernel
+from utils import dis_gstar, compute_kernel


 def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,

@@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
 #            print(g.nodes(data=True))
 #            print(g.edges(data=True))
         Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
-        for gi in Gk:
-            nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-#            nx.draw_networkx(gi)
-            plt.show()
-#            draw_Letter_graph(g)
-            print(gi.nodes(data=True))
-            print(gi.edges(data=True))
+#        for gi in Gk:
+#            nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+##            nx.draw_networkx(gi)
+#            plt.show()
+##            draw_Letter_graph(g)
+#            print(gi.nodes(data=True))
+#            print(gi.edges(data=True))
 #        i = 1
         r = 0

@@ -173,7 +166,7 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max,
     print('\nthe k shortest distances are', dis_k)
     print('the shortest distances for previous iterations are', dis_of_each_itr)
-    print('\nthe graph is updated', nb_updated, 'times.')
+    print('\n\nthe graph is updated', nb_updated, 'times.')
     print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
     print('distances in kernel space:', dis_of_each_itr, '\n')

@@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
 #            print(g.nodes(data=True))
 #            print(g.edges(data=True))
         Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]]  # the k nearest neighbors
-        for gi in Gk:
-            nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-#            nx.draw_networkx(gi)
-            plt.show()
-#            draw_Letter_graph(g)
-            print(gi.nodes(data=True))
-            print(gi.edges(data=True))
+#        for gi in Gk:
+#            nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+##            nx.draw_networkx(gi)
+#            plt.show()
+##            draw_Letter_graph(g)
+#            print(gi.nodes(data=True))
+#            print(gi.edges(data=True))

         r = 0
         itr_total = 0

@@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
                 # compute distance between \psi and the new generated graph.
                 knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
-                dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
+                dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1),
+                                     alpha, knew, withterm3=False)
                 # @todo: the new distance is smaller or also equal?
                 if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
                     # check if the new distance is the same as one in D_k.

@@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
     print('\nthe k shortest distances are', dis_k)
     print('the shortest distances for previous iterations are', dis_of_each_itr)
-    print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
+    print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
           nb_updated_random, 'times.')
     print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam,
           'times, and by random generation', nb_updated_k_random, 'times.')

@@ -459,60 +453,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max
 ###############################################################################
-# useful functions.
-def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
-    term1 = Kmatrix[idx_g, idx_g]
-    term2 = 0
-    for i, a in enumerate(alpha):
-        term2 += a * Kmatrix[idx_g, idx_gi[i]]
-    term2 *= 2
-    if withterm3 == False:
-        for i1, a1 in enumerate(alpha):
-            for i2, a2 in enumerate(alpha):
-                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    return np.sqrt(term1 - term2 + term3)
-
-
-def compute_kernel(Gn, graph_kernel, verbose):
-    if graph_kernel == 'marginalizedkernel':
-        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
-                                        p_quit=0.03, n_iteration=10, remove_totters=False,
-                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'untilhpathkernel':
-        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
-                                      depth=10, k_func='MinMax', compute_method='trie',
-                                      n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'spkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
-                                 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                                 n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'structuralspkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
-                                        {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-
-    # normalization
-    Kmatrix_diag = Kmatrix.diagonal().copy()
-    for i in range(len(Kmatrix)):
-        for j in range(i, len(Kmatrix)):
-            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-            Kmatrix[j][i] = Kmatrix[i][j]
-    return Kmatrix
-
-
-def gram2distances(Kmatrix):
-    dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
-    for i1 in range(len(Kmatrix)):
-        for i2 in range(len(Kmatrix)):
-            dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
-    dmatrix = np.sqrt(dmatrix)
-    return dmatrix
-
-
-###############################################################################
 # Old implementations.
 #def gk_iam(Gn, alpha):

@@ -10,51 +10,14 @@ pre-image
 import sys
 import numpy as np
 import random
-import multiprocessing
 from tqdm import tqdm
 import networkx as nx
 import matplotlib.pyplot as plt

 sys.path.insert(0, "../")
-from pygraph.utils.graphfiles import loadDataset
-from pygraph.kernels.marginalizedKernel import marginalizedkernel
-from pygraph.kernels.untilHPathKernel import untilhpathkernel
-from pygraph.kernels.spKernel import spkernel
-import functools
-from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
-from pygraph.kernels.structuralspKernel import structuralspkernel
-from gk_iam import dis_gstar
-
-
-def compute_kernel(Gn, graph_kernel, verbose):
-    if graph_kernel == 'marginalizedkernel':
-        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
-                                        p_quit=0.03, n_iteration=10, remove_totters=False,
-                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'untilhpathkernel':
-        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
-                                      depth=10, k_func='MinMax', compute_method='trie',
-                                      n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'spkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
-                                 {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                                 n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'structuralspkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
-                                        {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                                        n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-
-    # normalization
-    Kmatrix_diag = Kmatrix.diagonal().copy()
-    for i in range(len(Kmatrix)):
-        for j in range(i, len(Kmatrix)):
-            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-            Kmatrix[j][i] = Kmatrix[i][j]
-    return Kmatrix
+from utils import compute_kernel, dis_gstar


 def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):

@@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
     r = 0
 #    sod_list = [dhat]
 #    found = False
+    dis_of_each_itr = [dhat]
     nb_updated = 0
     g_best = []
     while r < r_max:

@@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
 #                                           p_quit=lmbda, n_iteration=20, remove_totters=False,
 #                                           n_jobs=multiprocessing.cpu_count(), verbose=False)
             knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
-            dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
+            dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew,
+                             withterm3=False)
             if dnew <= dhat:  # @todo: the new distance is smaller or also equal?
                 if dnew < dhat:
                     print('\nI am smaller!')

@@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke
             dihat_list = [dhat]
         else:
             r += 1
+        dis_of_each_itr.append(dhat)
+        print('the shortest distances for previous iterations are', dis_of_each_itr)
 #    dis_best.append(dhat)
-    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
+    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
+    print('distances in kernel space:', dis_of_each_itr, '\n')

     return dhat, g_best, nb_updated
 #    return 0, 0, 0


 if __name__ == '__main__':
+    from pygraph.utils.graphfiles import loadDataset
 #    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
 #          'extra_params': {}}  # node/edge symb

@@ -80,5 +80,6 @@ def testNxGrapĥ():
     print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
     print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))

+#test()
 init()
 #testNxGrapĥ()


@@ -0,0 +1,167 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 5 15:59:00 2019

@author: ljia
"""
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
import random
#from tqdm import tqdm
#import os
import sys
sys.path.insert(0, "../")
from pygraph.utils.graphfiles import loadDataset
from iam import iam_upgraded
from utils import remove_edges, compute_kernel, get_same_item_indices
from ged import ged_median


###############################################################################
# tests on different numbers of median-sets.

def test_iam_median_nb():
    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
          'extra_params': {}}  # node/edge symb
    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
#    Gn = Gn[0:50]
    remove_edges(Gn)
    gkernel = 'marginalizedkernel'

#    lmbda = 0.03  # termination probability
#    r_max = 10  # iteration limit for pre-image.
#    alpha_range = np.linspace(0.5, 0.5, 1)
#    k = 5  # k nearest neighbors
#    epsilon = 1e-6
#    InitIAMWithAllDk = True
    # parameters for GED function
    ged_cost = 'CHEM_1'
    ged_method = 'IPFP'
    saveGXL = 'gedlib'
    # parameters for IAM function
    c_ei = 1
    c_er = 1
    c_es = 1
    ite_max_iam = 50
    epsilon_iam = 0.001
    removeNodes = False
    connected_iam = False

    # number of graphs; we want to compute the median of these graphs.
    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]

    # find out all the graphs classified to positive group 1.
    idx_dict = get_same_item_indices(y_all)
    Gn = [Gn[i] for i in idx_dict[1]]

#    # compute Gram matrix.
#    time0 = time.time()
#    km = compute_kernel(Gn, gkernel, True)
#    time_km = time.time() - time0
#    # write Gram matrix to file.
#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)

    time_list = []
    sod_gen_median_list = []  # sod of the generated median for each nb_median.
    sod_gs_list = []
    sod_gs_min_list = []
    g_best = []
    for nb_median in nb_median_range:
        print('\n-------------------------------------------------------')
        print('number of median graphs =', nb_median)
        random.seed(1)
        idx_rdm = random.sample(range(len(Gn)), nb_median)
        print('graphs chosen:', idx_rdm)
        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
        Gn_candidate = [g.copy() for g in Gn_median]

#        for g in Gn_median:
#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
#            plt.show()
#            plt.clf()

        ###################################################################
        # load the pre-computed Gram matrix and build the mixed Gram matrix.
        # note: km is not used by iam_upgraded() below; the block is kept so
        # this test mirrors the pre-image tests.
        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
        km_tmp = gmfile['gm']
        time_km = gmfile['gmtime']
        # modify mixed gram matrix.
        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
        for i in range(len(Gn)):
            for j in range(i, len(Gn)):
                km[i, j] = km_tmp[i, j]
                km[j, i] = km[i, j]
        for i in range(len(Gn)):
            for j, idx in enumerate(idx_rdm):
                km[i, len(Gn) + j] = km[i, idx]
                km[len(Gn) + j, i] = km[i, idx]
        for i, idx1 in enumerate(idx_rdm):
            for j, idx2 in enumerate(idx_rdm):
                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
        ###################################################################
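        # A compact equivalent of the block above using numpy fancy indexing
        # (a sketch; assumes km_tmp is stored symmetric, with idx_rdm as
        # defined above):
        #   n = len(Gn)
        #   km = np.zeros((n + nb_median, n + nb_median))
        #   km[:n, :n] = km_tmp[:n, :n]
        #   km[:n, n:] = km_tmp[:n, idx_rdm]
        #   km[n:, :n] = km_tmp[idx_rdm, :n]
        #   km[n:, n:] = km_tmp[np.ix_(idx_rdm, idx_rdm)]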

        alpha_range = [1 / nb_median] * nb_median
        time0 = time.time()
        # iam_upgraded returns the generated median graph(s) and their
        # smallest sum of distances (sod) to the median set.
        ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate,
            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
            epsilon=epsilon_iam, removeNodes=removeNodes,
            connected=connected_iam,
            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
                        'saveGXL': saveGXL})
        time_total = time.time() - time0
        print('\ntime: ', time_total)
        time_list.append(time_total)
        print('\nsmallest sod of the generated median: ', dis_min)
        sod_gen_median_list.append(dis_min)
        g_best.append(ghat_new_list)

        # show the best graph and save it to file; save before show, since
        # show() empties the current figure.
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'),
                with_labels=True)
        plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
                    '.png', format="PNG")
        plt.show()
        plt.clf()
#        print(ghat_new_list[0].nodes(data=True))
#        print(ghat_new_list[0].edges(data=True))

        # compute the corresponding sod in graph space.
        sod_tmp, _ = ged_median([ghat_new_list[0]], Gn_median, ged_cost=ged_cost,
                                ged_method=ged_method, saveGXL=saveGXL)
        sod_gs_list.append(sod_tmp)
        sod_gs_min_list.append(np.min(sod_tmp))
        print('\nsmallest sod in graph space: ', np.min(sod_tmp))

    print('\nsods in graph space: ', sod_gs_list)
    print('\nsmallest sod in graph space for each set of median graphs: ',
          sod_gs_min_list)
    print('\nsod of the generated median for each set of median graphs: ',
          sod_gen_median_list)
    print('\ntimes:', time_list)


###############################################################################


if __name__ == '__main__':
    ###########################################################################
    # tests on different numbers of median-sets.
    test_iam_median_nb()


@@ -15,6 +15,9 @@ import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
 from median import draw_Letter_graph
+from ged import GED, ged_median
+from utils import get_same_item_indices, compute_kernel, gram2distances, \
+    dis_gstar, remove_edges


 # --------------------------- These are tests --------------------------------#

@@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn):
 def test_who_is_the_closest_in_GED_space(Gn):
-    from iam import GED
     idx_gi = [0, 6]
     g1 = Gn[idx_gi[0]]
     g2 = Gn[idx_gi[1]]

@@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn):
 def test_the_simple_two(Gn, gkernel):
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
+    from gk_iam import gk_iam_nearest_multi
     lmbda = 0.03 # termination probalility
     r_max = 10 # recursions
     l = 500

@@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel):
 def test_remove_bests(Gn, gkernel):
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
+    from gk_iam import gk_iam_nearest_multi
     lmbda = 0.03 # termination probalility
     r_max = 10 # recursions
     l = 500

@@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel):
 # Tests on dataset Letter-H.
 def test_gkiam_letter_h():
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
+    from gk_iam import gk_iam_nearest_multi
     ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
           'extra_params': {}}  # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',

@@ -305,7 +306,7 @@ def test_gkiam_letter_h():
         print(g.edges(data=True))

     # compute the corresponding sod in graph space. (alpha range not considered.)
-    sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER',
+    sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER',
                                 ged_method='IPFP', saveGXL='gedlib-letter')
     sod_gs_list.append(sod_tmp)
     sod_gs_min_list.append(np.min(sod_tmp))

@@ -318,19 +319,6 @@ def test_gkiam_letter_h():
     print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list)
     print('\nnumber of updates for each letter: ', nb_updated_list)
     print('\ntimes:', time_list)
-
-
-def get_same_item_indices(ls):
-    """Get the indices of the same items in a list. Return a dict keyed by items.
-    """
-    idx_dict = {}
-    for idx, item in enumerate(ls):
-        if item in idx_dict:
-            idx_dict[item].append(idx)
-        else:
-            idx_dict[item] = [idx]
-    return idx_dict


 #def compute_letter_median_by_average(Gn):
 #    return g_median

@@ -338,7 +326,6 @@ def get_same_item_indices(ls):
 def test_iam_letter_h():
     from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
-    from gk_iam import dis_gstar, compute_kernel
     ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
           'extra_params': {}}  # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',

@@ -402,7 +389,7 @@ def test_iam_letter_h():
 def test_random_preimage_letter_h():
-    from preimage_random import preimage_random, compute_kernel
+    from preimage_random import preimage_random
     ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
           'extra_params': {}}  # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',

@@ -463,7 +450,7 @@ def test_random_preimage_letter_h():
     print(g.edges(data=True))

     # compute the corresponding sod in graph space. (alpha range not considered.)
-    sod_tmp, _ = median_distance(g_best[0], Gn_let)
+    sod_tmp, _ = ged_median(g_best[0], Gn_let)
     sod_list.append(sod_tmp)
     sod_min_list.append(np.min(sod_tmp))

@@ -479,8 +466,7 @@ def test_random_preimage_letter_h():
 def test_gkiam_mutag():
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
+    from gk_iam import gk_iam_nearest_multi
     ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
           'extra_params': {}}  # node nsymb
 #    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',

@@ -535,7 +521,7 @@ def test_gkiam_mutag():
     print(g.edges(data=True))

     # compute the corresponding sod in graph space. (alpha range not considered.)
-    sod_tmp, _ = median_distance(g_best[0], Gn_let)
+    sod_tmp, _ = ged_median(g_best[0], Gn_let)
     sod_gs_list.append(sod_tmp)
     sod_gs_min_list.append(np.min(sod_tmp))
     sod_ks_min_list.append(sod_ks)

@@ -553,9 +539,7 @@ def test_gkiam_mutag():
 # Re-test.
 def retest_the_simple_two():
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
-    from test_random_mutag import remove_edges
+    from gk_iam import gk_iam_nearest_multi

     # The two simple graphs.
 #    g1 = nx.Graph(name='haha')

@@ -653,7 +637,7 @@ def retest_the_simple_two():
     # compute the corresponding sod in graph space.
     for idx, item in enumerate(alpha_range):
-        sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
+        sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost,
                                     ged_method=ged_method, saveGXL=saveGXL)
         sod_gs_list.append(sod_tmp)
         sod_gs_min_list.append(np.min(sod_tmp))


@@ -10,20 +10,23 @@ import numpy as np
 import networkx as nx
 import matplotlib.pyplot as plt
 import time
-from tqdm import tqdm
+import random
+#from tqdm import tqdm
-import os
+#import os
 import sys
 sys.path.insert(0, "../")
 from pygraph.utils.graphfiles import loadDataset
+from utils import remove_edges, compute_kernel, get_same_item_indices
+from ged import ged_median
+from preimage_iam import preimage_iam
 ###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
+# tests on different values on grid of median-sets and k.
-def test_preimage_mix_2combination_all_pairs():
-    from preimage_iam import preimage_iam_random_mix, compute_kernel
-    from iam import median_distance
+def test_preimage_iam_grid_k_median_nb():
     ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
           'extra_params': {}}  # node/edge symb
     Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])

@@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs():
     gkernel = 'marginalizedkernel'

     lmbda = 0.03 # termination probalility
-    r_max = 10 # iteration limit for pre-image.
-    l_max = 500 # update limit for random generation
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 5 # k nearest neighbors
+    r_max = 5 # iteration limit for pre-image.
+#    alpha_range = np.linspace(0.5, 0.5, 1)
+#    k = 5 # k nearest neighbors
     epsilon = 1e-6
     InitIAMWithAllDk = True
-    InitRandomWithAllDk = True
     # parameters for GED function
     ged_cost='CHEM_1'
     ged_method='IPFP'

@@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs():
     removeNodes = True
     connected_iam = False

-    nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
-    nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
-    # test on each pair of graphs.
-#    for idx1 in range(len(Gn) - 1, -1, -1):
-#        for idx2 in range(idx1, -1, -1):
-    for idx1 in range(187, 188):
-        for idx2 in range(167, 168):
-            g1 = Gn[idx1].copy()
-            g2 = Gn[idx2].copy()
-#            Gn[10] = []
-#            Gn[10] = []
+    # number of graphs; we want to compute the median of these graphs.
+    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
+    # number of nearest neighbors.
+    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
+
+    # find out all the graphs classified to positive group 1.
+    idx_dict = get_same_item_indices(y_all)
+    Gn = [Gn[i] for i in idx_dict[1]]
+
+#    # compute Gram matrix.
+#    time0 = time.time()
+#    km = compute_kernel(Gn, gkernel, True)
+#    time_km = time.time() - time0
+#    # write Gram matrix to file.
+#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
+
+    time_list = []
+    dis_ks_min_list = []
+    sod_gs_list = []
+    sod_gs_min_list = []
+    nb_updated_list = []
+    nb_updated_k_list = []
+    g_best = []
+    for idx_nb, nb_median in enumerate(nb_median_range):
+        print('\n-------------------------------------------------------')
+        print('number of median graphs =', nb_median)
+        random.seed(1)
+        idx_rdm = random.sample(range(len(Gn)), nb_median)
+        print('graphs chosen:', idx_rdm)
+        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
+#        for g in Gn_median:
+#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
+##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
+#            plt.show()
+#            plt.clf()
+
+        ###################################################################
+        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
+        km_tmp = gmfile['gm']
+        time_km = gmfile['gmtime']
+        # modify mixed gram matrix.
+        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
+        for i in range(len(Gn)):
+            for j in range(i, len(Gn)):
+                km[i, j] = km_tmp[i, j]
+                km[j, i] = km[i, j]
+        for i in range(len(Gn)):
+            for j, idx in enumerate(idx_rdm):
+                km[i, len(Gn) + j] = km[i, idx]
+                km[len(Gn) + j, i] = km[i, idx]
+        for i, idx1 in enumerate(idx_rdm):
+            for j, idx2 in enumerate(idx_rdm):
+                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
+        ###################################################################
+
+        alpha_range = [1 / nb_median] * nb_median
+        time_list.append([])
+        dis_ks_min_list.append([])
+        sod_gs_list.append([])
+        sod_gs_min_list.append([])
+        nb_updated_list.append([])
+        nb_updated_k_list.append([])
+        g_best.append([])
+        for k in k_range:
+            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
+            print('k =', k)
+            time0 = time.time()
+            dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
+                preimage_iam(Gn, Gn_median,
+                    alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max,
+                    gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
+                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
+                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
+                                'removeNodes': removeNodes, 'connected': connected_iam},
+                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
+                                'saveGXL': saveGXL})
+
+            time_total = time.time() - time0 + time_km
+            print('time: ', time_total)
+            time_list[idx_nb].append(time_total)
+            print('\nsmallest distance in kernel space: ', dhat)
+            dis_ks_min_list[idx_nb].append(dhat)
+            g_best[idx_nb].append(ghat_list)
+            print('\nnumber of updates of the best graph by IAM: ', nb_updated)
+            nb_updated_list[idx_nb].append(nb_updated)
+            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
+            nb_updated_k_list[idx_nb].append(nb_updated_k)
-            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-            plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
-            plt.show()
-            plt.clf()
-            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-            plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
-            plt.show()
+
+            # show the best graph and save it to file.
+            print('the shortest distance is', dhat)
+            print('one of the possible corresponding pre-images is')
+            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'),
+                    with_labels=True)
+            plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) +
+                        '_k' + str(k) + '.png', format="PNG")
+#            plt.show()
             plt.clf()
+#            print(ghat_list[0].nodes(data=True))
+#            print(ghat_list[0].edges(data=True))
+
+            # compute the corresponding sod in graph space.
+            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost,
+                                    ged_method=ged_method, saveGXL=saveGXL)
+            sod_gs_list[idx_nb].append(sod_tmp)
+            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
+            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
+
+    print('\nsods in graph space: ', sod_gs_list)
+    print('\nsmallest sod in graph space for each set of median graphs and k: ',
+          sod_gs_min_list)
+    print('\nsmallest distance in kernel space for each set of median graphs and k: ',
+          dis_ks_min_list)
+    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ',
+          nb_updated_list)
+    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ',
+          nb_updated_k_list)
+    print('\ntimes:', time_list)
-
-            ###################################################################
-#            Gn_mix = [g.copy() for g in Gn]
-#            Gn_mix.append(g1.copy())
-#            Gn_mix.append(g2.copy())
-#
-#            # compute
-#            time0 = time.time()
-#            km = compute_kernel(Gn_mix, gkernel, True)
-#            time_km = time.time() - time0
-#
-#            # write Gram matrix to file and read it.
-#            np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
-            ###################################################################
-            gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
-            km = gmfile['gm']
-            time_km = gmfile['gmtime']
-            # modify mixed gram matrix.
-            for i in range(len(Gn)):
-                km[i, len(Gn)] = km[i, idx1]
-                km[i, len(Gn) + 1] = km[i, idx2]
-                km[len(Gn), i] = km[i, idx1]
-                km[len(Gn) + 1, i] = km[i, idx2]
-            km[len(Gn), len(Gn)] = km[idx1, idx1]
-            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
-            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
-            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-            ###################################################################
-#            # use only the two graphs in median set as candidates.
-#            Gn = [g1.copy(), g2.copy()]
-#            Gn_mix = Gn + [g1.copy(), g2.copy()]
-#            # compute
-#            time0 = time.time()
-#            km = compute_kernel(Gn_mix, gkernel, True)
-#            time_km = time.time() - time0
+
+
+###############################################################################
+# tests on different numbers of median-sets.
+
+def test_preimage_iam_median_nb():
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
+          'extra_params': {}}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+#    Gn = Gn[0:50]
+    remove_edges(Gn)
+    gkernel = 'marginalizedkernel'
-
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list_iam = []
-    nb_updated_list_random = []
-    nb_updated_k_list_iam = []
-    nb_updated_k_list_random = []
-    g_best = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('\n-------------------------------------------------------\n')
-        print('alpha =', alpha)
-        time0 = time.time()
-        dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
-            nb_updated_k_iam, nb_updated_k_random = \
-                preimage_iam_random_mix(Gn, [g1, g2],
-                    [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max,
-                    l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
-                    InitRandomWithAllDk=InitRandomWithAllDk,
-                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es,
-                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                                'removeNodes': removeNodes, 'connected': connected_iam},
-                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method,
-                                'saveGXL': saveGXL})
-        time_total = time.time() - time0 + time_km
-        print('time: ', time_total)
-        time_list.append(time_total)
-        dis_ks_min_list.append(dhat)
-        g_best.append(ghat_list)
-        nb_updated_list_iam.append(nb_updated_iam)
-        nb_updated_list_random.append(nb_updated_random)
-        nb_updated_k_list_iam.append(nb_updated_k_iam)
-        nb_updated_k_list_random.append(nb_updated_k_random)
-
-    # show best graphs and save them to file.
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
-        print('one of the possible corresponding pre-images is')
-        nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'),
-                with_labels=True)
-        plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2)
-                    + '_alpha' + str(item) + '.png', format="PNG")
-#        plt.show()
-        plt.clf()
-#        print(g_best[idx][0].nodes(data=True))
-#        print(g_best[idx][0].edges(data=True))
-#        for g in g_best[idx]:
-#            draw_Letter_graph(g, savepath='results/gk_iam/')
-##            nx.draw_networkx(g)
-##            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||||
k = 5 # k nearest neighbors | |||||
epsilon = 1e-6 | |||||
InitIAMWithAllDk = True | |||||
# parameters for GED function | |||||
ged_cost='CHEM_1' | |||||
ged_method='IPFP' | |||||
saveGXL='gedlib' | |||||
# parameters for IAM function | |||||
c_ei=1 | |||||
c_er=1 | |||||
c_es=1 | |||||
ite_max_iam = 50 | |||||
epsilon_iam = 0.001 | |||||
removeNodes = True | |||||
connected_iam = False | |||||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||||
# select all the graphs classified into the positive group (class 1).
idx_dict = get_same_item_indices(y_all) | |||||
Gn = [Gn[i] for i in idx_dict[1]] | |||||
# # compute Gram matrix. | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
# # write Gram matrix to file. | |||||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||||
time_list = [] | |||||
dis_ks_min_list = [] | |||||
sod_gs_list = [] | |||||
sod_gs_min_list = [] | |||||
nb_updated_list = [] | |||||
nb_updated_k_list = [] | |||||
g_best = [] | |||||
for nb_median in nb_median_range: | |||||
print('\n-------------------------------------------------------') | |||||
print('number of median graphs =', nb_median) | |||||
random.seed(1) | |||||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||||
print('graphs chosen:', idx_rdm) | |||||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||||
# for g in Gn_median: | |||||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||||
# plt.show() | |||||
# plt.clf() | |||||
# compute the corresponding sod in graph space. | |||||
for idx, item in enumerate(alpha_range): | |||||
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list.append(sod_tmp) | |||||
sod_gs_min_list.append(np.min(sod_tmp)) | |||||
################################################################### | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||||
km_tmp = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
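# km_tmp covers only the len(Gn) dataset graphs, so allocate a larger
# matrix with nb_median extra rows/columns and fill the new entries by
# copying the kernel values of the chosen median graphs (idx_rdm).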
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
km[i, j] = km_tmp[i, j] | |||||
km[j, i] = km[i, j] | |||||
for i in range(len(Gn)): | |||||
for j, idx in enumerate(idx_rdm): | |||||
km[i, len(Gn) + j] = km[i, idx] | |||||
km[len(Gn) + j, i] = km[i, idx] | |||||
for i, idx1 in enumerate(idx_rdm): | |||||
for j, idx2 in enumerate(idx_rdm): | |||||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam) | |||||
print('\nnumber of updates of the best graph for each alpha by random generation: ', | |||||
nb_updated_list_random) | |||||
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', | |||||
nb_updated_k_list_iam) | |||||
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', | |||||
nb_updated_k_list_random) | |||||
print('\ntimes:', time_list) | |||||
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] | |||||
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] | |||||
################################################################### | |||||
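# equal weights, so that the pre-image approximates the median of the
# whole set Gn_median.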
alpha_range = [1 / nb_median] * nb_median | |||||
time0 = time.time() | |||||
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ | |||||
preimage_iam(Gn, Gn_median, | |||||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||||
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||||
'saveGXL': saveGXL}) | |||||
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ | |||||
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) | |||||
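# prepend str_fw to the log file: read the existing contents, rewind to
# the start, then write the new line followed by the old contents.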
with open('results/preimage_mix/nb_updates.txt', 'r+') as file: | |||||
content = file.read() | |||||
file.seek(0, 0) | |||||
file.write(str_fw + content) | |||||
time_total = time.time() - time0 + time_km | |||||
print('\ntime: ', time_total) | |||||
time_list.append(time_total) | |||||
print('\nsmallest distance in kernel space: ', dhat) | |||||
dis_ks_min_list.append(dhat) | |||||
g_best.append(ghat_list) | |||||
print('\nnumber of updates of the best graph: ', nb_updated) | |||||
nb_updated_list.append(nb_updated) | |||||
print('\nnumber of updates of k nearest graphs: ', nb_updated_k) | |||||
nb_updated_k_list.append(nb_updated_k) | |||||
# show the best graph and save it to file. | |||||
print('the shortest distance is', dhat) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||||
with_labels=True) | |||||
# plt.show() | |||||
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + | |||||
'.png', format="PNG") | |||||
plt.clf() | |||||
# print(ghat_list[0].nodes(data=True)) | |||||
# print(ghat_list[0].edges(data=True)) | |||||
# compute the corresponding sod in graph space. | |||||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list.append(sod_tmp) | |||||
sod_gs_min_list.append(np.min(sod_tmp)) | |||||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||||
dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', | |||||
nb_updated_list) | |||||
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', | |||||
nb_updated_k_list) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
# test on the combination of the two randomly chosen graphs. (the same as in the | |||||
# random pre-image paper.) | |||||
def test_gkiam_2combination_all_pairs():
from preimage_iam import preimage_iam, compute_kernel | |||||
from iam import median_distance | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs(): | |||||
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
-sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
+sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
@@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs(): | |||||
def test_gkiam_2combination():
-from gk_iam import gk_iam_nearest_multi, compute_kernel
-from iam import median_distance
+from gk_iam import gk_iam_nearest_multi
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
'extra_params': {}} # node/edge symb
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
@@ -451,7 +578,7 @@ def test_gkiam_2combination(): | |||||
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
-sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
+sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp)
sod_gs_min_list.append(np.min(sod_tmp))
@@ -463,148 +590,6 @@ def test_gkiam_2combination(): | |||||
print('\ntimes:', time_list)
def test_random_preimage_2combination(): | |||||
# from gk_iam import compute_kernel | |||||
from preimage_random import preimage_random | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:12] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel) | |||||
# print(dis_max, dis_min, dis_mean) | |||||
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||||
l = 500 | |||||
alpha_range = np.linspace(0, 1, 11) | |||||
k = 5 # k nearest neighbors | |||||
# randomly select two molecules | |||||
np.random.seed(1) | |||||
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2) | |||||
g1 = Gn[idx_gi[0]].copy() | |||||
g2 = Gn[idx_gi[1]].copy() | |||||
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
# plt.savefig("results/random_preimage/mutag10.png", format="PNG") | |||||
# plt.show() | |||||
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
# plt.savefig("results/random_preimage/mutag11.png", format="PNG") | |||||
# plt.show() | |||||
###################################################################### | |||||
# Gn_mix = [g.copy() for g in Gn] | |||||
# Gn_mix.append(g1.copy()) | |||||
# Gn_mix.append(g2.copy()) | |||||
# | |||||
## g_tmp = iam([g1, g2]) | |||||
## nx.draw_networkx(g_tmp) | |||||
## plt.show() | |||||
# | |||||
# # compute | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn_mix, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
################################################################### | |||||
idx1 = idx_gi[0] | |||||
idx2 = idx_gi[1] | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||||
km = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
for i in range(len(Gn)): | |||||
km[i, len(Gn)] = km[i, idx1] | |||||
km[i, len(Gn) + 1] = km[i, idx2] | |||||
km[len(Gn), i] = km[i, idx1] | |||||
km[len(Gn) + 1, i] = km[i, idx2] | |||||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||||
################################################################### | |||||
time_list = [] | |||||
nb_updated_list = [] | |||||
g_best = [] | |||||
dis_ks_min_list = [] | |||||
# for each alpha | |||||
for alpha in alpha_range: | |||||
print('\n-------------------------------------------------------\n') | |||||
print('alpha =', alpha) | |||||
time0 = time.time() | |||||
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], | |||||
range(len(Gn), len(Gn) + 2), km, | |||||
k, r_max, l, gkernel) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list.append(time_total) | |||||
dis_ks_min_list.append(dhat) | |||||
g_best.append(ghat) | |||||
nb_updated_list.append(nb_updated) | |||||
# show best graphs and save them to file. | |||||
for idx, item in enumerate(alpha_range): | |||||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG") | |||||
plt.show() | |||||
plt.clf() | |||||
print(g_best[idx].nodes(data=True)) | |||||
print(g_best[idx].edges(data=True)) | |||||
# # compute the corresponding sod in graph space. (alpha range not considered.) | |||||
# sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||||
# sod_gs_list.append(sod_tmp) | |||||
# sod_gs_min_list.append(np.min(sod_tmp)) | |||||
# sod_ks_min_list.append(sod_ks) | |||||
# nb_updated_list.append(nb_updated) | |||||
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
print('\nnumber of updates for each alpha: ', nb_updated_list) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
# help functions | |||||
def remove_edges(Gn): | |||||
for G in Gn: | |||||
for _, _, attrs in G.edges(data=True): | |||||
attrs.clear() | |||||
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None): | |||||
from gk_iam import compute_kernel | |||||
dis_mat = np.empty((len(Gn), len(Gn))) | |||||
if Kmatrix is None:
Kmatrix = compute_kernel(Gn, gkernel, True) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j] | |||||
if dis < 0: | |||||
if dis > -1e-10: | |||||
dis = 0 | |||||
else: | |||||
raise ValueError('The distance is negative.') | |||||
dis_mat[i, j] = np.sqrt(dis) | |||||
dis_mat[j, i] = dis_mat[i, j] | |||||
dis_max = np.max(np.max(dis_mat)) | |||||
dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | |||||
dis_mean = np.mean(np.mean(dis_mat)) | |||||
return dis_mat, dis_max, dis_min, dis_mean | |||||
###############################################################################
@@ -612,7 +597,13 @@ if __name__ == '__main__': | |||||
###############################################################################
# test on the combination of the two randomly chosen graphs. (the same as in the
# random pre-image paper.)
-# test_random_preimage_2combination()
# test_gkiam_2combination()
# test_gkiam_2combination_all_pairs()
+test_preimage_mix_2combination_all_pairs()
+###############################################################################
+# tests on different numbers of median-sets.
+test_preimage_iam_median_nb()
+###############################################################################
+# tests on different values on grid of median-sets and k.
+# test_preimage_iam_grid_k_median_nb()
@@ -0,0 +1,542 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Sep 5 15:59:00 2019 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import networkx as nx | |||||
import matplotlib.pyplot as plt | |||||
import time | |||||
import random | |||||
#from tqdm import tqdm | |||||
#import os | |||||
import sys | |||||
sys.path.insert(0, "../") | |||||
from pygraph.utils.graphfiles import loadDataset | |||||
from ged import ged_median | |||||
from utils import compute_kernel, get_same_item_indices, remove_edges | |||||
from preimage_iam import preimage_iam_random_mix | |||||
############################################################################### | |||||
# tests on different values on grid of median-sets and k. | |||||
def test_preimage_mix_grid_k_median_nb(): | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:50] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||||
l_max = 500 # update limit for random generation | |||||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||||
# k = 5 # k nearest neighbors | |||||
epsilon = 1e-6 | |||||
InitIAMWithAllDk = True | |||||
InitRandomWithAllDk = True | |||||
# parameters for GED function | |||||
ged_cost='CHEM_1' | |||||
ged_method='IPFP' | |||||
saveGXL='gedlib' | |||||
# parameters for IAM function | |||||
c_ei=1 | |||||
c_er=1 | |||||
c_es=1 | |||||
ite_max_iam = 50 | |||||
epsilon_iam = 0.001 | |||||
removeNodes = True | |||||
connected_iam = False | |||||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||||
# number of nearest neighbors. | |||||
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] | |||||
# select all the graphs classified into the positive group (class 1).
idx_dict = get_same_item_indices(y_all) | |||||
Gn = [Gn[i] for i in idx_dict[1]] | |||||
# # compute Gram matrix. | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
# # write Gram matrix to file. | |||||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||||
time_list = [] | |||||
dis_ks_min_list = [] | |||||
sod_gs_list = [] | |||||
sod_gs_min_list = [] | |||||
nb_updated_list_iam = [] | |||||
nb_updated_list_random = [] | |||||
nb_updated_k_list_iam = [] | |||||
nb_updated_k_list_random = [] | |||||
g_best = [] | |||||
for idx_nb, nb_median in enumerate(nb_median_range): | |||||
print('\n-------------------------------------------------------') | |||||
print('number of median graphs =', nb_median) | |||||
random.seed(1) | |||||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||||
print('graphs chosen:', idx_rdm) | |||||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||||
# for g in Gn_median: | |||||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||||
# plt.show() | |||||
# plt.clf() | |||||
################################################################### | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||||
km_tmp = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
km[i, j] = km_tmp[i, j] | |||||
km[j, i] = km[i, j] | |||||
for i in range(len(Gn)): | |||||
for j, idx in enumerate(idx_rdm): | |||||
km[i, len(Gn) + j] = km[i, idx] | |||||
km[len(Gn) + j, i] = km[i, idx] | |||||
for i, idx1 in enumerate(idx_rdm): | |||||
for j, idx2 in enumerate(idx_rdm): | |||||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||||
################################################################### | |||||
alpha_range = [1 / nb_median] * nb_median | |||||
time_list.append([]) | |||||
dis_ks_min_list.append([]) | |||||
sod_gs_list.append([]) | |||||
sod_gs_min_list.append([]) | |||||
nb_updated_list_iam.append([]) | |||||
nb_updated_list_random.append([]) | |||||
nb_updated_k_list_iam.append([]) | |||||
nb_updated_k_list_random.append([]) | |||||
g_best.append([]) | |||||
for k in k_range: | |||||
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |||||
print('k =', k) | |||||
time0 = time.time() | |||||
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||||
nb_updated_k_iam, nb_updated_k_random = \ | |||||
preimage_iam_random_mix(Gn, Gn_median, | |||||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||||
InitRandomWithAllDk=InitRandomWithAllDk, | |||||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||||
'saveGXL': saveGXL}) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list[idx_nb].append(time_total) | |||||
print('\nsmallest distance in kernel space: ', dhat) | |||||
dis_ks_min_list[idx_nb].append(dhat) | |||||
g_best[idx_nb].append(ghat_list) | |||||
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam) | |||||
nb_updated_list_iam[idx_nb].append(nb_updated_iam) | |||||
print('\nnumber of updates of the best graph by random generation: ', | |||||
nb_updated_random) | |||||
nb_updated_list_random[idx_nb].append(nb_updated_random) | |||||
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam) | |||||
nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam) | |||||
print('\nnumber of updates of k nearest graphs by random generation: ', | |||||
nb_updated_k_random) | |||||
nb_updated_k_list_random[idx_nb].append(nb_updated_k_random) | |||||
# show the best graph and save it to file. | |||||
print('the shortest distance is', dhat) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + | |||||
'_k' + str(k) + '.png', format="PNG") | |||||
# plt.show() | |||||
plt.clf() | |||||
# print(ghat_list[0].nodes(data=True)) | |||||
# print(ghat_list[0].edges(data=True)) | |||||
# compute the corresponding sod in graph space. | |||||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list[idx_nb].append(sod_tmp) | |||||
sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) | |||||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each set of median graphs and k: ', | |||||
sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each set of median graphs and k: ', | |||||
dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', | |||||
nb_updated_list_iam) | |||||
print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ', | |||||
nb_updated_list_random) | |||||
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', | |||||
nb_updated_k_list_iam) | |||||
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ', | |||||
nb_updated_k_list_random) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
# tests on different numbers of median-sets. | |||||
def test_preimage_mix_median_nb(): | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:50] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||||
l_max = 500 # update limit for random generation | |||||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||||
k = 5 # k nearest neighbors | |||||
epsilon = 1e-6 | |||||
InitIAMWithAllDk = True | |||||
InitRandomWithAllDk = True | |||||
# parameters for GED function | |||||
ged_cost='CHEM_1' | |||||
ged_method='IPFP' | |||||
saveGXL='gedlib' | |||||
# parameters for IAM function | |||||
c_ei=1 | |||||
c_er=1 | |||||
c_es=1 | |||||
ite_max_iam = 50 | |||||
epsilon_iam = 0.001 | |||||
removeNodes = True | |||||
connected_iam = False | |||||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||||
# select all the graphs classified into the positive group (class 1).
idx_dict = get_same_item_indices(y_all) | |||||
Gn = [Gn[i] for i in idx_dict[1]] | |||||
# # compute Gram matrix. | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
# # write Gram matrix to file. | |||||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||||
time_list = [] | |||||
dis_ks_min_list = [] | |||||
sod_gs_list = [] | |||||
sod_gs_min_list = [] | |||||
nb_updated_list_iam = [] | |||||
nb_updated_list_random = [] | |||||
nb_updated_k_list_iam = [] | |||||
nb_updated_k_list_random = [] | |||||
g_best = [] | |||||
for nb_median in nb_median_range: | |||||
print('\n-------------------------------------------------------') | |||||
print('number of median graphs =', nb_median) | |||||
random.seed(1) | |||||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||||
print('graphs chosen:', idx_rdm) | |||||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||||
# for g in Gn_median: | |||||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||||
# plt.show() | |||||
# plt.clf() | |||||
################################################################### | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||||
km_tmp = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
km[i, j] = km_tmp[i, j] | |||||
km[j, i] = km[i, j] | |||||
for i in range(len(Gn)): | |||||
for j, idx in enumerate(idx_rdm): | |||||
km[i, len(Gn) + j] = km[i, idx] | |||||
km[len(Gn) + j, i] = km[i, idx] | |||||
for i, idx1 in enumerate(idx_rdm): | |||||
for j, idx2 in enumerate(idx_rdm): | |||||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||||
################################################################### | |||||
alpha_range = [1 / nb_median] * nb_median | |||||
time0 = time.time() | |||||
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||||
nb_updated_k_iam, nb_updated_k_random = \ | |||||
preimage_iam_random_mix(Gn, Gn_median, | |||||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||||
InitRandomWithAllDk=InitRandomWithAllDk, | |||||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||||
'saveGXL': saveGXL}) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list.append(time_total) | |||||
print('\nsmallest distance in kernel space: ', dhat) | |||||
dis_ks_min_list.append(dhat) | |||||
g_best.append(ghat_list) | |||||
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam) | |||||
nb_updated_list_iam.append(nb_updated_iam) | |||||
print('\nnumber of updates of the best graph by random generation: ', | |||||
nb_updated_random) | |||||
nb_updated_list_random.append(nb_updated_random) | |||||
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam) | |||||
nb_updated_k_list_iam.append(nb_updated_k_iam) | |||||
print('\nnumber of updates of k nearest graphs by random generation: ', | |||||
nb_updated_k_random) | |||||
nb_updated_k_list_random.append(nb_updated_k_random) | |||||
# show the best graph and save it to file. | |||||
print('the shortest distance is', dhat) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + | |||||
'.png', format="PNG") | |||||
# plt.show() | |||||
plt.clf() | |||||
# print(ghat_list[0].nodes(data=True)) | |||||
# print(ghat_list[0].edges(data=True)) | |||||
# compute the corresponding sod in graph space. | |||||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list.append(sod_tmp) | |||||
sod_gs_min_list.append(np.min(sod_tmp)) | |||||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||||
dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', | |||||
nb_updated_list_iam) | |||||
print('\nnumber of updates of the best graph for each set of median graphs by random generation: ', | |||||
nb_updated_list_random) | |||||
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', | |||||
nb_updated_k_list_iam) | |||||
print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ', | |||||
nb_updated_k_list_random) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
# test on the combination of the two randomly chosen graphs. (the same as in the | |||||
# random pre-image paper.) | |||||
def test_preimage_mix_2combination_all_pairs(): | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:50] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||||
l_max = 500 # update limit for random generation | |||||
alpha_range = np.linspace(0.5, 0.5, 1) | |||||
k = 5 # k nearest neighbors | |||||
epsilon = 1e-6 | |||||
InitIAMWithAllDk = True | |||||
InitRandomWithAllDk = True | |||||
# parameters for GED function | |||||
ged_cost='CHEM_1' | |||||
ged_method='IPFP' | |||||
saveGXL='gedlib' | |||||
# parameters for IAM function | |||||
c_ei=1 | |||||
c_er=1 | |||||
c_es=1 | |||||
ite_max_iam = 50 | |||||
epsilon_iam = 0.001 | |||||
removeNodes = True | |||||
connected_iam = False | |||||
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf) | |||||
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf) | |||||
# test on each pair of graphs. | |||||
# for idx1 in range(len(Gn) - 1, -1, -1): | |||||
# for idx2 in range(idx1, -1, -1): | |||||
for idx1 in range(187, 188): | |||||
for idx2 in range(167, 168): | |||||
g1 = Gn[idx1].copy() | |||||
g2 = Gn[idx2].copy() | |||||
# Gn[10] = []
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
plt.savefig("results/preimage_mix/mutag187.png", format="PNG") | |||||
plt.show() | |||||
plt.clf() | |||||
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
plt.savefig("results/preimage_mix/mutag167.png", format="PNG") | |||||
plt.show() | |||||
plt.clf() | |||||
################################################################### | |||||
# Gn_mix = [g.copy() for g in Gn] | |||||
# Gn_mix.append(g1.copy()) | |||||
# Gn_mix.append(g2.copy()) | |||||
# | |||||
# # compute | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn_mix, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
# | |||||
# # write Gram matrix to file and read it. | |||||
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) | |||||
################################################################### | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||||
km = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
for i in range(len(Gn)): | |||||
km[i, len(Gn)] = km[i, idx1] | |||||
km[i, len(Gn) + 1] = km[i, idx2] | |||||
km[len(Gn), i] = km[i, idx1] | |||||
km[len(Gn) + 1, i] = km[i, idx2] | |||||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||||
################################################################### | |||||
# # use only the two graphs in median set as candidates. | |||||
# Gn = [g1.copy(), g2.copy()] | |||||
# Gn_mix = Gn + [g1.copy(), g2.copy()] | |||||
# # compute | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn_mix, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
time_list = [] | |||||
dis_ks_min_list = [] | |||||
sod_gs_list = [] | |||||
sod_gs_min_list = [] | |||||
nb_updated_list_iam = [] | |||||
nb_updated_list_random = [] | |||||
nb_updated_k_list_iam = [] | |||||
nb_updated_k_list_random = [] | |||||
g_best = [] | |||||
# for each alpha | |||||
for alpha in alpha_range: | |||||
print('\n-------------------------------------------------------\n') | |||||
print('alpha =', alpha) | |||||
time0 = time.time() | |||||
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||||
nb_updated_k_iam, nb_updated_k_random = \ | |||||
preimage_iam_random_mix(Gn, [g1, g2], | |||||
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||||
InitRandomWithAllDk=InitRandomWithAllDk, | |||||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||||
'saveGXL': saveGXL}) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list.append(time_total) | |||||
dis_ks_min_list.append(dhat) | |||||
g_best.append(ghat_list) | |||||
nb_updated_list_iam.append(nb_updated_iam) | |||||
nb_updated_list_random.append(nb_updated_random) | |||||
nb_updated_k_list_iam.append(nb_updated_k_iam) | |||||
nb_updated_k_list_random.append(nb_updated_k_random) | |||||
# show best graphs and save them to file. | |||||
for idx, item in enumerate(alpha_range): | |||||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) | |||||
+ '_alpha' + str(item) + '.png', format="PNG") | |||||
# plt.show() | |||||
plt.clf() | |||||
# print(g_best[idx][0].nodes(data=True)) | |||||
# print(g_best[idx][0].edges(data=True)) | |||||
# for g in g_best[idx]: | |||||
# draw_Letter_graph(g, savepath='results/gk_iam/') | |||||
## nx.draw_networkx(g) | |||||
## plt.show() | |||||
# print(g.nodes(data=True)) | |||||
# print(g.edges(data=True)) | |||||
# compute the corresponding sod in graph space. | |||||
for idx, item in enumerate(alpha_range): | |||||
sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list.append(sod_tmp) | |||||
sod_gs_min_list.append(np.min(sod_tmp)) | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam) | |||||
print('\nnumber of updates of the best graph for each alpha by random generation: ', | |||||
nb_updated_list_random) | |||||
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', | |||||
nb_updated_k_list_iam) | |||||
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', | |||||
nb_updated_k_list_random) | |||||
print('\ntimes:', time_list) | |||||
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] | |||||
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] | |||||
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ | |||||
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) | |||||
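# prepend str_fw to the log file: read the existing contents, rewind to
# the start, then write the new line followed by the old contents.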
with open('results/preimage_mix/nb_updates.txt', 'r+') as file: | |||||
content = file.read() | |||||
file.seek(0, 0) | |||||
file.write(str_fw + content) | |||||
############################################################################### | |||||
if __name__ == '__main__': | |||||
############################################################################### | |||||
# test on the combination of the two randomly chosen graphs. (the same as in the | |||||
# random pre-image paper.) | |||||
# test_preimage_mix_2combination_all_pairs() | |||||
############################################################################### | |||||
# tests on different numbers of median-sets. | |||||
# test_preimage_mix_median_nb() | |||||
############################################################################### | |||||
# tests on different values on grid of median-sets and k. | |||||
test_preimage_mix_grid_k_median_nb() |
@@ -0,0 +1,402 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Sep 5 15:59:00 2019 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import networkx as nx | |||||
import matplotlib.pyplot as plt | |||||
import time | |||||
import random | |||||
#from tqdm import tqdm | |||||
#import os | |||||
import sys | |||||
sys.path.insert(0, "../") | |||||
from pygraph.utils.graphfiles import loadDataset | |||||
from preimage_random import preimage_random | |||||
from ged import ged_median | |||||
from utils import compute_kernel, get_same_item_indices, remove_edges | |||||
############################################################################### | |||||
# tests on different values on grid of median-sets and k. | |||||
def test_preimage_random_grid_k_median_nb(): | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:50] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||||
l = 500 # update limit for random generation | |||||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||||
# k = 5 # k nearest neighbors | |||||
# parameters for GED function | |||||
ged_cost='CHEM_1' | |||||
ged_method='IPFP' | |||||
saveGXL='gedlib' | |||||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||||
# number of nearest neighbors. | |||||
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] | |||||
# select all the graphs classified into the positive group (class 1).
idx_dict = get_same_item_indices(y_all) | |||||
Gn = [Gn[i] for i in idx_dict[1]] | |||||
# # compute Gram matrix. | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
# # write Gram matrix to file. | |||||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||||
time_list = [] | |||||
dis_ks_min_list = [] | |||||
sod_gs_list = [] | |||||
sod_gs_min_list = [] | |||||
nb_updated_list = [] | |||||
g_best = [] | |||||
for idx_nb, nb_median in enumerate(nb_median_range): | |||||
print('\n-------------------------------------------------------') | |||||
print('number of median graphs =', nb_median) | |||||
random.seed(1) | |||||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||||
print('graphs chosen:', idx_rdm) | |||||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||||
# for g in Gn_median: | |||||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||||
# plt.show() | |||||
# plt.clf() | |||||
################################################################### | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||||
km_tmp = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
km[i, j] = km_tmp[i, j] | |||||
km[j, i] = km[i, j] | |||||
for i in range(len(Gn)): | |||||
for j, idx in enumerate(idx_rdm): | |||||
km[i, len(Gn) + j] = km[i, idx] | |||||
km[len(Gn) + j, i] = km[i, idx] | |||||
for i, idx1 in enumerate(idx_rdm): | |||||
for j, idx2 in enumerate(idx_rdm): | |||||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||||
################################################################### | |||||
alpha_range = [1 / nb_median] * nb_median | |||||
time_list.append([]) | |||||
dis_ks_min_list.append([]) | |||||
sod_gs_list.append([]) | |||||
sod_gs_min_list.append([]) | |||||
nb_updated_list.append([]) | |||||
g_best.append([]) | |||||
for k in k_range: | |||||
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |||||
print('k =', k) | |||||
time0 = time.time() | |||||
dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, | |||||
range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list[idx_nb].append(time_total) | |||||
print('\nsmallest distance in kernel space: ', dhat) | |||||
dis_ks_min_list[idx_nb].append(dhat) | |||||
g_best[idx_nb].append(ghat) | |||||
print('\nnumber of updates of the best graph: ', nb_updated) | |||||
nb_updated_list[idx_nb].append(nb_updated) | |||||
# show the best graph and save it to file. | |||||
print('the shortest distance is', dhat) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + | |||||
'_k' + str(k) + '.png', format="PNG") | |||||
# plt.show() | |||||
plt.clf() | |||||
# print(ghat_list[0].nodes(data=True)) | |||||
# print(ghat_list[0].edges(data=True)) | |||||
# compute the corresponding sod in graph space. | |||||
sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list[idx_nb].append(sod_tmp) | |||||
sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) | |||||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each set of median graphs and k: ', | |||||
sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each set of median graphs and k: ', | |||||
dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', | |||||
nb_updated_list) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
# tests on different numbers of median-sets. | |||||
def test_preimage_random_median_nb(): | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:50] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||||
l = 500 # update limit for random generation | |||||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||||
k = 5 # k nearest neighbors | |||||
# parameters for GED function | |||||
ged_cost='CHEM_1' | |||||
ged_method='IPFP' | |||||
saveGXL='gedlib' | |||||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||||
# select all the graphs classified into the positive group (class 1).
idx_dict = get_same_item_indices(y_all) | |||||
Gn = [Gn[i] for i in idx_dict[1]] | |||||
# # compute Gram matrix. | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
# # write Gram matrix to file. | |||||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||||
time_list = [] | |||||
dis_ks_min_list = [] | |||||
sod_gs_list = [] | |||||
sod_gs_min_list = [] | |||||
nb_updated_list = [] | |||||
g_best = [] | |||||
for nb_median in nb_median_range: | |||||
print('\n-------------------------------------------------------') | |||||
print('number of median graphs =', nb_median) | |||||
random.seed(1) | |||||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||||
print('graphs chosen:', idx_rdm) | |||||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||||
# for g in Gn_median: | |||||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||||
# plt.show() | |||||
# plt.clf() | |||||
################################################################### | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||||
km_tmp = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
km[i, j] = km_tmp[i, j] | |||||
km[j, i] = km[i, j] | |||||
for i in range(len(Gn)): | |||||
for j, idx in enumerate(idx_rdm): | |||||
km[i, len(Gn) + j] = km[i, idx] | |||||
km[len(Gn) + j, i] = km[i, idx] | |||||
for i, idx1 in enumerate(idx_rdm): | |||||
for j, idx2 in enumerate(idx_rdm): | |||||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||||
################################################################### | |||||
alpha_range = [1 / nb_median] * nb_median | |||||
time0 = time.time() | |||||
dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, | |||||
range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list.append(time_total) | |||||
print('\nsmallest distance in kernel space: ', dhat) | |||||
dis_ks_min_list.append(dhat) | |||||
g_best.append(ghat) | |||||
print('\nnumber of updates of the best graph: ', nb_updated) | |||||
nb_updated_list.append(nb_updated) | |||||
# show the best graph and save it to file. | |||||
print('the shortest distance is', dhat) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + | |||||
'.png', format="PNG") | |||||
# plt.show() | |||||
plt.clf() | |||||
# print(ghat_list[0].nodes(data=True)) | |||||
# print(ghat_list[0].edges(data=True)) | |||||
# compute the corresponding sod in graph space. | |||||
sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, | |||||
ged_method=ged_method, saveGXL=saveGXL) | |||||
sod_gs_list.append(sod_tmp) | |||||
sod_gs_min_list.append(np.min(sod_tmp)) | |||||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||||
print('\nsods in graph space: ', sod_gs_list) | |||||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||||
dis_ks_min_list) | |||||
print('\nnumber of updates of the best graph for each set of median graphs: ', | |||||
nb_updated_list) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
# test on the combination of the two randomly chosen graphs. (the same as in the | |||||
# random pre-image paper.) | |||||
def test_random_preimage_2combination(): | |||||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||||
'extra_params': {}} # node/edge symb | |||||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||||
# Gn = Gn[0:12] | |||||
remove_edges(Gn) | |||||
gkernel = 'marginalizedkernel' | |||||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel) | |||||
# print(dis_max, dis_min, dis_mean) | |||||
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||||
l = 500 | |||||
alpha_range = np.linspace(0, 1, 11) | |||||
k = 5 # k nearest neighbors | |||||
# randomly select two molecules | |||||
np.random.seed(1) | |||||
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2) | |||||
g1 = Gn[idx_gi[0]].copy() | |||||
g2 = Gn[idx_gi[1]].copy() | |||||
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||||
# plt.savefig("results/random_preimage/mutag10.png", format="PNG") | |||||
# plt.show() | |||||
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||||
# plt.savefig("results/random_preimage/mutag11.png", format="PNG") | |||||
# plt.show() | |||||
###################################################################### | |||||
# Gn_mix = [g.copy() for g in Gn] | |||||
# Gn_mix.append(g1.copy()) | |||||
# Gn_mix.append(g2.copy()) | |||||
# | |||||
## g_tmp = iam([g1, g2]) | |||||
## nx.draw_networkx(g_tmp) | |||||
## plt.show() | |||||
# | |||||
# # compute | |||||
# time0 = time.time() | |||||
# km = compute_kernel(Gn_mix, gkernel, True) | |||||
# time_km = time.time() - time0 | |||||
################################################################### | |||||
idx1 = idx_gi[0] | |||||
idx2 = idx_gi[1] | |||||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||||
km = gmfile['gm'] | |||||
time_km = gmfile['gmtime'] | |||||
# modify mixed gram matrix. | |||||
for i in range(len(Gn)): | |||||
km[i, len(Gn)] = km[i, idx1] | |||||
km[i, len(Gn) + 1] = km[i, idx2] | |||||
km[len(Gn), i] = km[i, idx1] | |||||
km[len(Gn) + 1, i] = km[i, idx2] | |||||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||||
################################################################### | |||||
time_list = [] | |||||
nb_updated_list = [] | |||||
g_best = [] | |||||
dis_ks_min_list = [] | |||||
# for each alpha | |||||
for alpha in alpha_range: | |||||
print('\n-------------------------------------------------------\n') | |||||
print('alpha =', alpha) | |||||
time0 = time.time() | |||||
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], | |||||
range(len(Gn), len(Gn) + 2), km, | |||||
k, r_max, l, gkernel) | |||||
time_total = time.time() - time0 + time_km | |||||
print('time: ', time_total) | |||||
time_list.append(time_total) | |||||
dis_ks_min_list.append(dhat) | |||||
g_best.append(ghat) | |||||
nb_updated_list.append(nb_updated) | |||||
# show best graphs and save them to file. | |||||
for idx, item in enumerate(alpha_range): | |||||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||||
print('one of the possible corresponding pre-images is') | |||||
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), | |||||
with_labels=True) | |||||
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
plt.clf() | |||||
print(g_best[idx].nodes(data=True)) | |||||
print(g_best[idx].edges(data=True)) | |||||
# # compute the corresponding sod in graph space. (alpha range not considered.) | |||||
# sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||||
# sod_gs_list.append(sod_tmp) | |||||
# sod_gs_min_list.append(np.min(sod_tmp)) | |||||
# sod_ks_min_list.append(sod_ks) | |||||
# nb_updated_list.append(nb_updated) | |||||
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||||
print('\nnumber of updates for each alpha: ', nb_updated_list) | |||||
print('\ntimes:', time_list) | |||||
############################################################################### | |||||
if __name__ == '__main__': | |||||
############################################################################### | |||||
# test on the combination of the two randomly chosen graphs. (the same as in the | |||||
# random pre-image paper.) | |||||
# test_random_preimage_2combination() | |||||
############################################################################### | |||||
# tests on different numbers of median-sets.
test_preimage_random_median_nb() | |||||
############################################################################### | |||||
# tests on different values on grid of median-sets and k.
# test_preimage_random_grid_k_median_nb() |
@@ -0,0 +1,109 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Oct 17 19:05:07 2019 | |||||
Useful functions. | |||||
@author: ljia | |||||
""" | |||||
#import networkx as nx | |||||
import multiprocessing | |||||
import numpy as np | |||||
import sys | |||||
sys.path.insert(0, "../") | |||||
from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||||
from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||||
from pygraph.kernels.spKernel import spkernel | |||||
import functools | |||||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||||
from pygraph.kernels.structuralspKernel import structuralspkernel | |||||
def remove_edges(Gn): | |||||
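"""Clear all edge attributes of every graph in Gn, in place."""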
for G in Gn: | |||||
for _, _, attrs in G.edges(data=True): | |||||
attrs.clear() | |||||
def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||||
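"""Distance in kernel space between graph idx_g and the weighted sum
g* = sum_i alpha[i] * g_{idx_gi[i]}, expanded via the kernel trick:
d(g, g*)^2 = k(g, g) - 2 * sum_i alpha[i] * k(g, g_i)
+ sum_{i1, i2} alpha[i1] * alpha[i2] * k(g_i1, g_i2).
If withterm3 is True, the constant third term (which does not depend on
idx_g) is passed in as term3; otherwise it is recomputed here.
"""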
term1 = Kmatrix[idx_g, idx_g] | |||||
term2 = 0 | |||||
for i, a in enumerate(alpha): | |||||
term2 += a * Kmatrix[idx_g, idx_gi[i]] | |||||
term2 *= 2 | |||||
if not withterm3:
for i1, a1 in enumerate(alpha): | |||||
for i2, a2 in enumerate(alpha): | |||||
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | |||||
return np.sqrt(term1 - term2 + term3) | |||||
def compute_kernel(Gn, graph_kernel, verbose): | |||||
if graph_kernel == 'marginalizedkernel': | |||||
Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | |||||
p_quit=0.03, n_iteration=10, remove_totters=False, | |||||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
elif graph_kernel == 'untilhpathkernel': | |||||
Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None, | |||||
depth=10, k_func='MinMax', compute_method='trie', | |||||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
elif graph_kernel == 'spkernel': | |||||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= | |||||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||||
elif graph_kernel == 'structuralspkernel': | |||||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= | |||||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||||
n_jobs=multiprocessing.cpu_count(), verbose=verbose)
else:
raise ValueError('unknown graph kernel: %s' % graph_kernel)
# normalization | |||||
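# cosine normalization: K'(i, j) = K(i, j) / sqrt(K(i, i) * K(j, j)),
# so every graph has unit self-similarity and kernel values are
# comparable across graphs of different sizes.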
Kmatrix_diag = Kmatrix.diagonal().copy() | |||||
for i in range(len(Kmatrix)): | |||||
for j in range(i, len(Kmatrix)): | |||||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
return Kmatrix | |||||
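# A minimal usage sketch (assuming Gn holds networkx graphs with 'atom'
# node labels, as in the test scripts above):
# km = compute_kernel(Gn, 'marginalizedkernel', False)
# np.allclose(np.diag(km), 1) # True: unit diagonal after normalization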
def gram2distances(Kmatrix): | |||||
dmatrix = np.zeros((len(Kmatrix), len(Kmatrix))) | |||||
for i1 in range(len(Kmatrix)): | |||||
for i2 in range(len(Kmatrix)): | |||||
dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] | |||||
dmatrix = np.sqrt(dmatrix) | |||||
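# note: tiny negative entries from floating-point error are not clamped
# before the sqrt (cf. kernel_distance_matrix below), so dmatrix may
# contain NaNs for a numerically imperfect Gram matrix.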
return dmatrix | |||||
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None): | |||||
dis_mat = np.empty((len(Gn), len(Gn))) | |||||
if Kmatrix is None:
Kmatrix = compute_kernel(Gn, gkernel, True) | |||||
for i in range(len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j] | |||||
if dis < 0: | |||||
if dis > -1e-10: | |||||
dis = 0 | |||||
else: | |||||
raise ValueError('The distance is negative.') | |||||
dis_mat[i, j] = np.sqrt(dis) | |||||
dis_mat[j, i] = dis_mat[i, j] | |||||
dis_max = np.max(dis_mat)
dis_min = np.min(dis_mat[dis_mat != 0])
dis_mean = np.mean(dis_mat)
return dis_mat, dis_max, dis_min, dis_mean | |||||
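# With the unit-diagonal normalization of compute_kernel, the distance
# simplifies to d(i, j) = sqrt(2 - 2 * Kmatrix[i, j]).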
def get_same_item_indices(ls): | |||||
"""Get the indices of the same items in a list. Return a dict keyed by items. | |||||
""" | |||||
idx_dict = {} | |||||
for idx, item in enumerate(ls): | |||||
if item in idx_dict: | |||||
idx_dict[item].append(idx) | |||||
else: | |||||
idx_dict[item] = [idx] | |||||
return idx_dict |
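# e.g. get_same_item_indices([1, 0, 1, 0]) returns {1: [0, 2], 0: [1, 3]},
# which is how the scripts above select all graphs of class 1.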