@@ -0,0 +1,103 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Oct 16 14:20:06 2019 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
from tqdm import tqdm | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from ged import GED, get_nb_edit_operations | |||
from utils import kernel_distance_matrix | |||
def fit_GED_to_kernel_distance(Gn, gkernel, itr_max):
    """Fit constant edit costs so that GEDs approximate the graph-kernel
    distances over Gn, alternating GED computation and least-squares updates.
    """
    # initial unit costs: node insertion / removal / substitution,
    # then the same three for edges.
    c_vi = 1
c_vr = 1 | |||
c_vs = 1 | |||
c_ei = 1 | |||
c_er = 1 | |||
c_es = 1 | |||
# compute distances in feature space. | |||
dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, gkernel=gkernel) | |||
dis_k_vec = [] | |||
for i in range(len(dis_k_mat)): | |||
for j in range(i, len(dis_k_mat)): | |||
dis_k_vec.append(dis_k_mat[i, j]) | |||
dis_k_vec = np.array(dis_k_vec) | |||
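    # (explanatory note) dis_k_vec holds the upper triangle of dis_k_mat,
    # diagonal included, in the same (i, j) pair order as the GED loop below.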
residual_list = [] | |||
edit_cost_list = [] | |||
for itr in range(itr_max): | |||
print('iteration', itr) | |||
ged_all = [] | |||
n_vi_all = [] | |||
n_vr_all = [] | |||
n_vs_all = [] | |||
n_ei_all = [] | |||
n_er_all = [] | |||
n_es_all = [] | |||
# compute GEDs and numbers of edit operations. | |||
edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es] | |||
edit_cost_list.append(edit_cost_constant) | |||
for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): | |||
# for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], lib='gedlibpy', | |||
cost='CONSTANT', method='IPFP', | |||
edit_cost_constant=edit_cost_constant, stabilizer='min', | |||
repeat=30) | |||
ged_all.append(dis) | |||
n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(Gn[i], | |||
Gn[j], pi_forward, pi_backward) | |||
n_vi_all.append(n_vi) | |||
n_vr_all.append(n_vr) | |||
n_vs_all.append(n_vs) | |||
n_ei_all.append(n_ei) | |||
n_er_all.append(n_er) | |||
n_es_all.append(n_es) | |||
residual = np.sqrt(np.sum(np.square(np.array(ged_all) - dis_k_vec))) | |||
residual_list.append(residual) | |||
# "fit" geds to distances in feature space by tuning edit costs using the | |||
# Least Squares Method. | |||
nb_cost_mat = np.column_stack((np.array(n_vi_all), np.array(n_vr_all), | |||
np.array(n_vs_all), np.array(n_ei_all), | |||
np.array(n_er_all), np.array(n_es_all))) | |||
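        # (explanatory note) each row of nb_cost_mat holds the operation counts
        # [n_vi, n_vr, n_vs, n_ei, n_er, n_es] for one graph pair, so the
        # least-squares solve below fits nb_cost_mat @ c ~= dis_k_vec for the
        # six-vector of edit costs c.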
edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||
rcond=None) | |||
for i in range(len(edit_costs)): | |||
if edit_costs[i] < 0: | |||
if edit_costs[i] > -1e-3: | |||
edit_costs[i] = 0 | |||
# else: | |||
# raise ValueError('The edit cost is negative.') | |||
c_vi = edit_costs[0] | |||
c_vr = edit_costs[1] | |||
c_vs = edit_costs[2] | |||
c_ei = edit_costs[3] | |||
c_er = edit_costs[4] | |||
c_es = edit_costs[5] | |||
return c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list | |||
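# A minimal alternative sketch (not part of the original script): the clipping
# of slightly negative costs above can be avoided by solving a non-negative
# least-squares problem instead of an unconstrained one. This assumes SciPy is
# available; scipy.optimize.nnls(A, b) minimizes ||A x - b||_2 subject to x >= 0.
#def fit_costs_nnls(nb_cost_mat, dis_k_vec):
#    from scipy.optimize import nnls
#    edit_costs, rnorm = nnls(nb_cost_mat, dis_k_vec)
#    return edit_costs, rnorm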
if __name__ == '__main__': | |||
from utils import remove_edges | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
Gn = Gn[0:10] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
itr_max = 10 | |||
c_vi, c_vr, c_vs, c_ei, c_er, c_es, residual_list, edit_cost_list = \ | |||
fit_GED_to_kernel_distance(Gn, gkernel, itr_max) |
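    # (added for illustration) echo the fitted costs and per-iteration residuals.
    print('fitted costs [c_vi, c_vr, c_vs, c_ei, c_er, c_es]:',
          [c_vi, c_vr, c_vs, c_ei, c_er, c_es])
    print('residuals per iteration:', residual_list)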
@@ -0,0 +1,197 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Oct 17 18:44:59 2019 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
from tqdm import tqdm | |||
import sys | |||
from gedlibpy import librariesImport, gedlibpy | |||
def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', | |||
edit_cost_constant=[], saveGXL='benoit', stabilizer='min', repeat=50): | |||
""" | |||
Compute GED for 2 graphs. | |||
""" | |||
if lib == 'gedlibpy': | |||
def convertGraph(G): | |||
"""Convert a graph to the proper NetworkX format that can be | |||
recognized by library gedlibpy. | |||
""" | |||
G_new = nx.Graph() | |||
for nd, attrs in G.nodes(data=True): | |||
G_new.add_node(str(nd), chem=attrs['atom']) | |||
for nd1, nd2, attrs in G.edges(data=True): | |||
# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) | |||
G_new.add_edge(str(nd1), str(nd2)) | |||
return G_new | |||
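        # gedlibpy workflow: reset the environment, register both graphs,
        # choose the edit-cost model and the solver, then run on the pair.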
gedlibpy.restart_env() | |||
gedlibpy.add_nx_graph(convertGraph(g1), "") | |||
gedlibpy.add_nx_graph(convertGraph(g2), "") | |||
listID = gedlibpy.get_all_graph_ids() | |||
gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) | |||
gedlibpy.init() | |||
gedlibpy.set_method(method, "") | |||
gedlibpy.init_method() | |||
g = listID[0] | |||
h = listID[1] | |||
        if stabilizer is None:
gedlibpy.run_method(g, h) | |||
pi_forward = gedlibpy.get_forward_map(g, h) | |||
pi_backward = gedlibpy.get_backward_map(g, h) | |||
upper = gedlibpy.get_upper_bound(g, h) | |||
lower = gedlibpy.get_lower_bound(g, h) | |||
elif stabilizer == 'min': | |||
upper = np.inf | |||
for itr in range(repeat): | |||
gedlibpy.run_method(g, h) | |||
upper_tmp = gedlibpy.get_upper_bound(g, h) | |||
if upper_tmp < upper: | |||
upper = upper_tmp | |||
pi_forward = gedlibpy.get_forward_map(g, h) | |||
pi_backward = gedlibpy.get_backward_map(g, h) | |||
lower = gedlibpy.get_lower_bound(g, h) | |||
if upper == 0: | |||
break | |||
dis = upper | |||
        # convert the index-based maps to node maps (a removed node maps to np.inf).
nodes1 = [n for n in g1.nodes()] | |||
nodes2 = [n for n in g2.nodes()] | |||
nb1 = nx.number_of_nodes(g1) | |||
nb2 = nx.number_of_nodes(g2) | |||
pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] | |||
pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] | |||
return dis, pi_forward, pi_backward | |||
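# Usage sketch (hypothetical toy graphs, not in the original file): nodes need
# an 'atom' attribute because convertGraph() copies it into the 'chem' label
# that the gedlibpy edit costs read.
#g1 = nx.Graph(); g1.add_node(0, atom='C'); g1.add_node(1, atom='O'); g1.add_edge(0, 1)
#g2 = nx.Graph(); g2.add_node(0, atom='C')
#dis, pi_f, pi_b = GED(g1, g2, cost='CONSTANT',
#                      edit_cost_constant=[1, 1, 1, 1, 1, 1], repeat=10)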
def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', 
        edit_cost_constant=[], stabilizer='min', repeat=50):
    """
    Compute pairwise GEDs for a group of graphs, returning the symmetric
    matrix of GED upper bounds. All graphs are registered in one gedlibpy
    environment, which avoids the per-pair restart_env()/init() overhead of
    GED() above; node maps, if needed, can still be fetched per pair with
    gedlibpy.get_forward_map()/get_backward_map().
    """
    if lib == 'gedlibpy':
        def convertGraph(G):
            """Convert a graph to the proper NetworkX format that can be
            recognized by library gedlibpy.
            """
            G_new = nx.Graph()
            for nd, attrs in G.nodes(data=True):
                G_new.add_node(str(nd), chem=attrs['atom'])
            for nd1, nd2, attrs in G.edges(data=True):
                G_new.add_edge(str(nd1), str(nd2))
            return G_new

        gedlibpy.restart_env()
        for G in Gn:
            gedlibpy.add_nx_graph(convertGraph(G), "")
        listID = gedlibpy.get_all_graph_ids()
        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
        gedlibpy.init()
        gedlibpy.set_method(method, "")
        gedlibpy.init_method()

        dis_mat = np.zeros((len(Gn), len(Gn)))
        for i in range(len(listID)):
            for j in range(i + 1, len(listID)):
                g, h = listID[i], listID[j]
                if stabilizer is None:
                    gedlibpy.run_method(g, h)
                    upper = gedlibpy.get_upper_bound(g, h)
                elif stabilizer == 'min':
                    # rerun the (randomized) method and keep the best upper bound.
                    upper = np.inf
                    for itr in range(repeat):
                        gedlibpy.run_method(g, h)
                        upper_tmp = gedlibpy.get_upper_bound(g, h)
                        if upper_tmp < upper:
                            upper = upper_tmp
                        if upper == 0:
                            break
                else:
                    raise ValueError('unknown stabilizer: %s' % stabilizer)
                dis_mat[i, j] = dis_mat[j, i] = upper
        return dis_mat
def ged_median(Gn, Gn_median, measure='ged', verbose=False, 
               ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
    """Compute, for each graph in Gn, the sum of its GEDs to all graphs in
    Gn_median, together with the corresponding forward maps.
    """
    dis_list = []
    pi_forward_list = []
    iterator = (tqdm(enumerate(Gn), desc='computing median distances',
                     file=sys.stdout) if verbose else enumerate(Gn))
    for idx, G in iterator:
dis_sum = 0 | |||
pi_forward_list.append([]) | |||
for G_p in Gn_median: | |||
dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, | |||
cost=ged_cost, method=ged_method, saveGXL=saveGXL) | |||
pi_forward_list[idx].append(pi_tmp_forward) | |||
dis_sum += dis_tmp | |||
dis_list.append(dis_sum) | |||
return dis_list, pi_forward_list | |||
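# Note (added): for a candidate median g, ged_median([g], Gn_median) returns
# its sum of distances SOD(g) = sum over g_p in Gn_median of GED(g, g_p) as a
# one-element list; this is the quantity the IAM tests minimize in graph space.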
def get_nb_edit_operations(g1, g2, forward_map, backward_map):
    """Count each type of edit operation induced by the given node maps.
    """
n_vi = 0 | |||
n_vr = 0 | |||
n_vs = 0 | |||
n_ei = 0 | |||
n_er = 0 | |||
n_es = 0 | |||
nodes1 = [n for n in g1.nodes()] | |||
    for i, map_i in enumerate(forward_map):
        if map_i == np.inf:
            n_vr += 1
        elif g1.nodes[nodes1[i]]['atom'] != g2.nodes[map_i]['atom']:
            n_vs += 1
for map_i in backward_map: | |||
if map_i == np.inf: | |||
n_vi += 1 | |||
# idx_nodes1 = range(0, len(node1)) | |||
edges1 = [e for e in g1.edges()] | |||
nb_edges2_cnted = 0 | |||
for n1, n2 in edges1: | |||
idx1 = nodes1.index(n1) | |||
idx2 = nodes1.index(n2) | |||
# one of the nodes is removed, thus the edge is removed. | |||
if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: | |||
n_er += 1 | |||
# corresponding edge is in g2. Edge label is not considered. | |||
elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \ | |||
(forward_map[idx2], forward_map[idx1]) in g2.edges(): | |||
nb_edges2_cnted += 1 | |||
# corresponding nodes are in g2, however the edge is removed. | |||
else: | |||
n_er += 1 | |||
n_ei = nx.number_of_edges(g2) - nb_edges2_cnted | |||
return n_vi, n_vr, n_vs, n_ei, n_er, n_es |
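# Note (added): n_es stays 0 above because edge labels are ignored, so no edge
# substitutions are ever counted. Under the 'CONSTANT' cost model the GED of a
# node map decomposes linearly as
#     ged = n_vi*c_vi + n_vr*c_vr + n_vs*c_vs + n_ei*c_ei + n_er*c_er + n_es*c_es,
# which is exactly the linear system that fit_GED_to_kernel_distance inverts.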
@@ -12,10 +12,10 @@ import networkx as nx | |||
from tqdm import tqdm | |||
import sys | |||
from gedlibpy import librariesImport, gedlibpy | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphdataset import get_dataset_attributes | |||
from pygraph.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels | |||
from ged import GED, ged_median | |||
def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
@@ -237,7 +237,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
# # find the best graph generated in this iteration and update pi_p. | |||
# @todo: should we update all graphs generated or just the best ones? | |||
-        dis_list, pi_forward_list = median_distance(G_new_list, Gn_median, 
+        dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, 
**params_ged) | |||
# @todo: should we remove the identical and connectivity check? | |||
# Don't know which is faster. | |||
@@ -362,7 +362,7 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
    # phase 1: initialize.
# compute set-median. | |||
dis_min = np.inf | |||
-    dis_list, pi_forward_all = median_distance(Gn_candidate, Gn_median, 
+    dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median, 
**params_ged) | |||
# find all smallest distances. | |||
if allBestInit: # try all best init graphs. | |||
@@ -426,96 +426,6 @@ def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, | |||
############################################################################### | |||
# Useful functions. | |||
-def GED(g1, g2, lib='gedlibpy', cost='CHEM_1', method='IPFP', saveGXL='benoit', 
-        stabilizer='min'):
-    """
-    Compute GED.
-    """
-    if lib == 'gedlibpy':
-        def convertGraph(G):
-            """Convert a graph to the proper NetworkX format that can be
-            recognized by library gedlibpy.
-            """
-            G_new = nx.Graph()
-            for nd, attrs in G.nodes(data=True):
-                G_new.add_node(str(nd), chem=attrs['atom'])
-            for nd1, nd2, attrs in G.edges(data=True):
-#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-                G_new.add_edge(str(nd1), str(nd2))
-            return G_new
-        gedlibpy.restart_env()
-        gedlibpy.add_nx_graph(convertGraph(g1), "")
-        gedlibpy.add_nx_graph(convertGraph(g2), "")
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost(cost)
-        gedlibpy.init()
-        gedlibpy.set_method(method, "")
-        gedlibpy.init_method()
-        g = listID[0]
-        h = listID[1]
-        if stabilizer == None:
-            gedlibpy.run_method(g, h)
-            pi_forward = gedlibpy.get_forward_map(g, h)
-            pi_backward = gedlibpy.get_backward_map(g, h)
-            upper = gedlibpy.get_upper_bound(g, h)
-            lower = gedlibpy.get_lower_bound(g, h)
-        elif stabilizer == 'min':
-            upper = np.inf
-            for itr in range(50):
-                gedlibpy.run_method(g, h)
-                upper_tmp = gedlibpy.get_upper_bound(g, h)
-                if upper_tmp < upper:
-                    upper = upper_tmp
-                    pi_forward = gedlibpy.get_forward_map(g, h)
-                    pi_backward = gedlibpy.get_backward_map(g, h)
-                    lower = gedlibpy.get_lower_bound(g, h)
-                if upper == 0:
-                    break
-        dis = upper
-        # make the map label correct (label remove map as np.inf)
-        nodes1 = [n for n in g1.nodes()]
-        nodes2 = [n for n in g2.nodes()]
-        nb1 = nx.number_of_nodes(g1)
-        nb2 = nx.number_of_nodes(g2)
-        pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
-        pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-    return dis, pi_forward, pi_backward
-def median_distance(Gn, Gn_median, measure='ged', verbose=False, 
-                    ged_cost='CHEM_1', ged_method='IPFP', saveGXL='benoit'):
-    dis_list = []
-    pi_forward_list = []
-    for idx, G in tqdm(enumerate(Gn), desc='computing median distances', 
-                    file=sys.stdout) if verbose else enumerate(Gn):
-        dis_sum = 0
-        pi_forward_list.append([])
-        for G_p in Gn_median:
-            dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, 
-                    cost=ged_cost, method=ged_method, saveGXL=saveGXL)
-            pi_forward_list[idx].append(pi_tmp_forward)
-            dis_sum += dis_tmp
-        dis_list.append(dis_sum)
-    return dis_list, pi_forward_list
############################################################################### | |||
# Old implementations. | |||
@@ -13,20 +13,13 @@ and the iterative alternate minimizations (IAM) in reference [2]. | |||
""" | |||
import sys | |||
import numpy as np | |||
-import multiprocessing
from tqdm import tqdm | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import random | |||
from iam import iam_upgraded | |||
sys.path.insert(0, "../") | |||
-from pygraph.kernels.marginalizedKernel import marginalizedkernel
-from pygraph.kernels.untilHPathKernel import untilhpathkernel
-from pygraph.kernels.spKernel import spkernel
-import functools
-from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
-from pygraph.kernels.structuralspKernel import structuralspkernel
+from utils import dis_gstar, compute_kernel
def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, | |||
@@ -72,13 +65,13 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, | |||
# print(g.nodes(data=True)) | |||
# print(g.edges(data=True)) | |||
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||
-    for gi in Gk:
-        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-#        nx.draw_networkx(gi)
-        plt.show()
-        # draw_Letter_graph(g)
-        print(gi.nodes(data=True))
-        print(gi.edges(data=True))
+#    for gi in Gk:
+#        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+##        nx.draw_networkx(gi)
+#        plt.show()
+##        draw_Letter_graph(g)
+#        print(gi.nodes(data=True))
+#        print(gi.edges(data=True))
# i = 1 | |||
r = 0 | |||
@@ -173,7 +166,7 @@ def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, | |||
print('\nthe k shortest distances are', dis_k) | |||
print('the shortest distances for previous iterations are', dis_of_each_itr) | |||
-    print('\nthe graph is updated', nb_updated, 'times.')
+    print('\n\nthe graph is updated', nb_updated, 'times.')
print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.') | |||
print('distances in kernel space:', dis_of_each_itr, '\n') | |||
@@ -227,13 +220,13 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
# print(g.nodes(data=True)) | |||
# print(g.edges(data=True)) | |||
Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors | |||
-    for gi in Gk:
-        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-#        nx.draw_networkx(gi)
-        plt.show()
-        # draw_Letter_graph(g)
-        print(gi.nodes(data=True))
-        print(gi.edges(data=True))
+#    for gi in Gk:
+#        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
+##        nx.draw_networkx(gi)
+#        plt.show()
+##        draw_Letter_graph(g)
+#        print(gi.nodes(data=True))
+#        print(gi.edges(data=True))
r = 0 | |||
itr_total = 0 | |||
@@ -394,7 +387,8 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
# compute distance between \psi and the new generated graph. | |||
knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False) | |||
-            dhat_new = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
+            dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1), 
+                    alpha, knew, withterm3=False)
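            # (note, added) dis_gstar(0, ...) is the kernel-space distance
            # between phi(ghat_new) and the weighted mean of the medians:
            # sqrt(k(g,g) - 2*sum_i a_i*k(g,g_i) + sum_{i,j} a_i*a_j*k(g_i,g_j)).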
# @todo: the new distance is smaller or also equal? | |||
if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon: | |||
# check if the new distance is the same as one in D_k. | |||
@@ -448,7 +442,7 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
print('\nthe k shortest distances are', dis_k) | |||
print('the shortest distances for previous iterations are', dis_of_each_itr) | |||
-    print('\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation', 
+    print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation', 
nb_updated_random, 'times.') | |||
print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, | |||
'times, and by random generation', nb_updated_k_random, 'times.') | |||
@@ -459,60 +453,6 @@ def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max | |||
############################################################################### | |||
# useful functions. | |||
-def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
-    term1 = Kmatrix[idx_g, idx_g]
-    term2 = 0
-    for i, a in enumerate(alpha):
-        term2 += a * Kmatrix[idx_g, idx_gi[i]]
-    term2 *= 2
-    if withterm3 == False:
-        for i1, a1 in enumerate(alpha):
-            for i2, a2 in enumerate(alpha):
-                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    return np.sqrt(term1 - term2 + term3)
-def compute_kernel(Gn, graph_kernel, verbose):
-    if graph_kernel == 'marginalizedkernel':
-        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
-                                  p_quit=0.03, n_iteration=10, remove_totters=False,
-                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'untilhpathkernel':
-        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
-                                  depth=10, k_func='MinMax', compute_method='trie',
-                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'spkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
-                           {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                           n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'structuralspkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
-                           {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                           n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    # normalization
-    Kmatrix_diag = Kmatrix.diagonal().copy()
-    for i in range(len(Kmatrix)):
-        for j in range(i, len(Kmatrix)):
-            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-            Kmatrix[j][i] = Kmatrix[i][j]
-    return Kmatrix
-def gram2distances(Kmatrix):
-    dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
-    for i1 in range(len(Kmatrix)):
-        for i2 in range(len(Kmatrix)):
-            dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
-    dmatrix = np.sqrt(dmatrix)
-    return dmatrix
############################################################################### | |||
# Old implementations. | |||
#def gk_iam(Gn, alpha): | |||
@@ -10,51 +10,14 @@ pre-image | |||
import sys | |||
import numpy as np | |||
import random | |||
-import multiprocessing
from tqdm import tqdm | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
-from pygraph.kernels.marginalizedKernel import marginalizedkernel
-from pygraph.kernels.untilHPathKernel import untilhpathkernel
-from pygraph.kernels.spKernel import spkernel
-import functools
-from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct
-from pygraph.kernels.structuralspKernel import structuralspkernel
-from gk_iam import dis_gstar
-def compute_kernel(Gn, graph_kernel, verbose):
-    if graph_kernel == 'marginalizedkernel':
-        Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None,
-                                  p_quit=0.03, n_iteration=10, remove_totters=False,
-                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'untilhpathkernel':
-        Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None,
-                                  depth=10, k_func='MinMax', compute_method='trie',
-                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'spkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels=
-                           {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                           n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'structuralspkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels=
-                           {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                           n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    # normalization
-    Kmatrix_diag = Kmatrix.diagonal().copy()
-    for i in range(len(Kmatrix)):
-        for j in range(i, len(Kmatrix)):
-            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-            Kmatrix[j][i] = Kmatrix[i][j]
-    return Kmatrix
+from utils import compute_kernel, dis_gstar
def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel): | |||
@@ -105,6 +68,7 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke | |||
r = 0 | |||
# sod_list = [dhat] | |||
# found = False | |||
+    dis_of_each_itr = [dhat]
nb_updated = 0 | |||
g_best = [] | |||
while r < r_max: | |||
@@ -162,7 +126,8 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke | |||
# p_quit=lmbda, n_iteration=20, remove_totters=False, | |||
# n_jobs=multiprocessing.cpu_count(), verbose=False) | |||
knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False) | |||
-            dnew = dis_gstar(0, [1, 2], alpha, knew, withterm3=False)
+            dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew, 
+                    withterm3=False)
if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||
if dnew < dhat: | |||
print('\nI am smaller!') | |||
@@ -184,13 +149,19 @@ def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gke | |||
dihat_list = [dhat] | |||
else: | |||
r += 1 | |||
+    dis_of_each_itr.append(dhat)
+    print('the shortest distances for previous iterations are', dis_of_each_itr)
#    dis_best.append(dhat)
-    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
+    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
+    print('distances in kernel space:', dis_of_each_itr, '\n')
return dhat, g_best, nb_updated | |||
# return 0, 0, 0 | |||
if __name__ == '__main__': | |||
from pygraph.utils.graphfiles import loadDataset | |||
# ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
# 'extra_params': {}} # node/edge symb | |||
@@ -80,5 +80,6 @@ def testNxGrapĥ(): | |||
print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h)) | |||
print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h))) | |||
#test() | |||
+init()
#testNxGrapĥ() |
@@ -0,0 +1,167 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Sep 5 15:59:00 2019 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from iam import iam_upgraded | |||
from utils import remove_edges, compute_kernel, get_same_item_indices | |||
from ged import ged_median | |||
############################################################################### | |||
# tests on different numbers of median-sets. | |||
def test_iam_median_nb(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
#    lmbda = 0.03 # termination probability
# r_max = 10 # iteration limit for pre-image. | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
# k = 5 # k nearest neighbors | |||
# epsilon = 1e-6 | |||
# InitIAMWithAllDk = True | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# parameters for IAM function | |||
c_ei=1 | |||
c_er=1 | |||
c_es=1 | |||
ite_max_iam = 50 | |||
epsilon_iam = 0.001 | |||
removeNodes = False | |||
connected_iam = False | |||
    # number of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# find out all the graphs classified to positive group 1. | |||
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list = [] | |||
nb_updated_k_list = [] | |||
g_best = [] | |||
for nb_median in nb_median_range: | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
Gn_candidate = [g.copy() for g in Gn_median] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
################################################################### | |||
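        # (explanatory note) km now mirrors the Gram layout used by the
        # pre-image tests: Gn at indices 0..len(Gn)-1 and the chosen median
        # graphs duplicated at the end. iam_upgraded below does not consume it.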
alpha_range = [1 / nb_median] * nb_median | |||
time0 = time.time() | |||
ghat_new_list, dis_min = iam_upgraded(Gn_median, Gn_candidate, | |||
c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam, | |||
epsilon=epsilon_iam, removeNodes=removeNodes, | |||
connected=connected_iam, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
time_total = time.time() - time0 | |||
print('\ntime: ', time_total) | |||
time_list.append(time_total) | |||
        print('\nsmallest distance in kernel space: ', dis_min)
        dis_ks_min_list.append(dis_min)
        g_best.append(ghat_new_list)
        # iam_upgraded does not report update counts, so the update statistics
        # tracked by the pre-image tests are skipped here.
#        print('\nnumber of updates of the best graph: ', nb_updated)
#        nb_updated_list.append(nb_updated)
#        print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
#        nb_updated_k_list.append(nb_updated_k)
        # show the best graph and save it to file.
        print('the shortest distance is', dis_min)
        print('one of the possible corresponding pre-images is')
        nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'), 
                with_labels=True)
        # save before show(), otherwise an empty figure is written to file.
        plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + 
                    '.png', format="PNG")
        plt.show()
        plt.clf()
#        print(ghat_new_list[0].nodes(data=True))
#        print(ghat_new_list[0].edges(data=True))
# compute the corresponding sod in graph space. | |||
        sod_tmp, _ = ged_median([ghat_new_list[0]], Gn_median, ged_cost=ged_cost, 
                                ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', | |||
nb_updated_list) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', | |||
nb_updated_k_list) | |||
print('\ntimes:', time_list) | |||
############################################################################### | |||
if __name__ == '__main__': | |||
############################################################################### | |||
# tests on different numbers of median-sets. | |||
test_iam_median_nb() |
@@ -15,6 +15,9 @@ import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from median import draw_Letter_graph | |||
from ged import GED, ged_median | |||
from utils import get_same_item_indices, compute_kernel, gram2distances, \ | |||
dis_gstar, remove_edges | |||
# --------------------------- These are tests --------------------------------# | |||
@@ -47,7 +50,6 @@ def test_who_is_the_closest_in_kernel_space(Gn): | |||
def test_who_is_the_closest_in_GED_space(Gn): | |||
from iam import GED | |||
idx_gi = [0, 6] | |||
g1 = Gn[idx_gi[0]] | |||
g2 = Gn[idx_gi[1]] | |||
@@ -142,7 +144,7 @@ def test_new_IAM_allGraph_deleteNodes(Gn): | |||
def test_the_simple_two(Gn, gkernel): | |||
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
+    from gk_iam import gk_iam_nearest_multi
    lmbda = 0.03 # termination probability
r_max = 10 # recursions | |||
l = 500 | |||
@@ -199,7 +201,7 @@ def test_the_simple_two(Gn, gkernel): | |||
def test_remove_bests(Gn, gkernel): | |||
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
+    from gk_iam import gk_iam_nearest_multi
    lmbda = 0.03 # termination probability
r_max = 10 # recursions | |||
l = 500 | |||
@@ -249,8 +251,7 @@ def test_remove_bests(Gn, gkernel): | |||
# Tests on dataset Letter-H. | |||
def test_gkiam_letter_h(): | |||
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
+    from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
'extra_params': {}} # node nsymb | |||
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
@@ -305,7 +306,7 @@ def test_gkiam_letter_h(): | |||
print(g.edges(data=True)) | |||
# compute the corresponding sod in graph space. (alpha range not considered.) | |||
-    sod_tmp, _ = median_distance(g_best[0], Gn_let, ged_cost='LETTER', 
+    sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER', 
ged_method='IPFP', saveGXL='gedlib-letter') | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
@@ -318,19 +319,6 @@ def test_gkiam_letter_h(): | |||
print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) | |||
print('\nnumber of updates for each letter: ', nb_updated_list) | |||
print('\ntimes:', time_list) | |||
def get_same_item_indices(ls): | |||
"""Get the indices of the same items in a list. Return a dict keyed by items. | |||
""" | |||
idx_dict = {} | |||
for idx, item in enumerate(ls): | |||
if item in idx_dict: | |||
idx_dict[item].append(idx) | |||
else: | |||
idx_dict[item] = [idx] | |||
return idx_dict | |||
#def compute_letter_median_by_average(Gn): | |||
# return g_median | |||
@@ -338,7 +326,6 @@ def get_same_item_indices(ls): | |||
def test_iam_letter_h(): | |||
from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations | |||
-    from gk_iam import dis_gstar, compute_kernel
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
'extra_params': {}} # node nsymb | |||
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
@@ -402,7 +389,7 @@ def test_iam_letter_h(): | |||
def test_random_preimage_letter_h(): | |||
-    from preimage_random import preimage_random, compute_kernel
+    from preimage_random import preimage_random
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
'extra_params': {}} # node nsymb | |||
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
@@ -463,7 +450,7 @@ def test_random_preimage_letter_h(): | |||
print(g.edges(data=True)) | |||
# compute the corresponding sod in graph space. (alpha range not considered.) | |||
-    sod_tmp, _ = median_distance(g_best[0], Gn_let)
+    sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_list.append(sod_tmp) | |||
sod_min_list.append(np.min(sod_tmp)) | |||
@@ -479,8 +466,7 @@ def test_random_preimage_letter_h(): | |||
def test_gkiam_mutag(): | |||
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
+    from gk_iam import gk_iam_nearest_multi
ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', | |||
'extra_params': {}} # node nsymb | |||
# ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt', | |||
@@ -535,7 +521,7 @@ def test_gkiam_mutag(): | |||
print(g.edges(data=True)) | |||
# compute the corresponding sod in graph space. (alpha range not considered.) | |||
-    sod_tmp, _ = median_distance(g_best[0], Gn_let)
+    sod_tmp, _ = ged_median(g_best[0], Gn_let)
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
sod_ks_min_list.append(sod_ks) | |||
@@ -553,9 +539,7 @@ def test_gkiam_mutag(): | |||
# Re-test. | |||
def retest_the_simple_two(): | |||
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
-    from test_random_mutag import remove_edges
+    from gk_iam import gk_iam_nearest_multi
# The two simple graphs. | |||
# g1 = nx.Graph(name='haha') | |||
@@ -653,7 +637,7 @@ def retest_the_simple_two(): | |||
# compute the corresponding sod in graph space. | |||
for idx, item in enumerate(alpha_range): | |||
-        sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, 
+        sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost, 
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) |
@@ -10,20 +10,23 @@ import numpy as np | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import time | |||
-from tqdm import tqdm
+import random
+#from tqdm import tqdm
-import os
+#import os
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
+from utils import remove_edges, compute_kernel, get_same_item_indices
+from ged import ged_median
+from preimage_iam import preimage_iam
############################################################################### | |||
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
+# tests on different values on a grid of median-set sizes and k.
-def test_preimage_mix_2combination_all_pairs():
-    from preimage_iam import preimage_iam_random_mix, compute_kernel
-    from iam import median_distance
+def test_preimage_iam_grid_k_median_nb():
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
@@ -32,13 +35,11 @@ def test_preimage_mix_2combination_all_pairs(): | |||
gkernel = 'marginalizedkernel' | |||
    lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||
l_max = 500 # update limit for random generation | |||
alpha_range = np.linspace(0.5, 0.5, 1) | |||
k = 5 # k nearest neighbors | |||
r_max = 5 # iteration limit for pre-image. | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
# k = 5 # k nearest neighbors | |||
epsilon = 1e-6 | |||
InitIAMWithAllDk = True | |||
InitRandomWithAllDk = True | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
@@ -52,153 +53,280 @@ def test_preimage_mix_2combination_all_pairs(): | |||
removeNodes = True | |||
connected_iam = False | |||
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf) | |||
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf) | |||
# test on each pair of graphs. | |||
# for idx1 in range(len(Gn) - 1, -1, -1): | |||
# for idx2 in range(idx1, -1, -1): | |||
for idx1 in range(187, 188): | |||
for idx2 in range(167, 168): | |||
g1 = Gn[idx1].copy() | |||
g2 = Gn[idx2].copy() | |||
# Gn[10] = [] | |||
# Gn[10] = [] | |||
    # number of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# number of nearest neighbors. | |||
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] | |||
# find out all the graphs classified to positive group 1. | |||
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list = [] | |||
nb_updated_k_list = [] | |||
g_best = [] | |||
for idx_nb, nb_median in enumerate(nb_median_range): | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
################################################################### | |||
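        # (explanatory note) the cached Gram matrix over Gn is extended by
        # nb_median rows/columns that duplicate the chosen median graphs at the
        # end; preimage_iam addresses them via range(len(Gn), len(Gn) + nb_median).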
alpha_range = [1 / nb_median] * nb_median | |||
time_list.append([]) | |||
dis_ks_min_list.append([]) | |||
sod_gs_list.append([]) | |||
sod_gs_min_list.append([]) | |||
nb_updated_list.append([]) | |||
nb_updated_k_list.append([]) | |||
g_best.append([]) | |||
for k in k_range: | |||
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |||
print('k =', k) | |||
time0 = time.time() | |||
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ | |||
preimage_iam(Gn, Gn_median, | |||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list[idx_nb].append(time_total) | |||
print('\nsmallest distance in kernel space: ', dhat) | |||
dis_ks_min_list[idx_nb].append(dhat) | |||
g_best[idx_nb].append(ghat_list) | |||
print('\nnumber of updates of the best graph by IAM: ', nb_updated) | |||
nb_updated_list[idx_nb].append(nb_updated) | |||
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k) | |||
nb_updated_k_list[idx_nb].append(nb_updated_k) | |||
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||
plt.savefig("results/preimage_mix/mutag187.png", format="PNG") | |||
plt.show() | |||
plt.clf() | |||
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||
plt.savefig("results/preimage_mix/mutag167.png", format="PNG") | |||
plt.show() | |||
# show the best graph and save it to file. | |||
print('the shortest distance is', dhat) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + | |||
'_k' + str(k) + '.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(ghat_list[0].nodes(data=True)) | |||
# print(ghat_list[0].edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list[idx_nb].append(sod_tmp) | |||
sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs and k: ', | |||
sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs and k: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', | |||
nb_updated_list) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', | |||
nb_updated_k_list) | |||
print('\ntimes:', time_list) | |||
################################################################### | |||
# Gn_mix = [g.copy() for g in Gn] | |||
# Gn_mix.append(g1.copy()) | |||
# Gn_mix.append(g2.copy()) | |||
# | |||
# # compute | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn_mix, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# | |||
# # write Gram matrix to file and read it. | |||
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||
km = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
for i in range(len(Gn)): | |||
km[i, len(Gn)] = km[i, idx1] | |||
km[i, len(Gn) + 1] = km[i, idx2] | |||
km[len(Gn), i] = km[i, idx1] | |||
km[len(Gn) + 1, i] = km[i, idx2] | |||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||
################################################################### | |||
# # use only the two graphs in median set as candidates. | |||
# Gn = [g1.copy(), g2.copy()] | |||
# Gn_mix = Gn + [g1.copy(), g2.copy()] | |||
# # compute | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn_mix, gkernel, True) | |||
# time_km = time.time() - time0 | |||
############################################################################### | |||
# tests on different numbers of median-sets. | |||
def test_preimage_iam_median_nb(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list_iam = [] | |||
nb_updated_list_random = [] | |||
nb_updated_k_list_iam = [] | |||
nb_updated_k_list_random = [] | |||
g_best = [] | |||
# for each alpha | |||
for alpha in alpha_range: | |||
print('\n-------------------------------------------------------\n') | |||
print('alpha =', alpha) | |||
time0 = time.time() | |||
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||
nb_updated_k_iam, nb_updated_k_random = \ | |||
preimage_iam_random_mix(Gn, [g1, g2], | |||
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
InitRandomWithAllDk=InitRandomWithAllDk, | |||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list.append(time_total) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat_list) | |||
nb_updated_list_iam.append(nb_updated_iam) | |||
nb_updated_list_random.append(nb_updated_random) | |||
nb_updated_k_list_iam.append(nb_updated_k_iam) | |||
nb_updated_k_list_random.append(nb_updated_k_random) | |||
# show best graphs and save them to file. | |||
for idx, item in enumerate(alpha_range): | |||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) | |||
+ '_alpha' + str(item) + '.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(g_best[idx][0].nodes(data=True)) | |||
# print(g_best[idx][0].edges(data=True)) | |||
# for g in g_best[idx]: | |||
# draw_Letter_graph(g, savepath='results/gk_iam/') | |||
## nx.draw_networkx(g) | |||
## plt.show() | |||
# print(g.nodes(data=True)) | |||
# print(g.edges(data=True)) | |||
    lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
k = 5 # k nearest neighbors | |||
epsilon = 1e-6 | |||
InitIAMWithAllDk = True | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# parameters for IAM function | |||
c_ei=1 | |||
c_er=1 | |||
c_es=1 | |||
ite_max_iam = 50 | |||
epsilon_iam = 0.001 | |||
removeNodes = True | |||
connected_iam = False | |||
    # number of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# find out all the graphs classified to positive group 1. | |||
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list = [] | |||
nb_updated_k_list = [] | |||
g_best = [] | |||
for nb_median in nb_median_range: | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
# compute the corresponding sod in graph space. | |||
for idx, item in enumerate(alpha_range): | |||
sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam) | |||
print('\nnumber of updates of the best graph for each alpha by random generation: ', | |||
nb_updated_list_random) | |||
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', | |||
nb_updated_k_list_iam) | |||
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', | |||
nb_updated_k_list_random) | |||
print('\ntimes:', time_list) | |||
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] | |||
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] | |||
################################################################### | |||
alpha_range = [1 / nb_median] * nb_median | |||
time0 = time.time() | |||
dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \ | |||
preimage_iam(Gn, Gn_median, | |||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||
gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ | |||
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) | |||
with open('results/preimage_mix/nb_updates.txt', 'r+') as file: | |||
content = file.read() | |||
file.seek(0, 0) | |||
file.write(str_fw + content) | |||
time_total = time.time() - time0 + time_km | |||
print('\ntime: ', time_total) | |||
time_list.append(time_total) | |||
print('\nsmallest distance in kernel space: ', dhat) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat_list) | |||
print('\nnumber of updates of the best graph: ', nb_updated) | |||
nb_updated_list.append(nb_updated) | |||
print('\nnumber of updates of k nearest graphs: ', nb_updated_k) | |||
nb_updated_k_list.append(nb_updated_k) | |||
# show the best graph and save it to file. | |||
print('the shortest distance is', dhat) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||
with_labels=True) | |||
# plt.show() | |||
plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + | |||
'.png', format="PNG") | |||
plt.clf() | |||
# print(ghat_list[0].nodes(data=True)) | |||
# print(ghat_list[0].edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', | |||
nb_updated_list) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', | |||
nb_updated_k_list) | |||
print('\ntimes:', time_list) | |||
############################################################################### | |||
# test on the combination of the two randomly chosen graphs. (the same as in the | |||
# random pre-image paper.) | |||
def test_gkiam_2combination_all_pairs(): | |||
from preimage_iam import preimage_iam, compute_kernel | |||
from iam import median_distance | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
@@ -334,7 +462,7 @@ def test_gkiam_2combination_all_pairs(): | |||
# compute the corresponding sod in graph space. | |||
for idx, item in enumerate(alpha_range): | |||
-        sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
+        sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
@@ -358,8 +486,7 @@ def test_gkiam_2combination_all_pairs(): | |||
def test_gkiam_2combination(): | |||
-    from gk_iam import gk_iam_nearest_multi, compute_kernel
-    from iam import median_distance
+    from gk_iam import gk_iam_nearest_multi
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
@@ -451,7 +578,7 @@ def test_gkiam_2combination():
# compute the corresponding sod in graph space.
for idx, item in enumerate(alpha_range):
- sod_tmp, _ = median_distance(g_best[0], [g1, g2], ged_cost=ged_cost,
+ sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL)
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
@@ -463,148 +590,6 @@ def test_gkiam_2combination(): | |||
print('\ntimes:', time_list) | |||
def test_random_preimage_2combination(): | |||
# from gk_iam import compute_kernel | |||
from preimage_random import preimage_random | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:12] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel) | |||
# print(dis_max, dis_min, dis_mean) | |||
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||
l = 500 | |||
alpha_range = np.linspace(0, 1, 11) | |||
k = 5 # k nearest neighbors | |||
# randomly select two molecules | |||
np.random.seed(1) | |||
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2) | |||
g1 = Gn[idx_gi[0]].copy() | |||
g2 = Gn[idx_gi[1]].copy() | |||
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||
# plt.savefig("results/random_preimage/mutag10.png", format="PNG") | |||
# plt.show() | |||
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||
# plt.savefig("results/random_preimage/mutag11.png", format="PNG") | |||
# plt.show() | |||
###################################################################### | |||
# Gn_mix = [g.copy() for g in Gn] | |||
# Gn_mix.append(g1.copy()) | |||
# Gn_mix.append(g2.copy()) | |||
# | |||
## g_tmp = iam([g1, g2]) | |||
## nx.draw_networkx(g_tmp) | |||
## plt.show() | |||
# | |||
# # compute | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn_mix, gkernel, True) | |||
# time_km = time.time() - time0 | |||
################################################################### | |||
idx1 = idx_gi[0] | |||
idx2 = idx_gi[1] | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||
km = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
for i in range(len(Gn)): | |||
km[i, len(Gn)] = km[i, idx1] | |||
km[i, len(Gn) + 1] = km[i, idx2] | |||
km[len(Gn), i] = km[i, idx1] | |||
km[len(Gn) + 1, i] = km[i, idx2] | |||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||
################################################################### | |||
time_list = [] | |||
nb_updated_list = [] | |||
g_best = [] | |||
dis_ks_min_list = [] | |||
# for each alpha | |||
for alpha in alpha_range: | |||
print('\n-------------------------------------------------------\n') | |||
print('alpha =', alpha) | |||
time0 = time.time() | |||
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], | |||
range(len(Gn), len(Gn) + 2), km, | |||
k, r_max, l, gkernel) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list.append(time_total) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat) | |||
nb_updated_list.append(nb_updated) | |||
# show best graphs and save them to file. | |||
for idx, item in enumerate(alpha_range): | |||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG") | |||
plt.show() | |||
plt.clf() | |||
print(g_best[idx].nodes(data=True)) | |||
print(g_best[idx].edges(data=True)) | |||
# # compute the corresponding sod in graph space. (alpha range not considered.) | |||
# sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||
# sod_gs_list.append(sod_tmp) | |||
# sod_gs_min_list.append(np.min(sod_tmp)) | |||
# sod_ks_min_list.append(sod_ks) | |||
# nb_updated_list.append(nb_updated) | |||
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||
print('\nnumber of updates for each alpha: ', nb_updated_list) | |||
print('\ntimes:', time_list) | |||
############################################################################### | |||
# help functions | |||
def remove_edges(Gn): | |||
for G in Gn: | |||
for _, _, attrs in G.edges(data=True): | |||
attrs.clear() | |||
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None): | |||
from gk_iam import compute_kernel | |||
dis_mat = np.empty((len(Gn), len(Gn))) | |||
if Kmatrix is None:
Kmatrix = compute_kernel(Gn, gkernel, True) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j] | |||
if dis < 0: | |||
if dis > -1e-10: | |||
dis = 0 | |||
else: | |||
raise ValueError('The distance is negative.') | |||
dis_mat[i, j] = np.sqrt(dis) | |||
dis_mat[j, i] = dis_mat[i, j] | |||
dis_max = np.max(np.max(dis_mat)) | |||
dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | |||
dis_mean = np.mean(np.mean(dis_mat)) | |||
return dis_mat, dis_max, dis_min, dis_mean | |||
############################################################################### | |||
@@ -612,7 +597,13 @@ if __name__ == '__main__': | |||
############################################################################### | |||
# test on the combination of two randomly chosen graphs (the same setting as
# in the random pre-image paper).
# test_random_preimage_2combination() | |||
# test_gkiam_2combination() | |||
# test_gkiam_2combination_all_pairs() | |||
test_preimage_mix_2combination_all_pairs() | |||
############################################################################### | |||
# tests on different numbers of median graphs.
test_preimage_iam_median_nb() | |||
############################################################################### | |||
# tests on a grid of median-set sizes and values of k.
# test_preimage_iam_grid_k_median_nb() |
@@ -0,0 +1,542 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Sep 5 15:59:00 2019 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from ged import ged_median | |||
from utils import compute_kernel, get_same_item_indices, remove_edges | |||
from preimage_iam import preimage_iam_random_mix | |||
############################################################################### | |||
# tests on a grid of median-set sizes and values of k.
def test_preimage_mix_grid_k_median_nb(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||
l_max = 500 # update limit for random generation | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
# k = 5 # k nearest neighbors | |||
epsilon = 1e-6 | |||
InitIAMWithAllDk = True | |||
InitRandomWithAllDk = True | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# parameters for IAM function | |||
c_ei=1 | |||
c_er=1 | |||
c_es=1 | |||
ite_max_iam = 50 | |||
epsilon_iam = 0.001 | |||
removeNodes = True | |||
connected_iam = False | |||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# number of nearest neighbors. | |||
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] | |||
# find all the graphs classified into positive group 1.
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list_iam = [] | |||
nb_updated_list_random = [] | |||
nb_updated_k_list_iam = [] | |||
nb_updated_k_list_random = [] | |||
g_best = [] | |||
for idx_nb, nb_median in enumerate(nb_median_range): | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
################################################################### | |||
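# load the Gram matrix precomputed and saved by the commented-out block above.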
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# build the mixed Gram matrix: dataset graphs first, median graphs appended.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
################################################################### | |||
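# uniform weights: every median graph contributes equally to the target point.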
alpha_range = [1 / nb_median] * nb_median | |||
time_list.append([]) | |||
dis_ks_min_list.append([]) | |||
sod_gs_list.append([]) | |||
sod_gs_min_list.append([]) | |||
nb_updated_list_iam.append([]) | |||
nb_updated_list_random.append([]) | |||
nb_updated_k_list_iam.append([]) | |||
nb_updated_k_list_random.append([]) | |||
g_best.append([]) | |||
for k in k_range: | |||
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |||
print('k =', k) | |||
time0 = time.time() | |||
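# dhat: smallest kernel-space distance found; ghat_list: the corresponding
# pre-image candidates; the nb_updated_* counters record how often the best
# graph and the k nearest graphs were updated by IAM and by random generation.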
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||
nb_updated_k_iam, nb_updated_k_random = \ | |||
preimage_iam_random_mix(Gn, Gn_median, | |||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
InitRandomWithAllDk=InitRandomWithAllDk, | |||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list[idx_nb].append(time_total) | |||
print('\nsmallest distance in kernel space: ', dhat) | |||
dis_ks_min_list[idx_nb].append(dhat) | |||
g_best[idx_nb].append(ghat_list) | |||
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam) | |||
nb_updated_list_iam[idx_nb].append(nb_updated_iam) | |||
print('\nnumber of updates of the best graph by random generation: ', | |||
nb_updated_random) | |||
nb_updated_list_random[idx_nb].append(nb_updated_random) | |||
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam) | |||
nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam) | |||
print('\nnumber of updates of k nearest graphs by random generation: ', | |||
nb_updated_k_random) | |||
nb_updated_k_list_random[idx_nb].append(nb_updated_k_random) | |||
# show the best graph and save it to file. | |||
print('the shortest distance is', dhat) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + | |||
'_k' + str(k) + '.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(ghat_list[0].nodes(data=True)) | |||
# print(ghat_list[0].edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list[idx_nb].append(sod_tmp) | |||
sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs and k: ', | |||
sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs and k: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', | |||
nb_updated_list_iam) | |||
print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ', | |||
nb_updated_list_random) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', | |||
nb_updated_k_list_iam) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ', | |||
nb_updated_k_list_random) | |||
print('\ntimes:', time_list) | |||
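# A minimal sketch (not called by the tests) of the Gram-matrix augmentation
# that the functions in this file repeat inline: append the chosen median
# graphs after the n dataset graphs by copying the corresponding rows and
# columns of the precomputed matrix. The names km_tmp and idx_rdm mirror the
# variables above.
def build_mixed_gram_matrix(km_tmp, n, idx_rdm):
    nb_median = len(idx_rdm)
    km = np.zeros((n + nb_median, n + nb_median))
    km[0:n, 0:n] = km_tmp[0:n, 0:n]  # dataset vs. dataset block.
    for j, idx in enumerate(idx_rdm):
        km[0:n, n + j] = km_tmp[0:n, idx]  # dataset vs. median blocks.
        km[n + j, 0:n] = km_tmp[idx, 0:n]
    for i, idx1 in enumerate(idx_rdm):
        for j, idx2 in enumerate(idx_rdm):
            km[n + i, n + j] = km_tmp[idx1, idx2]  # median vs. median block.
    return km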
############################################################################### | |||
# tests on different numbers of median graphs.
def test_preimage_mix_median_nb(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||
l_max = 500 # update limit for random generation | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
k = 5 # k nearest neighbors | |||
epsilon = 1e-6 | |||
InitIAMWithAllDk = True | |||
InitRandomWithAllDk = True | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# parameters for IAM function | |||
c_ei=1 | |||
c_er=1 | |||
c_es=1 | |||
ite_max_iam = 50 | |||
epsilon_iam = 0.001 | |||
removeNodes = True | |||
connected_iam = False | |||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# find all the graphs classified into positive group 1.
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list_iam = [] | |||
nb_updated_list_random = [] | |||
nb_updated_k_list_iam = [] | |||
nb_updated_k_list_random = [] | |||
g_best = [] | |||
for nb_median in nb_median_range: | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# build the mixed Gram matrix: dataset graphs first, median graphs appended.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
################################################################### | |||
alpha_range = [1 / nb_median] * nb_median | |||
time0 = time.time() | |||
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||
nb_updated_k_iam, nb_updated_k_random = \ | |||
preimage_iam_random_mix(Gn, Gn_median, | |||
alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, | |||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
InitRandomWithAllDk=InitRandomWithAllDk, | |||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list.append(time_total) | |||
print('\nsmallest distance in kernel space: ', dhat) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat_list) | |||
print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam) | |||
nb_updated_list_iam.append(nb_updated_iam) | |||
print('\nnumber of updates of the best graph by random generation: ', | |||
nb_updated_random) | |||
nb_updated_list_random.append(nb_updated_random) | |||
print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam) | |||
nb_updated_k_list_iam.append(nb_updated_k_iam) | |||
print('\nnumber of updates of k nearest graphs by random generation: ', | |||
nb_updated_k_random) | |||
nb_updated_k_list_random.append(nb_updated_k_random) | |||
# show the best graph and save it to file. | |||
print('the shortest distance is', dhat) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + | |||
'.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(ghat_list[0].nodes(data=True)) | |||
# print(ghat_list[0].edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', | |||
nb_updated_list_iam) | |||
print('\nnumber of updates of the best graph for each set of median graphs by random generation: ', | |||
nb_updated_list_random) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', | |||
nb_updated_k_list_iam) | |||
print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ', | |||
nb_updated_k_list_random) | |||
print('\ntimes:', time_list) | |||
############################################################################### | |||
# test on the combination of two randomly chosen graphs (the same setting as
# in the random pre-image paper).
def test_preimage_mix_2combination_all_pairs(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||
l_max = 500 # update limit for random generation | |||
alpha_range = np.linspace(0.5, 0.5, 1) | |||
k = 5 # k nearest neighbors | |||
epsilon = 1e-6 | |||
InitIAMWithAllDk = True | |||
InitRandomWithAllDk = True | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# parameters for IAM function | |||
c_ei=1 | |||
c_er=1 | |||
c_es=1 | |||
ite_max_iam = 50 | |||
epsilon_iam = 0.001 | |||
removeNodes = True | |||
connected_iam = False | |||
nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf) | |||
nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf) | |||
# test on each pair of graphs. | |||
# for idx1 in range(len(Gn) - 1, -1, -1): | |||
# for idx2 in range(idx1, -1, -1): | |||
for idx1 in range(187, 188): | |||
for idx2 in range(167, 168): | |||
g1 = Gn[idx1].copy() | |||
g2 = Gn[idx2].copy() | |||
# Gn[10] = []
nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||
plt.savefig("results/preimage_mix/mutag187.png", format="PNG") | |||
plt.show() | |||
plt.clf() | |||
nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||
plt.savefig("results/preimage_mix/mutag167.png", format="PNG") | |||
plt.show() | |||
plt.clf() | |||
################################################################### | |||
# Gn_mix = [g.copy() for g in Gn] | |||
# Gn_mix.append(g1.copy()) | |||
# Gn_mix.append(g2.copy()) | |||
# | |||
# # compute | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn_mix, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# | |||
# # write Gram matrix to file and read it. | |||
# np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km) | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||
km = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
for i in range(len(Gn)): | |||
km[i, len(Gn)] = km[i, idx1] | |||
km[i, len(Gn) + 1] = km[i, idx2] | |||
km[len(Gn), i] = km[i, idx1] | |||
km[len(Gn) + 1, i] = km[i, idx2] | |||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||
################################################################### | |||
# # use only the two graphs in median set as candidates. | |||
# Gn = [g1.copy(), g2.copy()] | |||
# Gn_mix = Gn + [g1.copy(), g2.copy()] | |||
# # compute | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn_mix, gkernel, True) | |||
# time_km = time.time() - time0 | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list_iam = [] | |||
nb_updated_list_random = [] | |||
nb_updated_k_list_iam = [] | |||
nb_updated_k_list_random = [] | |||
g_best = [] | |||
# for each alpha | |||
for alpha in alpha_range: | |||
print('\n-------------------------------------------------------\n') | |||
print('alpha =', alpha) | |||
time0 = time.time() | |||
dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \ | |||
nb_updated_k_iam, nb_updated_k_random = \ | |||
preimage_iam_random_mix(Gn, [g1, g2], | |||
[alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, | |||
l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, | |||
InitRandomWithAllDk=InitRandomWithAllDk, | |||
params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, | |||
'ite_max': ite_max_iam, 'epsilon': epsilon_iam, | |||
'removeNodes': removeNodes, 'connected': connected_iam}, | |||
params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, | |||
'saveGXL': saveGXL}) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list.append(time_total) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat_list) | |||
nb_updated_list_iam.append(nb_updated_iam) | |||
nb_updated_list_random.append(nb_updated_random) | |||
nb_updated_k_list_iam.append(nb_updated_k_iam) | |||
nb_updated_k_list_random.append(nb_updated_k_random) | |||
# show best graphs and save them to file. | |||
for idx, item in enumerate(alpha_range): | |||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) | |||
+ '_alpha' + str(item) + '.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(g_best[idx][0].nodes(data=True)) | |||
# print(g_best[idx][0].edges(data=True)) | |||
# for g in g_best[idx]: | |||
# draw_Letter_graph(g, savepath='results/gk_iam/') | |||
## nx.draw_networkx(g) | |||
## plt.show() | |||
# print(g.nodes(data=True)) | |||
# print(g.edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
for idx, item in enumerate(alpha_range): | |||
sod_tmp, _ = ged_median([g_best[idx][0]], [g1, g2], ged_cost=ged_cost,
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam) | |||
print('\nnumber of updates of the best graph for each alpha by random generation: ', | |||
nb_updated_list_random) | |||
print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', | |||
nb_updated_k_list_iam) | |||
print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', | |||
nb_updated_k_list_random) | |||
print('\ntimes:', time_list) | |||
nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0] | |||
nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0] | |||
str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \ | |||
% (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0]) | |||
with open('results/preimage_mix/nb_updates.txt', 'r+') as file: | |||
content = file.read() | |||
file.seek(0, 0) | |||
file.write(str_fw + content) | |||
############################################################################### | |||
if __name__ == '__main__': | |||
############################################################################### | |||
# test on the combination of two randomly chosen graphs (the same setting as
# in the random pre-image paper).
# test_preimage_mix_2combination_all_pairs() | |||
############################################################################### | |||
# tests on different numbers of median graphs.
# test_preimage_mix_median_nb() | |||
############################################################################### | |||
# tests on a grid of median-set sizes and values of k.
test_preimage_mix_grid_k_median_nb() |
@@ -0,0 +1,402 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Sep 5 15:59:00 2019 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
import matplotlib.pyplot as plt | |||
import time | |||
import random | |||
#from tqdm import tqdm | |||
#import os | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.utils.graphfiles import loadDataset | |||
from preimage_random import preimage_random | |||
from ged import ged_median | |||
from utils import compute_kernel, get_same_item_indices, remove_edges | |||
############################################################################### | |||
# tests on a grid of median-set sizes and values of k.
def test_preimage_random_grid_k_median_nb(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||
l = 500 # update limit for random generation | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
# k = 5 # k nearest neighbors | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# number of nearest neighbors. | |||
k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100] | |||
# find all the graphs classified into positive group 1.
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list = [] | |||
g_best = [] | |||
for idx_nb, nb_median in enumerate(nb_median_range): | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# build the mixed Gram matrix: dataset graphs first, median graphs appended.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
################################################################### | |||
alpha_range = [1 / nb_median] * nb_median | |||
time_list.append([]) | |||
dis_ks_min_list.append([]) | |||
sod_gs_list.append([]) | |||
sod_gs_min_list.append([]) | |||
nb_updated_list.append([]) | |||
g_best.append([]) | |||
for k in k_range: | |||
print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n') | |||
print('k =', k) | |||
time0 = time.time() | |||
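# preimage_random returns the smallest kernel-space distance found, the
# corresponding pre-image graph, and the number of updates of the best graph.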
dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, | |||
range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list[idx_nb].append(time_total) | |||
print('\nsmallest distance in kernel space: ', dhat) | |||
dis_ks_min_list[idx_nb].append(dhat) | |||
g_best[idx_nb].append(ghat) | |||
print('\nnumber of updates of the best graph: ', nb_updated) | |||
nb_updated_list[idx_nb].append(nb_updated) | |||
# show the best graph and save it to file. | |||
print('the shortest distance is', dhat) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + | |||
'_k' + str(k) + '.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(ghat_list[0].nodes(data=True)) | |||
# print(ghat_list[0].edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list[idx_nb].append(sod_tmp) | |||
sod_gs_min_list[idx_nb].append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs and k: ', | |||
sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs and k: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs and k: ',
nb_updated_list)
print('\ntimes:', time_list) | |||
############################################################################### | |||
# tests on different numbers of median graphs.
def test_preimage_random_median_nb(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:50] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
lmbda = 0.03 # termination probability
r_max = 5 # iteration limit for pre-image. | |||
l = 500 # update limit for random generation | |||
# alpha_range = np.linspace(0.5, 0.5, 1) | |||
k = 5 # k nearest neighbors | |||
# parameters for GED function | |||
ged_cost='CHEM_1' | |||
ged_method='IPFP' | |||
saveGXL='gedlib' | |||
# numbers of graphs whose median we want to compute.
nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100] | |||
# find all the graphs classified into positive group 1.
idx_dict = get_same_item_indices(y_all) | |||
Gn = [Gn[i] for i in idx_dict[1]] | |||
# # compute Gram matrix. | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn, gkernel, True) | |||
# time_km = time.time() - time0 | |||
# # write Gram matrix to file. | |||
# np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km) | |||
time_list = [] | |||
dis_ks_min_list = [] | |||
sod_gs_list = [] | |||
sod_gs_min_list = [] | |||
nb_updated_list = [] | |||
g_best = [] | |||
for nb_median in nb_median_range: | |||
print('\n-------------------------------------------------------') | |||
print('number of median graphs =', nb_median) | |||
random.seed(1) | |||
idx_rdm = random.sample(range(len(Gn)), nb_median) | |||
print('graphs chosen:', idx_rdm) | |||
Gn_median = [Gn[idx].copy() for idx in idx_rdm] | |||
# for g in Gn_median: | |||
# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) | |||
## plt.savefig("results/preimage_mix/mutag.png", format="PNG") | |||
# plt.show() | |||
# plt.clf() | |||
################################################################### | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz') | |||
km_tmp = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# build the mixed Gram matrix: dataset graphs first, median graphs appended.
km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median)) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
km[i, j] = km_tmp[i, j] | |||
km[j, i] = km[i, j] | |||
for i in range(len(Gn)): | |||
for j, idx in enumerate(idx_rdm): | |||
km[i, len(Gn) + j] = km[i, idx] | |||
km[len(Gn) + j, i] = km[i, idx] | |||
for i, idx1 in enumerate(idx_rdm): | |||
for j, idx2 in enumerate(idx_rdm): | |||
km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2] | |||
################################################################### | |||
alpha_range = [1 / nb_median] * nb_median | |||
time0 = time.time() | |||
dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, | |||
range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list.append(time_total) | |||
print('\nsmallest distance in kernel space: ', dhat) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat) | |||
print('\nnumber of updates of the best graph: ', nb_updated) | |||
nb_updated_list.append(nb_updated) | |||
# show the best graph and save it to file. | |||
print('the shortest distance is', dhat) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + | |||
'.png', format="PNG") | |||
# plt.show() | |||
plt.clf() | |||
# print(ghat_list[0].nodes(data=True)) | |||
# print(ghat_list[0].edges(data=True)) | |||
# compute the corresponding sod in graph space. | |||
sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, | |||
ged_method=ged_method, saveGXL=saveGXL) | |||
sod_gs_list.append(sod_tmp) | |||
sod_gs_min_list.append(np.min(sod_tmp)) | |||
print('\nsmallest sod in graph space: ', np.min(sod_tmp)) | |||
print('\nsods in graph space: ', sod_gs_list) | |||
print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each set of median graphs: ', | |||
dis_ks_min_list) | |||
print('\nnumber of updates of the best graph for each set of median graphs: ', | |||
nb_updated_list) | |||
print('\ntimes:', time_list) | |||
############################################################################### | |||
# test on the combination of two randomly chosen graphs (the same setting as
# in the random pre-image paper).
def test_random_preimage_2combination(): | |||
ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt', | |||
'extra_params': {}} # node/edge symb | |||
Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) | |||
# Gn = Gn[0:12] | |||
remove_edges(Gn) | |||
gkernel = 'marginalizedkernel' | |||
# dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel) | |||
# print(dis_max, dis_min, dis_mean) | |||
lmbda = 0.03 # termination probability
r_max = 10 # iteration limit for pre-image. | |||
l = 500 # update limit for random generation
alpha_range = np.linspace(0, 1, 11) | |||
k = 5 # k nearest neighbors | |||
# randomly select two molecules | |||
np.random.seed(1) | |||
idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2) | |||
g1 = Gn[idx_gi[0]].copy() | |||
g2 = Gn[idx_gi[1]].copy() | |||
# nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True) | |||
# plt.savefig("results/random_preimage/mutag10.png", format="PNG") | |||
# plt.show() | |||
# nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True) | |||
# plt.savefig("results/random_preimage/mutag11.png", format="PNG") | |||
# plt.show() | |||
###################################################################### | |||
# Gn_mix = [g.copy() for g in Gn] | |||
# Gn_mix.append(g1.copy()) | |||
# Gn_mix.append(g2.copy()) | |||
# | |||
## g_tmp = iam([g1, g2]) | |||
## nx.draw_networkx(g_tmp) | |||
## plt.show() | |||
# | |||
# # compute | |||
# time0 = time.time() | |||
# km = compute_kernel(Gn_mix, gkernel, True) | |||
# time_km = time.time() - time0 | |||
################################################################### | |||
idx1 = idx_gi[0] | |||
idx2 = idx_gi[1] | |||
gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz') | |||
km = gmfile['gm'] | |||
time_km = gmfile['gmtime'] | |||
# modify mixed gram matrix. | |||
for i in range(len(Gn)): | |||
km[i, len(Gn)] = km[i, idx1] | |||
km[i, len(Gn) + 1] = km[i, idx2] | |||
km[len(Gn), i] = km[i, idx1] | |||
km[len(Gn) + 1, i] = km[i, idx2] | |||
km[len(Gn), len(Gn)] = km[idx1, idx1] | |||
km[len(Gn), len(Gn) + 1] = km[idx1, idx2] | |||
km[len(Gn) + 1, len(Gn)] = km[idx2, idx1] | |||
km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2] | |||
################################################################### | |||
time_list = [] | |||
nb_updated_list = [] | |||
g_best = [] | |||
dis_ks_min_list = [] | |||
# for each alpha | |||
for alpha in alpha_range: | |||
print('\n-------------------------------------------------------\n') | |||
print('alpha =', alpha) | |||
time0 = time.time() | |||
dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], | |||
range(len(Gn), len(Gn) + 2), km, | |||
k, r_max, l, gkernel) | |||
time_total = time.time() - time0 + time_km | |||
print('time: ', time_total) | |||
time_list.append(time_total) | |||
dis_ks_min_list.append(dhat) | |||
g_best.append(ghat) | |||
nb_updated_list.append(nb_updated) | |||
# show best graphs and save them to file. | |||
for idx, item in enumerate(alpha_range): | |||
print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx]) | |||
print('one of the possible corresponding pre-images is') | |||
nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), | |||
with_labels=True) | |||
plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
plt.show()
plt.clf() | |||
print(g_best[idx].nodes(data=True)) | |||
print(g_best[idx].edges(data=True)) | |||
# # compute the corresponding sod in graph space. (alpha range not considered.) | |||
# sod_tmp, _ = median_distance(g_best[0], Gn_let) | |||
# sod_gs_list.append(sod_tmp) | |||
# sod_gs_min_list.append(np.min(sod_tmp)) | |||
# sod_ks_min_list.append(sod_ks) | |||
# nb_updated_list.append(nb_updated) | |||
# print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list) | |||
print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) | |||
print('\nnumber of updates for each alpha: ', nb_updated_list) | |||
print('\ntimes:', time_list) | |||
############################################################################### | |||
if __name__ == '__main__': | |||
############################################################################### | |||
# test on the combination of two randomly chosen graphs (the same setting as
# in the random pre-image paper).
# test_random_preimage_2combination() | |||
############################################################################### | |||
# tests on different numbers of median graphs.
test_preimage_random_median_nb() | |||
############################################################################### | |||
# tests on a grid of median-set sizes and values of k.
# test_preimage_random_grid_k_median_nb() |
@@ -0,0 +1,109 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Oct 17 19:05:07 2019 | |||
Useful functions. | |||
@author: ljia | |||
""" | |||
#import networkx as nx | |||
import multiprocessing | |||
import numpy as np | |||
import sys | |||
sys.path.insert(0, "../") | |||
from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||
from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||
from pygraph.kernels.spKernel import spkernel | |||
import functools | |||
from pygraph.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
from pygraph.kernels.structuralspKernel import structuralspkernel | |||
def remove_edges(Gn): | |||
for G in Gn: | |||
for _, _, attrs in G.edges(data=True): | |||
attrs.clear() | |||
def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True): | |||
term1 = Kmatrix[idx_g, idx_g] | |||
term2 = 0 | |||
for i, a in enumerate(alpha): | |||
term2 += a * Kmatrix[idx_g, idx_gi[i]] | |||
term2 *= 2 | |||
if not withterm3:
for i1, a1 in enumerate(alpha): | |||
for i2, a2 in enumerate(alpha): | |||
term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]] | |||
return np.sqrt(term1 - term2 + term3) | |||
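# dis_gstar computes the kernel-space distance between graph idx_g and the
# weighted combination g* = sum_i alpha_i * g_i:
#     d(g, g*) = sqrt(k(g, g) - 2 * sum_i alpha_i * k(g, g_i)
#                     + sum_{i,j} alpha_i * alpha_j * k(g_i, g_j)).
# when withterm3 is True, the constant third term is assumed to have been
# precomputed by the caller and passed in via term3.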
def compute_kernel(Gn, graph_kernel, verbose): | |||
if graph_kernel == 'marginalizedkernel': | |||
Kmatrix, _ = marginalizedkernel(Gn, node_label='atom', edge_label=None, | |||
p_quit=0.03, n_iteration=10, remove_totters=False, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'untilhpathkernel': | |||
Kmatrix, _ = untilhpathkernel(Gn, node_label='atom', edge_label=None, | |||
depth=10, k_func='MinMax', compute_method='trie', | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'spkernel': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _, _ = spkernel(Gn, node_label='atom', node_kernels= | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
elif graph_kernel == 'structuralspkernel': | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
Kmatrix, _ = structuralspkernel(Gn, node_label='atom', node_kernels= | |||
{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}, | |||
n_jobs=multiprocessing.cpu_count(), verbose=verbose) | |||
# normalization | |||
Kmatrix_diag = Kmatrix.diagonal().copy() | |||
for i in range(len(Kmatrix)): | |||
for j in range(i, len(Kmatrix)): | |||
Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j]) | |||
Kmatrix[j][i] = Kmatrix[i][j] | |||
return Kmatrix | |||
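# A minimal usage sketch, assuming Gn is a list of networkx graphs with an
# 'atom' node attribute (as in the MUTAG tests):
# km = compute_kernel(Gn, 'marginalizedkernel', True)
# thanks to the normalization above, km[i, i] == 1 for every graph i.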
def gram2distances(Kmatrix): | |||
dmatrix = np.zeros((len(Kmatrix), len(Kmatrix))) | |||
for i1 in range(len(Kmatrix)): | |||
for i2 in range(len(Kmatrix)): | |||
dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2] | |||
dmatrix = np.sqrt(dmatrix) | |||
return dmatrix | |||
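# note: unlike kernel_distance_matrix below, gram2distances does not clamp
# small negative squared distances caused by rounding, so np.sqrt can yield
# nan on near-zero entries.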
def kernel_distance_matrix(Gn, Kmatrix=None, gkernel=None): | |||
dis_mat = np.empty((len(Gn), len(Gn))) | |||
if Kmatrix is None:
Kmatrix = compute_kernel(Gn, gkernel, True) | |||
for i in range(len(Gn)): | |||
for j in range(i, len(Gn)): | |||
dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j] | |||
if dis < 0: | |||
if dis > -1e-10: | |||
dis = 0 | |||
else: | |||
raise ValueError('The distance is negative.') | |||
dis_mat[i, j] = np.sqrt(dis) | |||
dis_mat[j, i] = dis_mat[i, j] | |||
dis_max = np.max(dis_mat)
dis_min = np.min(dis_mat[dis_mat != 0])
dis_mean = np.mean(dis_mat)
return dis_mat, dis_max, dis_min, dis_mean | |||
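# for a Gram matrix normalized as in compute_kernel (unit diagonal), the
# squared distance above reduces to 2 - 2 * Kmatrix[i, j].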
def get_same_item_indices(ls): | |||
"""Get the indices of the same items in a list. Return a dict keyed by items. | |||
""" | |||
idx_dict = {} | |||
for idx, item in enumerate(ls): | |||
if item in idx_dict: | |||
idx_dict[item].append(idx) | |||
else: | |||
idx_dict[item] = [idx] | |||
return idx_dict |
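# A minimal usage sketch with hypothetical labels:
# get_same_item_indices([1, -1, 1, 1]) returns {1: [0, 2, 3], -1: [1]}.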