@@ -12,8 +12,8 @@ import matplotlib.pyplot as plt | |||||
from numpy.linalg import eig | from numpy.linalg import eig | ||||
# read gram matrices from file. | # read gram matrices from file. | ||||
results_dir = 'results/marginalizedkernel/myria' | |||||
ds_name = 'MUTAG' | |||||
results_dir = 'results/structuralspkernel/' | |||||
ds_name = 'Letter-med' | |||||
gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | ||||
#print('gm time: ', gmfile['gmtime']) | #print('gm time: ', gmfile['gmtime']) | ||||
# a list to store gram matrices for all param_grid_precomputed | # a list to store gram matrices for all param_grid_precomputed | ||||
@@ -32,6 +32,12 @@ for x in gram_matrices: | |||||
print('diag: ', np.diag(x)) | print('diag: ', np.diag(x)) | ||||
print('sum diag < 0.1: ', np.sum(np.diag(x) < 0.1)) | print('sum diag < 0.1: ', np.sum(np.diag(x) < 0.1)) | ||||
print('min, max diag: ', min(np.diag(x)), max(np.diag(x))) | print('min, max diag: ', min(np.diag(x)), max(np.diag(x))) | ||||
print('min, max matrix: ', np.min(x), np.max(x)) | |||||
for i in range(len(x)): | |||||
for j in range(len(x)): | |||||
if x[i][j] > 1: | |||||
print(i, j) | |||||
raise Exception('value bigger than 1 with index', i, j) | |||||
print('mean x: ', np.mean(np.mean(x))) | print('mean x: ', np.mean(np.mean(x))) | ||||
[lamnda, v] = eig(x) | [lamnda, v] = eig(x) | ||||
@@ -18,16 +18,18 @@ dslist = [ | |||||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | ||||
'task': 'regression'}, # node symb | 'task': 'regression'}, # node symb | ||||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | ||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||||
# contains single node graph, node symb | |||||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | ||||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | ||||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | ||||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | ||||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | ||||
# node symb/nsymb | |||||
# node nsymb | |||||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | ||||
# node/edge symb | |||||
# node symb/nsymb | |||||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | ||||
# # node/edge symb | |||||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | ||||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | ||||
@@ -13,18 +13,20 @@ from pygraph.kernels.marginalizedKernel import marginalizedkernel | |||||
dslist = [ | dslist = [ | ||||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | ||||
'task': 'regression'}, # node symb | |||||
'task': 'regression'}, # node symb | |||||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | ||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||||
# contains single node graph, node symb | |||||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | ||||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | ||||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | ||||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | ||||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | ||||
# node symb/nsymb | |||||
# node nsymb | |||||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | ||||
# node/edge symb | |||||
# node symb/nsymb | |||||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | ||||
# # node/edge symb | |||||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | ||||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | ||||
@@ -11,16 +11,18 @@ dslist = [ | |||||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | ||||
'task': 'regression'}, # node symb | 'task': 'regression'}, # node symb | ||||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | ||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||||
# contains single node graph, node symb | |||||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | ||||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | ||||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | ||||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | ||||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | ||||
# node symb/nsymb | |||||
# node nsymb | |||||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | ||||
# node/edge symb | |||||
# node symb/nsymb | |||||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | ||||
# # node/edge symb | |||||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | ||||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | ||||
@@ -18,18 +18,20 @@ dslist = [ | |||||
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | ||||
# 'task': 'regression'}, # node symb | # 'task': 'regression'}, # node symb | ||||
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | ||||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||||
# # contains single node graph, node symb | |||||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||||
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||||
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | ||||
# node symb/nsymb | |||||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||||
# node/edge symb | |||||
{'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||||
{'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||||
'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||||
# node nsymb | |||||
# {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||||
# # node symb/nsymb | |||||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | |||||
# # node/edge symb | |||||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | |||||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | |||||
# {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb | # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb | ||||
# # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb | # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb | ||||
@@ -38,8 +40,8 @@ dslist = [ | |||||
# | # | ||||
# # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb | # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb | ||||
# # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb | # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb | ||||
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb | |||||
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb | |||||
# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values | |||||
# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values | |||||
# # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb | # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb | ||||
# # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb | # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb | ||||
@@ -15,19 +15,21 @@ from pygraph.kernels.untilHPathKernel import untilhpathkernel | |||||
from pygraph.utils.kernels import deltakernel, kernelproduct | from pygraph.utils.kernels import deltakernel, kernelproduct | ||||
dslist = [ | dslist = [ | ||||
# {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
# 'task': 'regression'}, # node symb | |||||
# {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||||
# 'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb | |||||
# {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||||
# {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||||
# {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||||
# 'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||||
# {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||||
# # node symb/nsymb | |||||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||||
# node/edge symb | |||||
{'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds', | |||||
'task': 'regression'}, # node symb | |||||
{'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression', | |||||
'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, | |||||
# contains single node graph, node symb | |||||
{'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb | |||||
{'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled | |||||
{'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat', | |||||
'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb | |||||
{'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'}, | |||||
# node nsymb | |||||
{'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'}, | |||||
# node symb/nsymb | |||||
# {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'}, | ||||
# # node/edge symb | |||||
# {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat', | ||||
# 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | # 'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb | ||||
@@ -103,10 +103,6 @@ def structuralspkernel(*args, | |||||
# get shortest path graphs of Gn | # get shortest path graphs of Gn | ||||
getsp_partial = partial(wrap_getSP, Gn, weight, ds_attrs['is_directed']) | getsp_partial = partial(wrap_getSP, Gn, weight, ds_attrs['is_directed']) | ||||
if len(Gn) < 1000 * n_jobs: | if len(Gn) < 1000 * n_jobs: | ||||
# # use default chunksize as pool.map when iterable is less than 100 | |||||
# chunksize, extra = divmod(len(Gn), n_jobs * 4) | |||||
# if extra: | |||||
# chunksize += 1 | |||||
chunksize = int(len(Gn) / n_jobs) + 1 | chunksize = int(len(Gn) / n_jobs) + 1 | ||||
else: | else: | ||||
chunksize = 1000 | chunksize = 1000 | ||||
@@ -198,10 +194,13 @@ def structuralspkernel(*args, | |||||
# # ---- direct running, normally use single CPU core. ---- | # # ---- direct running, normally use single CPU core. ---- | ||||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | # itr = combinations_with_replacement(range(0, len(Gn)), 2) | ||||
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout): | # for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout): | ||||
# i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs, | |||||
# node_label, edge_label, node_kernels, edge_kernels, gs) | |||||
# Kmatrix[i][j] = kernel | |||||
# Kmatrix[j][i] = kernel | |||||
# if gs[0] == 24 and gs[1] == 411: | |||||
# i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs, | |||||
# node_label, edge_label, node_kernels, edge_kernels, gs) | |||||
# if(kernel > 1): | |||||
# print("error here ") | |||||
# Kmatrix[i][j] = kernel | |||||
# Kmatrix[j][i] = kernel | |||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | print( | ||||
@@ -222,149 +221,161 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label, | |||||
spl2 = splist[jglobal] | spl2 = splist[jglobal] | ||||
kernel = 0 | kernel = 0 | ||||
try: | |||||
# First, compute shortest path matrices, method borrowed from FCSP. | |||||
if ds_attrs['node_labeled']: | |||||
# node symb and non-synb labeled | |||||
if ds_attrs['node_attr_dim'] > 0: | |||||
kn = node_kernels['mix'] | |||||
vk_dict = {} # shortest path matrices dict | |||||
for n1, n2 in product( | |||||
g1.nodes(data=True), g2.nodes(data=True)): | |||||
vk_dict[(n1[0], n2[0])] = kn( | |||||
n1[1][node_label], n2[1][node_label], | |||||
[n1[1]['attributes']], [n2[1]['attributes']]) | |||||
# node symb labeled | |||||
else: | |||||
kn = node_kernels['symb'] | |||||
vk_dict = {} # shortest path matrices dict | |||||
for n1 in g1.nodes(data=True): | |||||
for n2 in g2.nodes(data=True): | |||||
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label], | |||||
n2[1][node_label]) | |||||
#try: | |||||
# First, compute shortest path matrices, method borrowed from FCSP. | |||||
if ds_attrs['node_labeled']: | |||||
# node symb and non-symb labeled | |||||
if ds_attrs['node_attr_dim'] > 0: | |||||
kn = node_kernels['mix'] | |||||
vk_dict = {} # shortest path matrices dict | |||||
for n1, n2 in product( | |||||
g1.nodes(data=True), g2.nodes(data=True)): | |||||
vk_dict[(n1[0], n2[0])] = kn( | |||||
n1[1][node_label], n2[1][node_label], | |||||
[n1[1]['attributes']], [n2[1]['attributes']]) | |||||
# node symb labeled | |||||
else: | else: | ||||
# node non-synb labeled | |||||
if ds_attrs['node_attr_dim'] > 0: | |||||
kn = node_kernels['nsymb'] | |||||
vk_dict = {} # shortest path matrices dict | |||||
for n1 in g1.nodes(data=True): | |||||
for n2 in g2.nodes(data=True): | |||||
vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']], | |||||
[n2[1]['attributes']]) | |||||
# node unlabeled | |||||
else: | |||||
vk_dict = {} | |||||
# Then, compute kernels between all pairs of edges, which idea is an | |||||
# extension of FCSP. It suits sparse graphs, which is the most case we | |||||
# went though. For dense graphs, it would be slow. | |||||
if ds_attrs['edge_labeled']: | |||||
# edge symb and non-synb labeled | |||||
if ds_attrs['edge_attr_dim'] > 0: | |||||
ke = edge_kernels['mix'] | |||||
ek_dict = {} # dict of edge kernels | |||||
for e1, e2 in product( | |||||
g1.edges(data=True), g2.edges(data=True)): | |||||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke( | |||||
e1[2][edge_label], e2[2][edge_label], | |||||
[e1[2]['attributes']], [e2[2]['attributes']]) | |||||
# edge symb labeled | |||||
else: | |||||
ke = edge_kernels['symb'] | |||||
ek_dict = {} | |||||
for e1 in g1.edges(data=True): | |||||
for e2 in g2.edges(data=True): | |||||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke( | |||||
e1[2][edge_label], e2[2][edge_label]) | |||||
kn = node_kernels['symb'] | |||||
vk_dict = {} # shortest path matrices dict | |||||
for n1 in g1.nodes(data=True): | |||||
for n2 in g2.nodes(data=True): | |||||
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label], | |||||
n2[1][node_label]) | |||||
else: | |||||
# node non-symb labeled | |||||
if ds_attrs['node_attr_dim'] > 0: | |||||
kn = node_kernels['nsymb'] | |||||
vk_dict = {} # shortest path matrices dict | |||||
for n1 in g1.nodes(data=True): | |||||
for n2 in g2.nodes(data=True): | |||||
vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']], | |||||
[n2[1]['attributes']]) | |||||
# node unlabeled | |||||
else: | else: | ||||
# edge non-synb labeled | |||||
if ds_attrs['edge_attr_dim'] > 0: | |||||
ke = edge_kernels['nsymb'] | |||||
ek_dict = {} | |||||
for e1 in g1.edges(data=True): | |||||
for e2 in g2.edges(data=True): | |||||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = kn( | |||||
[e1[2]['attributes']], [e2[2]['attributes']]) | |||||
# edge unlabeled | |||||
else: | |||||
ek_dict = {} | |||||
# compute graph kernels | |||||
if vk_dict: | |||||
if ek_dict: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
kpath = vk_dict[(p1[0], p2[0])] | |||||
if kpath: | |||||
for idx in range(1, len(p1)): | |||||
kpath *= vk_dict[(p1[idx], p2[idx])] * \ | |||||
ek_dict[((p1[idx-1], p1[idx]), | |||||
(p2[idx-1], p2[idx]))] | |||||
if not kpath: | |||||
break | |||||
kernel += kpath # add up kernels of all paths | |||||
else: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
kpath = vk_dict[(p1[0], p2[0])] | |||||
if kpath: | |||||
for idx in range(1, len(p1)): | |||||
kpath *= vk_dict[(p1[idx], p2[idx])] | |||||
if not kpath: | |||||
break | |||||
kernel += kpath # add up kernels of all paths | |||||
vk_dict = {} | |||||
# Then, compute kernels between all pairs of edges; this idea is an | |||||
# extension of FCSP. It suits sparse graphs, which covers most of the cases we | |||||
# encountered. For dense graphs, it would be slow. | |||||
if ds_attrs['edge_labeled']: | |||||
# edge symb and non-symb labeled | |||||
if ds_attrs['edge_attr_dim'] > 0: | |||||
ke = edge_kernels['mix'] | |||||
ek_dict = {} # dict of edge kernels | |||||
for e1, e2 in product( | |||||
g1.edges(data=True), g2.edges(data=True)): | |||||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label], | |||||
[e1[2]['attributes']], [e2[2]['attributes']]) | |||||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||||
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp | |||||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||||
# edge symb labeled | |||||
else: | else: | ||||
if ek_dict: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
if len(p1) == 0: | |||||
kernel += 1 | |||||
else: | |||||
kpath = 1 | |||||
for idx in range(0, len(p1) - 1): | |||||
kpath *= ek_dict[((p1[idx], p1[idx+1]), | |||||
(p2[idx], p2[idx+1]))] | |||||
if not kpath: | |||||
break | |||||
kernel += kpath # add up kernels of all paths | |||||
else: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
ke = edge_kernels['symb'] | |||||
ek_dict = {} | |||||
for e1 in g1.edges(data=True): | |||||
for e2 in g2.edges(data=True): | |||||
ek_temp = ke(e1[2][edge_label], e2[2][edge_label]) | |||||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||||
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp | |||||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||||
else: | |||||
# edge non-symb labeled | |||||
if ds_attrs['edge_attr_dim'] > 0: | |||||
ke = edge_kernels['nsymb'] | |||||
ek_dict = {} | |||||
for e1 in g1.edges(data=True): | |||||
for e2 in g2.edges(data=True): | |||||
ek_temp = kn([e1[2]['attributes']], [e2[2]['attributes']]) | |||||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||||
ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp | |||||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||||
# edge unlabeled | |||||
else: | |||||
ek_dict = {} | |||||
# compute graph kernels | |||||
if vk_dict: | |||||
if ek_dict: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
kpath = vk_dict[(p1[0], p2[0])] | |||||
if kpath: | |||||
for idx in range(1, len(p1)): | |||||
kpath *= vk_dict[(p1[idx], p2[idx])] * \ | |||||
ek_dict[((p1[idx-1], p1[idx]), | |||||
(p2[idx-1], p2[idx]))] | |||||
if not kpath: | |||||
break | |||||
kernel += kpath # add up kernels of all paths | |||||
else: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
kpath = vk_dict[(p1[0], p2[0])] | |||||
if kpath: | |||||
for idx in range(1, len(p1)): | |||||
kpath *= vk_dict[(p1[idx], p2[idx])] | |||||
if not kpath: | |||||
break | |||||
kernel += kpath # add up kernels of all paths | |||||
else: | |||||
if ek_dict: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
if len(p1) == 0: | |||||
kernel += 1 | kernel += 1 | ||||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||||
# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation | |||||
# # compute vertex kernel matrix | |||||
# try: | |||||
# vk_mat = np.zeros((nx.number_of_nodes(g1), | |||||
# nx.number_of_nodes(g2))) | |||||
# g1nl = enumerate(g1.nodes(data=True)) | |||||
# g2nl = enumerate(g2.nodes(data=True)) | |||||
# for i1, n1 in g1nl: | |||||
# for i2, n2 in g2nl: | |||||
# vk_mat[i1][i2] = kn( | |||||
# n1[1][node_label], n2[1][node_label], | |||||
# [n1[1]['attributes']], [n2[1]['attributes']]) | |||||
# range1 = range(0, len(edge_w_g[i])) | |||||
# range2 = range(0, len(edge_w_g[j])) | |||||
# for i1 in range1: | |||||
# x1 = edge_x_g[i][i1] | |||||
# y1 = edge_y_g[i][i1] | |||||
# w1 = edge_w_g[i][i1] | |||||
# for i2 in range2: | |||||
# x2 = edge_x_g[j][i2] | |||||
# y2 = edge_y_g[j][i2] | |||||
# w2 = edge_w_g[j][i2] | |||||
# ke = (w1 == w2) | |||||
# if ke > 0: | |||||
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2] | |||||
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2] | |||||
# Kmatrix += kn1 + kn2 | |||||
except KeyError: # missing labels or attributes | |||||
pass | |||||
else: | |||||
kpath = 1 | |||||
for idx in range(0, len(p1) - 1): | |||||
kpath *= ek_dict[((p1[idx], p1[idx+1]), | |||||
(p2[idx], p2[idx+1]))] | |||||
if not kpath: | |||||
break | |||||
kernel += kpath # add up kernels of all paths | |||||
else: | |||||
for p1, p2 in product(spl1, spl2): | |||||
if len(p1) == len(p2): | |||||
kernel += 1 | |||||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||||
# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation | |||||
# # compute vertex kernel matrix | |||||
# try: | |||||
# vk_mat = np.zeros((nx.number_of_nodes(g1), | |||||
# nx.number_of_nodes(g2))) | |||||
# g1nl = enumerate(g1.nodes(data=True)) | |||||
# g2nl = enumerate(g2.nodes(data=True)) | |||||
# for i1, n1 in g1nl: | |||||
# for i2, n2 in g2nl: | |||||
# vk_mat[i1][i2] = kn( | |||||
# n1[1][node_label], n2[1][node_label], | |||||
# [n1[1]['attributes']], [n2[1]['attributes']]) | |||||
# range1 = range(0, len(edge_w_g[i])) | |||||
# range2 = range(0, len(edge_w_g[j])) | |||||
# for i1 in range1: | |||||
# x1 = edge_x_g[i][i1] | |||||
# y1 = edge_y_g[i][i1] | |||||
# w1 = edge_w_g[i][i1] | |||||
# for i2 in range2: | |||||
# x2 = edge_x_g[j][i2] | |||||
# y2 = edge_y_g[j][i2] | |||||
# w2 = edge_w_g[j][i2] | |||||
# ke = (w1 == w2) | |||||
# if ke > 0: | |||||
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2] | |||||
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2] | |||||
# Kmatrix += kn1 + kn2 | |||||
#except KeyError: # missing labels or attributes | |||||
# print("toto") | |||||
# pass | |||||
if(kernel > 1): | |||||
print("kernel error : ", ij) | |||||
return iglobal, jglobal, kernel | return iglobal, jglobal, kernel | ||||