
Modify structural shortest path kernel.

v0.1
jajupmochi committed 6 years ago
commit 834407c262
13 changed files with 232570 additions and 197676 deletions

  1. +8     -2      notebooks/check_gm.py
  2. +38640 -25989  notebooks/check_gm/Acyclic.gm.eps
  3. +38216 -38216  notebooks/check_gm/Alkane.gm.eps
  4. +38506 -38673  notebooks/check_gm/Letter-med.gm.eps
  5. +38452 -38472  notebooks/check_gm/MAO.gm.eps
  6. +38686 -26893  notebooks/check_gm/MUTAG.gm.eps
  7. +39859 -29249  notebooks/check_gm/PAH.gm.eps
  8. +5     -3      notebooks/run_commonwalkkernel.py
  9. +6     -4      notebooks/run_marginalizedkernel.py
 10. +5     -3      notebooks/run_spkernel.py
 11. +15    -13     notebooks/run_structuralspkernel.py
 12. +14    -12     notebooks/run_untilhpathkernel.py
 13. +158   -147    pygraph/kernels/structuralspKernel.py

+8 -2 notebooks/check_gm.py

@@ -12,8 +12,8 @@ import matplotlib.pyplot as plt
 from numpy.linalg import eig
 
 # read gram matrices from file.
-results_dir = 'results/marginalizedkernel/myria'
-ds_name = 'MUTAG'
+results_dir = 'results/structuralspkernel/'
+ds_name = 'Letter-med'
 gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
 #print('gm time: ', gmfile['gmtime'])
 # a list to store gram matrices for all param_grid_precomputed
@@ -32,6 +32,12 @@ for x in gram_matrices:
     print('diag: ', np.diag(x))
     print('sum diag < 0.1: ', np.sum(np.diag(x) < 0.1))
     print('min, max diag: ', min(np.diag(x)), max(np.diag(x)))
+    print('min, max matrix: ', np.min(x), np.max(x))
+    for i in range(len(x)):
+        for j in range(len(x)):
+            if x[i][j] > 1:
+                print(i, j)
+                raise Exception('value bigger than 1 with index', i, j)
     print('mean x: ', np.mean(np.mean(x)))
     [lamnda, v] = eig(x)
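Note: the check added to check_gm.py asserts that no Gram-matrix entry exceeds 1, which must hold for a normalized kernel, and then inspects the eigenvalues. A minimal standalone sketch of the same sanity checks (the helper name and the 'gms' archive key are illustrative assumptions; eigvalsh is used instead of eig since a Gram matrix is symmetric):

import numpy as np
from numpy.linalg import eigvalsh

def check_gram_matrix(gram, tol=1e-10):
    # symmetry: k(x, y) must equal k(y, x)
    assert np.allclose(gram, gram.T), 'Gram matrix is not symmetric.'
    # normalized kernel values cannot exceed 1 (Cauchy-Schwarz)
    too_big = np.argwhere(gram > 1 + tol)
    if too_big.size > 0:
        raise Exception('value bigger than 1 with index', tuple(too_big[0]))
    # positive semi-definiteness: all eigenvalues >= 0 up to tolerance
    w = eigvalsh(gram)
    if w.min() < -tol:
        print('matrix is not PSD; min eigenvalue:', w.min())
    return w

# usage, mirroring check_gm.py (the archive key is an assumption):
# gmfile = np.load('results/structuralspkernel/Letter-med.gm.npz')
# for x in gmfile['gms']:
#     check_gram_matrix(x)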


+38640 -25989 notebooks/check_gm/Acyclic.gm.eps
(file diff suppressed because it is too large)


+38216 -38216 notebooks/check_gm/Alkane.gm.eps
(file diff suppressed because it is too large)


+38506 -38673 notebooks/check_gm/Letter-med.gm.eps
(file diff suppressed because it is too large)


+38452 -38472 notebooks/check_gm/MAO.gm.eps
(file diff suppressed because it is too large)


+38686 -26893 notebooks/check_gm/MUTAG.gm.eps
(file diff suppressed because it is too large)


+39859 -29249 notebooks/check_gm/PAH.gm.eps
(file diff suppressed because it is too large)


+5 -3 notebooks/run_commonwalkkernel.py

@@ -18,16 +18,18 @@ dslist = [
     {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
      'task': 'regression'}, # node symb
     {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
+     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
+    # contains single node graph, node symb
     {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
     {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
     {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
      'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
     {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-    # node symb/nsymb
+    # node nsymb
     {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-    # node/edge symb
+    # node symb/nsymb
     # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
     # # node/edge symb
     # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb



+6 -4 notebooks/run_marginalizedkernel.py

@@ -13,18 +13,20 @@ from pygraph.kernels.marginalizedKernel import marginalizedkernel
 
 dslist = [
     {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-    'task': 'regression'}, # node symb
+     'task': 'regression'}, # node symb
     {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
+     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
+    # contains single node graph, node symb
     {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
     {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
     {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
      'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
     {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-    # node symb/nsymb
+    # node nsymb
     {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-    # node/edge symb
+    # node symb/nsymb
     # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
     # # node/edge symb
     # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb



+5 -3 notebooks/run_spkernel.py

@@ -11,16 +11,18 @@ dslist = [
     {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
      'task': 'regression'}, # node symb
     {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
+     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
+    # contains single node graph, node symb
     {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
     {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
     {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
      'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
     {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-    # node symb/nsymb
+    # node nsymb
     {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-    # node/edge symb
+    # node symb/nsymb
     # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
     # # node/edge symb
     # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb



+15 -13 notebooks/run_structuralspkernel.py

@@ -18,18 +18,20 @@ dslist = [
     # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
     #  'task': 'regression'}, # node symb
     # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
-    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
-    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
-    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
-     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
+    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
+    # # contains single node graph, node symb
+    # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
+    # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
+    # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
+    #  'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
     {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-    # node symb/nsymb
-    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-    # node/edge symb
-    {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
-    {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
-     'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb
+    # node nsymb
+    # {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+    # # node symb/nsymb
+    # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
+    # # node/edge symb
+    # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
+    #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb
 
     # {'name': 'COIL-DEL', 'dataset': '../datasets/COIL-DEL/COIL-DEL_A.txt'}, # edge symb, node nsymb
     # # # {'name': 'BZR', 'dataset': '../datasets/BZR_txt/BZR_A_sparse.txt'}, # node symb/nsymb
@@ -38,8 +40,8 @@ dslist = [
 #
 # # {'name': 'DHFR', 'dataset': '../datasets/DHFR_txt/DHFR_A_sparse.txt'}, # node symb/nsymb
 # # {'name': 'SYNTHETIC', 'dataset': '../datasets/SYNTHETIC_txt/SYNTHETIC_A_sparse.txt'}, # node symb/nsymb
-# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb
-# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb
+# # {'name': 'MSRC9', 'dataset': '../datasets/MSRC_9_txt/MSRC_9_A.txt'}, # node symb, missing values
+# # {'name': 'MSRC21', 'dataset': '../datasets/MSRC_21_txt/MSRC_21_A.txt'}, # node symb, missing values
 # # {'name': 'FIRSTMM_DB', 'dataset': '../datasets/FIRSTMM_DB/FIRSTMM_DB_A.txt'}, # node symb/nsymb ,edge nsymb
 
 # # {'name': 'PROTEINS', 'dataset': '../datasets/PROTEINS_txt/PROTEINS_A_sparse.txt'}, # node symb/nsymb


+14 -12 notebooks/run_untilhpathkernel.py

@@ -15,19 +15,21 @@ from pygraph.kernels.untilHPathKernel import untilhpathkernel
 from pygraph.utils.kernels import deltakernel, kernelproduct
 
 dslist = [
-    # {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-    #  'task': 'regression'}, # node symb
-    # {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
-    #  'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', }, # contains single node graph, node symb
-    # {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
-    # {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
-    # {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
-    #  'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
-    # {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
-    # # node symb/nsymb
-    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
-    # node/edge symb
+    {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
+     'task': 'regression'}, # node symb
+    {'name': 'Alkane', 'dataset': '../datasets/Alkane/dataset.ds', 'task': 'regression',
+     'dataset_y': '../datasets/Alkane/dataset_boiling_point_names.txt', },
+    # contains single node graph, node symb
+    {'name': 'MAO', 'dataset': '../datasets/MAO/dataset.ds', }, # node/edge symb
+    {'name': 'PAH', 'dataset': '../datasets/PAH/dataset.ds', }, # unlabeled
+    {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
+     'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}, # node/edge symb
+    {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt'},
+    # node nsymb
+    {'name': 'ENZYMES', 'dataset': '../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'},
+    # node symb/nsymb
     # {'name': 'Mutagenicity', 'dataset': '../datasets/Mutagenicity/Mutagenicity_A.txt'},
     # # node/edge symb
     # {'name': 'D&D', 'dataset': '../datasets/D&D/DD.mat',
     #  'extra_params': {'am_sp_al_nl_el': [0, 1, 2, 1, -1]}}, # node symb



+158 -147 pygraph/kernels/structuralspKernel.py

@@ -103,10 +103,6 @@ def structuralspkernel(*args,
     # get shortest path graphs of Gn
     getsp_partial = partial(wrap_getSP, Gn, weight, ds_attrs['is_directed'])
     if len(Gn) < 1000 * n_jobs:
-        # # use default chunksize as pool.map when iterable is less than 100
-        # chunksize, extra = divmod(len(Gn), n_jobs * 4)
-        # if extra:
-        #     chunksize += 1
         chunksize = int(len(Gn) / n_jobs) + 1
     else:
         chunksize = 1000
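The retained branch above picks a multiprocessing chunksize: split the graph list evenly across the workers when it is small, and cap chunks at 1000 items otherwise. A minimal sketch of the heuristic (the helper name is illustrative; the commented pool call mirrors how getsp_partial is presumably consumed):

def pick_chunksize(n_items, n_jobs):
    # even split for small inputs, capped chunk size for large ones
    return n_items // n_jobs + 1 if n_items < 1000 * n_jobs else 1000

# from multiprocessing import Pool
# with Pool(n_jobs) as pool:
#     for result in pool.imap_unordered(getsp_partial, range(len(Gn)),
#                                       pick_chunksize(len(Gn), n_jobs)):
#         ...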
@@ -198,10 +194,13 @@
     # # ---- direct running, normally use single CPU core. ----
     # itr = combinations_with_replacement(range(0, len(Gn)), 2)
     # for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
-    #     i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs,
-    #         node_label, edge_label, node_kernels, edge_kernels, gs)
-    #     Kmatrix[i][j] = kernel
-    #     Kmatrix[j][i] = kernel
+    #     if gs[0] == 24 and gs[1] == 411:
+    #         i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs,
+    #             node_label, edge_label, node_kernels, edge_kernels, gs)
+    #         if(kernel > 1):
+    #             print("error here ")
+    #         Kmatrix[i][j] = kernel
+    #         Kmatrix[j][i] = kernel
 
     run_time = time.time() - start_time
     print(
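The commented probe above narrows the direct-running loop to one suspicious graph pair, (24, 411). Assuming the variables of structuralspkernel are in scope (e.g. in a debugger or notebook session), a single Gram entry can be recomputed and compared the same way; the signature is taken straight from the diff:

# recompute one suspicious entry and compare with the stored matrix
i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs, node_label,
                                     edge_label, node_kernels, edge_kernels,
                                     (24, 411))
print('stored:', Kmatrix[i][j], 'recomputed:', kernel)
if kernel > 1:
    print('suspicious value at', (i, j), ':', kernel)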
@@ -222,149 +221,161 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
     spl2 = splist[jglobal]
     kernel = 0
 
-    try:
-        # First, compute shortest path matrices, method borrowed from FCSP.
-        if ds_attrs['node_labeled']:
-            # node symb and non-symb labeled
-            if ds_attrs['node_attr_dim'] > 0:
-                kn = node_kernels['mix']
-                vk_dict = {}  # shortest path matrices dict
-                for n1, n2 in product(
-                        g1.nodes(data=True), g2.nodes(data=True)):
-                    vk_dict[(n1[0], n2[0])] = kn(
-                        n1[1][node_label], n2[1][node_label],
-                        [n1[1]['attributes']], [n2[1]['attributes']])
-            # node symb labeled
-            else:
-                kn = node_kernels['symb']
-                vk_dict = {}  # shortest path matrices dict
-                for n1 in g1.nodes(data=True):
-                    for n2 in g2.nodes(data=True):
-                        vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
-                                                     n2[1][node_label])
-        else:
-            # node non-symb labeled
-            if ds_attrs['node_attr_dim'] > 0:
-                kn = node_kernels['nsymb']
-                vk_dict = {}  # shortest path matrices dict
-                for n1 in g1.nodes(data=True):
-                    for n2 in g2.nodes(data=True):
-                        vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
-                                                     [n2[1]['attributes']])
-            # node unlabeled
-            else:
-                vk_dict = {}
+    #try:
+    # First, compute shortest path matrices, method borrowed from FCSP.
+    if ds_attrs['node_labeled']:
+        # node symb and non-symb labeled
+        if ds_attrs['node_attr_dim'] > 0:
+            kn = node_kernels['mix']
+            vk_dict = {}  # shortest path matrices dict
+            for n1, n2 in product(
+                    g1.nodes(data=True), g2.nodes(data=True)):
+                vk_dict[(n1[0], n2[0])] = kn(
+                    n1[1][node_label], n2[1][node_label],
+                    [n1[1]['attributes']], [n2[1]['attributes']])
+        # node symb labeled
+        else:
+            kn = node_kernels['symb']
+            vk_dict = {}  # shortest path matrices dict
+            for n1 in g1.nodes(data=True):
+                for n2 in g2.nodes(data=True):
+                    vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
+                                                 n2[1][node_label])
+    else:
+        # node non-symb labeled
+        if ds_attrs['node_attr_dim'] > 0:
+            kn = node_kernels['nsymb']
+            vk_dict = {}  # shortest path matrices dict
+            for n1 in g1.nodes(data=True):
+                for n2 in g2.nodes(data=True):
+                    vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
+                                                 [n2[1]['attributes']])
+        # node unlabeled
+        else:
+            vk_dict = {}
 
-        # Then, compute kernels between all pairs of edges, an idea extended
-        # from FCSP. It suits sparse graphs, which covers most of the cases we
-        # encountered. For dense graphs, it would be slow.
-        if ds_attrs['edge_labeled']:
-            # edge symb and non-symb labeled
-            if ds_attrs['edge_attr_dim'] > 0:
-                ke = edge_kernels['mix']
-                ek_dict = {}  # dict of edge kernels
-                for e1, e2 in product(
-                        g1.edges(data=True), g2.edges(data=True)):
-                    ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke(
-                        e1[2][edge_label], e2[2][edge_label],
-                        [e1[2]['attributes']], [e2[2]['attributes']])
-            # edge symb labeled
-            else:
-                ke = edge_kernels['symb']
-                ek_dict = {}
-                for e1 in g1.edges(data=True):
-                    for e2 in g2.edges(data=True):
-                        ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke(
-                            e1[2][edge_label], e2[2][edge_label])
-        else:
-            # edge non-symb labeled
-            if ds_attrs['edge_attr_dim'] > 0:
-                ke = edge_kernels['nsymb']
-                ek_dict = {}
-                for e1 in g1.edges(data=True):
-                    for e2 in g2.edges(data=True):
-                        ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ke(
-                            [e1[2]['attributes']], [e2[2]['attributes']])
-            # edge unlabeled
-            else:
-                ek_dict = {}
+    # Then, compute kernels between all pairs of edges, an idea extended
+    # from FCSP. It suits sparse graphs, which covers most of the cases we
+    # encountered. For dense graphs, it would be slow.
+    if ds_attrs['edge_labeled']:
+        # edge symb and non-symb labeled
+        if ds_attrs['edge_attr_dim'] > 0:
+            ke = edge_kernels['mix']
+            ek_dict = {}  # dict of edge kernels
+            for e1, e2 in product(
+                    g1.edges(data=True), g2.edges(data=True)):
+                ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
+                             [e1[2]['attributes']], [e2[2]['attributes']])
+                ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
+                ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
+                ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
+                ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
+        # edge symb labeled
+        else:
+            ke = edge_kernels['symb']
+            ek_dict = {}
+            for e1 in g1.edges(data=True):
+                for e2 in g2.edges(data=True):
+                    ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
+                    ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
+                    ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
+                    ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
+                    ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
+    else:
+        # edge non-symb labeled
+        if ds_attrs['edge_attr_dim'] > 0:
+            ke = edge_kernels['nsymb']
+            ek_dict = {}
+            for e1 in g1.edges(data=True):
+                for e2 in g2.edges(data=True):
+                    ek_temp = ke([e1[2]['attributes']], [e2[2]['attributes']])
+                    ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
+                    ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
+                    ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
+                    ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
+        # edge unlabeled
+        else:
+            ek_dict = {}
 
-        # compute graph kernels
-        if vk_dict:
-            if ek_dict:
-                for p1, p2 in product(spl1, spl2):
-                    if len(p1) == len(p2):
-                        kpath = vk_dict[(p1[0], p2[0])]
-                        if kpath:
-                            for idx in range(1, len(p1)):
-                                kpath *= vk_dict[(p1[idx], p2[idx])] * \
-                                    ek_dict[((p1[idx-1], p1[idx]),
-                                             (p2[idx-1], p2[idx]))]
-                                if not kpath:
-                                    break
-                            kernel += kpath  # add up kernels of all paths
-            else:
-                for p1, p2 in product(spl1, spl2):
-                    if len(p1) == len(p2):
-                        kpath = vk_dict[(p1[0], p2[0])]
-                        if kpath:
-                            for idx in range(1, len(p1)):
-                                kpath *= vk_dict[(p1[idx], p2[idx])]
-                                if not kpath:
-                                    break
-                            kernel += kpath  # add up kernels of all paths
-        else:
-            if ek_dict:
-                for p1, p2 in product(spl1, spl2):
-                    if len(p1) == len(p2):
-                        if len(p1) == 0:
-                            kernel += 1
-                        else:
-                            kpath = 1
-                            for idx in range(0, len(p1) - 1):
-                                kpath *= ek_dict[((p1[idx], p1[idx+1]),
-                                                  (p2[idx], p2[idx+1]))]
-                                if not kpath:
-                                    break
-                            kernel += kpath  # add up kernels of all paths
-            else:
-                for p1, p2 in product(spl1, spl2):
-                    if len(p1) == len(p2):
-                        kernel += 1
+    # compute graph kernels
+    if vk_dict:
+        if ek_dict:
+            for p1, p2 in product(spl1, spl2):
+                if len(p1) == len(p2):
+                    kpath = vk_dict[(p1[0], p2[0])]
+                    if kpath:
+                        for idx in range(1, len(p1)):
+                            kpath *= vk_dict[(p1[idx], p2[idx])] * \
+                                ek_dict[((p1[idx-1], p1[idx]),
+                                         (p2[idx-1], p2[idx]))]
+                            if not kpath:
+                                break
+                        kernel += kpath  # add up kernels of all paths
+        else:
+            for p1, p2 in product(spl1, spl2):
+                if len(p1) == len(p2):
+                    kpath = vk_dict[(p1[0], p2[0])]
+                    if kpath:
+                        for idx in range(1, len(p1)):
+                            kpath *= vk_dict[(p1[idx], p2[idx])]
+                            if not kpath:
+                                break
+                        kernel += kpath  # add up kernels of all paths
+    else:
+        if ek_dict:
+            for p1, p2 in product(spl1, spl2):
+                if len(p1) == len(p2):
+                    if len(p1) == 0:
+                        kernel += 1
+                    else:
+                        kpath = 1
+                        for idx in range(0, len(p1) - 1):
+                            kpath *= ek_dict[((p1[idx], p1[idx+1]),
+                                              (p2[idx], p2[idx+1]))]
+                            if not kpath:
+                                break
+                        kernel += kpath  # add up kernels of all paths
+        else:
+            for p1, p2 in product(spl1, spl2):
+                if len(p1) == len(p2):
+                    kernel += 1
 
-        kernel = kernel / (len(spl1) * len(spl2))  # average over all path pairs
+    kernel = kernel / (len(spl1) * len(spl2))  # average over all path pairs
 
-        # # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
-        # # compute vertex kernel matrix
-        # try:
-        #     vk_mat = np.zeros((nx.number_of_nodes(g1),
-        #                        nx.number_of_nodes(g2)))
-        #     g1nl = enumerate(g1.nodes(data=True))
-        #     g2nl = enumerate(g2.nodes(data=True))
-        #     for i1, n1 in g1nl:
-        #         for i2, n2 in g2nl:
-        #             vk_mat[i1][i2] = kn(
-        #                 n1[1][node_label], n2[1][node_label],
-        #                 [n1[1]['attributes']], [n2[1]['attributes']])
+    # # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
+    # # compute vertex kernel matrix
+    # try:
+    #     vk_mat = np.zeros((nx.number_of_nodes(g1),
+    #                        nx.number_of_nodes(g2)))
+    #     g1nl = enumerate(g1.nodes(data=True))
+    #     g2nl = enumerate(g2.nodes(data=True))
+    #     for i1, n1 in g1nl:
+    #         for i2, n2 in g2nl:
+    #             vk_mat[i1][i2] = kn(
+    #                 n1[1][node_label], n2[1][node_label],
+    #                 [n1[1]['attributes']], [n2[1]['attributes']])
 
-        #     range1 = range(0, len(edge_w_g[i]))
-        #     range2 = range(0, len(edge_w_g[j]))
-        #     for i1 in range1:
-        #         x1 = edge_x_g[i][i1]
-        #         y1 = edge_y_g[i][i1]
-        #         w1 = edge_w_g[i][i1]
-        #         for i2 in range2:
-        #             x2 = edge_x_g[j][i2]
-        #             y2 = edge_y_g[j][i2]
-        #             w2 = edge_w_g[j][i2]
-        #             ke = (w1 == w2)
-        #             if ke > 0:
-        #                 kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
-        #                 kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
-        #                 Kmatrix += kn1 + kn2
-    except KeyError:  # missing labels or attributes
-        pass
+    #     range1 = range(0, len(edge_w_g[i]))
+    #     range2 = range(0, len(edge_w_g[j]))
+    #     for i1 in range1:
+    #         x1 = edge_x_g[i][i1]
+    #         y1 = edge_y_g[i][i1]
+    #         w1 = edge_w_g[i][i1]
+    #         for i2 in range2:
+    #             x2 = edge_x_g[j][i2]
+    #             y2 = edge_y_g[j][i2]
+    #             w2 = edge_w_g[j][i2]
+    #             ke = (w1 == w2)
+    #             if ke > 0:
+    #                 kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
+    #                 kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
+    #                 Kmatrix += kn1 + kn2
+    #except KeyError:  # missing labels or attributes
+    #    print("toto")
+    #    pass
+
+    if(kernel > 1):
+        print("kernel error : ", ij)
 
     return iglobal, jglobal, kernel
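The substantive change in structuralspkernel_do is twofold. First, the blanket try/except KeyError that silently skipped a graph pair on any missing dictionary key is removed (left behind only as comments), so lookup failures now surface as real errors instead of producing incomplete kernels. Second, each precomputed edge-kernel value is now registered under all four orientation combinations of the two edges: g1.edges() yields an undirected edge once in an arbitrary orientation, while a shortest path may traverse it in either direction, so the old single-key ek_dict could miss and raise exactly the KeyError the removed handler used to swallow. A self-contained sketch of that fix (build_edge_kernel_dict is an illustrative name, not the library's API):

from itertools import product
import networkx as nx

def build_edge_kernel_dict(g1, g2, ke, edge_label):
    # precompute kernels between all pairs of edges of two undirected
    # graphs, storing each value under all four orientation keys
    ek_dict = {}
    for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
        ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
        for k1 in ((e1[0], e1[1]), (e1[1], e1[0])):
            for k2 in ((e2[0], e2[1]), (e2[1], e2[0])):
                ek_dict[(k1, k2)] = ek_temp
    return ek_dict

# usage with a Kronecker-delta edge kernel on a toy labeled graph:
g = nx.Graph()
g.add_edge(0, 1, bond_type='single')
g.add_edge(1, 2, bond_type='double')
ek = build_edge_kernel_dict(g, g, lambda a, b: 1 if a == b else 0, 'bond_type')
assert ek[((1, 0), (0, 1))] == 1  # reversed orientation no longer misses

The memory cost is four dictionary entries per edge pair instead of one; an alternative would be to canonicalize keys (e.g. sort each endpoint pair) at both insertion and lookup time.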



