@@ -103,10 +103,6 @@ def structuralspkernel(*args,
# get shortest path graphs of Gn
getsp_partial = partial(wrap_getSP, Gn, weight, ds_attrs['is_directed'])
if len(Gn) < 1000 * n_jobs:
chunksize = int(len(Gn) / n_jobs) + 1
else:
chunksize = 1000
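    # A minimal sketch (an assumption, not necessarily the exact consumer
    # below) of how getsp_partial and chunksize would feed a multiprocessing
    # pool; wrap_getSP is assumed to yield (index, shortest-path graph) pairs:
    # from multiprocessing import Pool
    # splist = [None] * len(Gn)
    # pool = Pool(n_jobs)
    # for i, sp in tqdm(
    #         pool.imap_unordered(getsp_partial, range(0, len(Gn)), chunksize),
    #         desc='getting sp graphs', file=sys.stdout):
    #     splist[i] = sp
    # pool.close()
    # pool.join()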
@@ -198,10 +194,13 @@ def structuralspkernel(*args,
# # ---- direct running, normally use single CPU core. ----
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for gs in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# i, j, kernel = structuralspkernel_do(Gn, splist, ds_attrs,
# node_label, edge_label, node_kernels, edge_kernels, gs)
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
run_time = time.time() - start_time
print(
@@ -222,149 +221,161 @@ def structuralspkernel_do(Gn, splist, ds_attrs, node_label, edge_label,
spl2 = splist[jglobal]
kernel = 0
try:
# First, compute shortest path matrices, method borrowed from FCSP.
if ds_attrs['node_labeled']:
            # node symb and non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['mix']
vk_dict = {} # shortest path matrices dict
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
vk_dict[(n1[0], n2[0])] = kn(
n1[1][node_label], n2[1][node_label],
[n1[1]['attributes']], [n2[1]['attributes']])
# node symb labeled
else:
kn = node_kernels['symb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn(n1[1][node_label],
n2[1][node_label])
else:
            # node non-symb labeled
if ds_attrs['node_attr_dim'] > 0:
kn = node_kernels['nsymb']
vk_dict = {} # shortest path matrices dict
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
vk_dict[(n1[0], n2[0])] = kn([n1[1]['attributes']],
[n2[1]['attributes']])
# node unlabeled
else:
vk_dict = {}
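        # (The FCSP trick above: all |V(g1)| x |V(g2)| node-kernel values are
        # computed once up front, so the path loops below reduce to O(1) dict
        # lookups per aligned vertex pair instead of repeated kernel calls.)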
        # Then, compute kernels between all pairs of edges; the idea is an
        # extension of FCSP. It suits sparse graphs, which covers most of the
        # cases we encounter. For dense graphs, it would be slow.
        if ds_attrs['edge_labeled']:
            # edge symb and non-symb labeled
            if ds_attrs['edge_attr_dim'] > 0:
                ke = edge_kernels['mix']
                ek_dict = {}  # dict of edge kernels
                for e1, e2 in product(
                        g1.edges(data=True), g2.edges(data=True)):
                    ek_temp = ke(e1[2][edge_label], e2[2][edge_label],
                                 [e1[2]['attributes']], [e2[2]['attributes']])
                    # store all four node orderings, so that undirected
                    # edges can be looked up in either traversal direction
                    ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                    ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
                    ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
                    ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
            # edge symb labeled
            else:
                ke = edge_kernels['symb']
                ek_dict = {}
                for e1 in g1.edges(data=True):
                    for e2 in g2.edges(data=True):
                        ek_temp = ke(e1[2][edge_label], e2[2][edge_label])
                        ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                        ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
                        ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
                        ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
        else:
            # edge non-symb labeled
            if ds_attrs['edge_attr_dim'] > 0:
                ke = edge_kernels['nsymb']
                ek_dict = {}
                for e1 in g1.edges(data=True):
                    for e2 in g2.edges(data=True):
                        ek_temp = ke([e1[2]['attributes']],
                                     [e2[2]['attributes']])
                        ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                        ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
                        ek_dict[((e1[0], e1[1]), (e2[1], e2[0]))] = ek_temp
                        ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
            # edge unlabeled
            else:
                ek_dict = {}
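        # Why four key orderings: a shortest path may traverse an undirected
        # edge in either direction, so lookups like ((v, u), (y, x)) must
        # succeed even though edges were enumerated as (u, v) and (x, y).
        # A hypothetical toy check (not part of this module):
        # ek = {}
        # for key1 in [(0, 1), (1, 0)]:
        #     for key2 in [(2, 3), (3, 2)]:
        #         ek[(key1, key2)] = 0.5  # some edge-kernel value
        # assert ek[((1, 0), (3, 2))] == 0.5  # reversed lookup succeeds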
# compute graph kernels
if vk_dict:
if ek_dict:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = vk_dict[(p1[0], p2[0])]
if kpath:
for idx in range(1, len(p1)):
kpath *= vk_dict[(p1[idx], p2[idx])] * \
ek_dict[((p1[idx-1], p1[idx]),
(p2[idx-1], p2[idx]))]
if not kpath:
break
kernel += kpath # add up kernels of all paths
else:
for p1, p2 in product(spl1, spl2):
if len(p1) == len(p2):
kpath = vk_dict[(p1[0], p2[0])]
if kpath:
for idx in range(1, len(p1)):
kpath *= vk_dict[(p1[idx], p2[idx])]
if not kpath:
break
kernel += kpath # add up kernels of all paths
        else:
            if ek_dict:
                for p1, p2 in product(spl1, spl2):
                    if len(p1) == len(p2):
                        if len(p1) == 0:
                            kernel += 1
                        else:
                            kpath = 1
                            for idx in range(0, len(p1) - 1):
                                kpath *= ek_dict[((p1[idx], p1[idx+1]),
                                                  (p2[idx], p2[idx+1]))]
                                if not kpath:
                                    break
                            kernel += kpath  # add up kernels of all paths
            else:
                for p1, p2 in product(spl1, spl2):
                    if len(p1) == len(p2):
                        kernel += 1
        kernel = kernel / (len(spl1) * len(spl2))  # calculate mean average
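        # Worked example with hypothetical values: for equal-length paths
        # p1 = [a, b, c] and p2 = [x, y, z], the labeled path kernel is
        #     kpath = vk(a, x) * ek((a, b), (x, y)) * vk(b, y)
        #                      * ek((b, c), (y, z)) * vk(c, z)
        # e.g. 0.9 * 1.0 * 0.8 * 1.0 * 0.7 = 0.504; the `if not kpath`
        # checks short-circuit as soon as any factor is zero.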
# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
# # compute vertex kernel matrix
# try:
# vk_mat = np.zeros((nx.number_of_nodes(g1),
# nx.number_of_nodes(g2)))
# g1nl = enumerate(g1.nodes(data=True))
# g2nl = enumerate(g2.nodes(data=True))
# for i1, n1 in g1nl:
# for i2, n2 in g2nl:
# vk_mat[i1][i2] = kn(
# n1[1][node_label], n2[1][node_label],
# [n1[1]['attributes']], [n2[1]['attributes']])
# range1 = range(0, len(edge_w_g[i]))
# range2 = range(0, len(edge_w_g[j]))
# for i1 in range1:
# x1 = edge_x_g[i][i1]
# y1 = edge_y_g[i][i1]
# w1 = edge_w_g[i][i1]
# for i2 in range2:
# x2 = edge_x_g[j][i2]
# y2 = edge_y_g[j][i2]
# w2 = edge_w_g[j][i2]
# ke = (w1 == w2)
# if ke > 0:
# kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
# kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
# Kmatrix += kn1 + kn2
except KeyError: # missing labels or attributes
pass
return iglobal, jglobal, kernel
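
# A self-contained sketch (assumptions, not this library's canonical helpers)
# of the node_kernels / edge_kernels dicts structuralspkernel_do expects:
# 'symb' compares two symbolic labels, 'nsymb' two attribute vectors, and
# 'mix' takes both pairs, matching the call sites above.
# import numpy as np
# def deltakernel(x, y):
#     return x == y
# def gaussiankernel(x, y, gamma=1.0):
#     d = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
#     return np.exp(-gamma * np.dot(d.ravel(), d.ravel()))
# def mixkernel(sl1, sl2, al1, al2):
#     return deltakernel(sl1, sl2) * gaussiankernel(al1, al2)
# node_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
# edge_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}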