2. Correct an error in the common walk kernel. DO NOT use the old one. 3. Improve the method to construct fully-labeled direct product graphs; it is much faster for sparse graphs. v0.1
@@ -0,0 +1 @@ | |||||
ljia@ljia-Precision-7520.4716:1530265749 |
@@ -24,7 +24,7 @@ def commonwalkkernel(*args, | |||||
edge_label='bond_type', | edge_label='bond_type', | ||||
n=None, | n=None, | ||||
weight=1, | weight=1, | ||||
compute_method='exp'): | |||||
compute_method=None): | |||||
"""Calculate common walk graph kernels up to depth d between graphs. | """Calculate common walk graph kernels up to depth d between graphs. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -40,10 +40,11 @@ def commonwalkkernel(*args, | |||||
n : integer | n : integer | ||||
Longest length of walks. | Longest length of walks. | ||||
weight: integer | weight: integer | ||||
Weight coefficient of different lengths of walks. | |||||
Weight coefficient of different lengths of walks, which represents beta in 'exp' method and gamma in 'geo'. | |||||
compute_method : string | compute_method : string | ||||
Method used to compute walk kernel. The Following choices are available: | Method used to compute walk kernel. The Following choices are available: | ||||
'direct' : direct product graph method, as shown in reference [1]. The time complexity is O(n^6) for unlabeled graphs with n vertices. | |||||
'exp' : exponential serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices. | |||||
'geo' : geometric serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices. | |||||
'brute' : brute force, simply search for all walks and compare them. | 'brute' : brute force, simply search for all walks and compare them. | ||||
Return | Return | ||||
@@ -66,6 +67,8 @@ def commonwalkkernel(*args, | |||||
if not ds_attrs['edge_labeled']: | if not ds_attrs['edge_labeled']: | ||||
for G in Gn: | for G in Gn: | ||||
nx.set_edge_attributes(G, '0', 'bond_type') | nx.set_edge_attributes(G, '0', 'bond_type') | ||||
if not ds_attrs['is_directed']: | |||||
Gn = [G.to_directed() for G in Gn] | |||||
start_time = time.time() | start_time = time.time() | ||||
@@ -77,7 +80,7 @@ def commonwalkkernel(*args, | |||||
file=sys.stdout) | file=sys.stdout) | ||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
Kmatrix[i][j] = _untilnwalkkernel_exp(Gn[i], Gn[j], node_label, | |||||
Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label, | |||||
edge_label, weight) | edge_label, weight) | ||||
Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
pbar.update(1) | pbar.update(1) | ||||
@@ -90,7 +93,7 @@ def commonwalkkernel(*args, | |||||
file=sys.stdout) | file=sys.stdout) | ||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
Kmatrix[i][j] = _untilnwalkkernel_geo(Gn[i], Gn[j], node_label, | |||||
Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label, | |||||
edge_label, weight) | edge_label, weight) | ||||
Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
pbar.update(1) | pbar.update(1) | ||||
@@ -106,7 +109,7 @@ def commonwalkkernel(*args, | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
Kmatrix[i][j] = _untilnwalkkernel_brute( | |||||
Kmatrix[i][j] = _commonwalkkernel_brute( | |||||
all_walks[i], | all_walks[i], | ||||
all_walks[j], | all_walks[j], | ||||
node_label=node_label, | node_label=node_label, | ||||
@@ -122,7 +125,7 @@ def commonwalkkernel(*args, | |||||
return Kmatrix, run_time | return Kmatrix, run_time | ||||
def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
def _commonwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using exponential series. | """Calculate walk graph kernels up to n between 2 graphs using exponential series. | ||||
Parameters | Parameters | ||||
@@ -168,7 +171,7 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
D = np.zeros((len(ew), len(ew))) | D = np.zeros((len(ew), len(ew))) | ||||
for i in range(len(ew)): | for i in range(len(ew)): | ||||
D[i][i] = np.exp(beta * ew[i]) | D[i][i] = np.exp(beta * ew[i]) | ||||
# print('D: ', D) | |||||
# print('D: ', D) | |||||
# print('hshs: ', T.I * D * T) | # print('hshs: ', T.I * D * T) | ||||
# print(np.exp(-2)) | # print(np.exp(-2)) | ||||
@@ -176,16 +179,16 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta): | |||||
# print(np.exp(weight * D)) | # print(np.exp(weight * D)) | ||||
# print(ev) | # print(ev) | ||||
# print(np.linalg.inv(ev)) | # print(np.linalg.inv(ev)) | ||||
exp_D = ev * D * ev.I | |||||
exp_D = ev * D * ev.T | |||||
# print(exp_D) | # print(exp_D) | ||||
# print(np.exp(weight * A)) | # print(np.exp(weight * A)) | ||||
# print('-------') | # print('-------') | ||||
return np.sum(exp_D.diagonal()) | |||||
return exp_D.sum() | |||||
def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using geometric series. | |||||
def _commonwalkkernel_geo(G1, G2, node_label, edge_label, gamma): | |||||
"""Calculate common walk graph kernels up to n between 2 graphs using geometric series. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -207,46 +210,14 @@ def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma): | |||||
# get tensor product / direct product | # get tensor product / direct product | ||||
gp = direct_product(G1, G2, node_label, edge_label) | gp = direct_product(G1, G2, node_label, edge_label) | ||||
A = nx.adjacency_matrix(gp).todense() | A = nx.adjacency_matrix(gp).todense() | ||||
# print(A) | |||||
# from matplotlib import pyplot as plt | |||||
# nx.draw_networkx(G1) | |||||
# plt.show() | |||||
# nx.draw_networkx(G2) | |||||
# plt.show() | |||||
# nx.draw_networkx(gp) | |||||
# plt.show() | |||||
# print(G1.nodes(data=True)) | |||||
# print(G2.nodes(data=True)) | |||||
# print(gp.nodes(data=True)) | |||||
# print(gp.edges(data=True)) | |||||
ew, ev = np.linalg.eig(A) | |||||
# print('ew: ', ew) | |||||
# print(ev) | |||||
# T = np.matrix(ev) | |||||
# print('T: ', T) | |||||
# T = ev.I | |||||
D = np.zeros((len(ew), len(ew))) | |||||
for i in range(len(ew)): | |||||
D[i][i] = np.exp(beta * ew[i]) | |||||
# print('D: ', D) | |||||
# print('hshs: ', T.I * D * T) | |||||
# print(np.exp(-2)) | |||||
# print(D) | |||||
# print(np.exp(weight * D)) | |||||
# print(ev) | |||||
# print(np.linalg.inv(ev)) | |||||
exp_D = ev * D * ev.I | |||||
# print(exp_D) | |||||
# print(np.exp(weight * A)) | |||||
# print('-------') | |||||
return np.sum(exp_D.diagonal()) | |||||
mat = np.identity(len(A)) - gamma * A | |||||
try: | |||||
return mat.I.sum() | |||||
except np.linalg.LinAlgError: | |||||
return np.nan | |||||
def _untilnwalkkernel_brute(walks1, | |||||
def _commonwalkkernel_brute(walks1, | |||||
walks2, | walks2, | ||||
node_label='atom', | node_label='atom', | ||||
edge_label='bond_type', | edge_label='bond_type', | ||||
@@ -19,7 +19,11 @@ from pygraph.utils.graphdataset import get_dataset_attributes | |||||
def randomwalkkernel(*args, | def randomwalkkernel(*args, | ||||
node_label='atom', | node_label='atom', | ||||
edge_label='bond_type', | edge_label='bond_type', | ||||
edge_weight=None, | |||||
h=10, | h=10, | ||||
p=None, | |||||
q=None, | |||||
weight=None, | |||||
compute_method=''): | compute_method=''): | ||||
"""Calculate random walk graph kernels. | """Calculate random walk graph kernels. | ||||
Parameters | Parameters | ||||
@@ -33,7 +37,7 @@ def randomwalkkernel(*args, | |||||
node attribute used as label. The default node label is atom. | node attribute used as label. The default node label is atom. | ||||
edge_label : string | edge_label : string | ||||
edge attribute used as label. The default edge label is bond_type. | edge attribute used as label. The default edge label is bond_type. | ||||
n : integer | |||||
h : integer | |||||
Longest length of walks. | Longest length of walks. | ||||
method : string | method : string | ||||
Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'. | Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'. | ||||
@@ -46,7 +50,25 @@ def randomwalkkernel(*args, | |||||
compute_method = compute_method.lower() | compute_method = compute_method.lower() | ||||
h = int(h) | h = int(h) | ||||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | Gn = args[0] if len(args) == 1 else [args[0], args[1]] | ||||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||||
eweight = None | |||||
if edge_weight == None: | |||||
print('\n None edge weight specified. Set all weight to 1.\n') | |||||
else: | |||||
try: | |||||
some_weight = list( | |||||
nx.get_edge_attributes(Gn[0], edge_weight).values())[0] | |||||
if isinstance(some_weight, float) or isinstance(some_weight, int): | |||||
eweight = edge_weight | |||||
else: | |||||
print( | |||||
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
except: | |||||
print( | |||||
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n' | |||||
% edge_weight) | |||||
ds_attrs = get_dataset_attributes( | ds_attrs = get_dataset_attributes( | ||||
Gn, | Gn, | ||||
attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | ||||
@@ -71,76 +93,224 @@ def randomwalkkernel(*args, | |||||
# labeled=labeled) for i in range(0, len(Gn)) | # labeled=labeled) for i in range(0, len(Gn)) | ||||
# ] | # ] | ||||
pbar = tqdm( | |||||
total=(1 + len(Gn)) * len(Gn) / 2, | |||||
desc='calculating kernels', | |||||
file=sys.stdout) | |||||
if compute_method == 'sylvester': | if compute_method == 'sylvester': | ||||
import warnings | import warnings | ||||
warnings.warn( | warnings.warn( | ||||
'The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.' | |||||
'The Sylvester equation (rather than generalized Sylvester equation) is used; edge label number has to smaller than 3.' | |||||
) | ) | ||||
from control import dlyap | |||||
Kmatrix = _randomwalkkernel_sylvester(Gn, weight, p, q, node_label, | |||||
edge_label, eweight) | |||||
elif compute_method == 'conjugate': | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
Kmatrix[i][j] = _randomwalkkernel_sylvester( | |||||
all_walks[i], | |||||
all_walks[j], | |||||
node_label=node_label, | |||||
edge_label=edge_label) | |||||
Kmatrix[i][j] = _randomwalkkernel_conjugate( | |||||
Gn[i], Gn[j], node_label, edge_label) | |||||
Kmatrix[j][i] = Kmatrix[i][j] | Kmatrix[j][i] = Kmatrix[i][j] | ||||
pbar.update(1) | pbar.update(1) | ||||
elif compute_method == 'conjugate': | |||||
pass | |||||
elif compute_method == 'fp': | elif compute_method == 'fp': | ||||
pass | |||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
Kmatrix[i][j] = _randomwalkkernel_fp(Gn[i], Gn[j], node_label, | |||||
edge_label) | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
elif compute_method == 'spectral': | elif compute_method == 'spectral': | ||||
pass | |||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
Kmatrix[i][j] = _randomwalkkernel_spectral( | |||||
Gn[i], Gn[j], node_label, edge_label) | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
elif compute_method == 'kron': | elif compute_method == 'kron': | ||||
pass | |||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j], | |||||
node_label, edge_label) | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
else: | else: | ||||
raise Exception( | raise Exception( | ||||
'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".' | 'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".' | ||||
) | ) | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
Kmatrix[i][j] = _randomwalkkernel_do( | |||||
all_walks[i], | |||||
all_walks[j], | |||||
node_label=node_label, | |||||
edge_label=edge_label, | |||||
labeled=labeled) | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
# for i in range(0, len(Gn)): | |||||
# for j in range(i, len(Gn)): | |||||
# Kmatrix[i][j] = _randomwalkkernel_do( | |||||
# all_walks[i], | |||||
# all_walks[j], | |||||
# node_label=node_label, | |||||
# edge_label=edge_label, | |||||
# labeled=labeled) | |||||
# Kmatrix[j][i] = Kmatrix[i][j] | |||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | print( | ||||
"\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---" | |||||
% (n, len(Gn), run_time)) | |||||
"\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---" | |||||
% (len(Gn), run_time)) | |||||
return Kmatrix, run_time | return Kmatrix, run_time | ||||
def _randomwalkkernel_sylvester(walks1, | |||||
walks2, | |||||
node_label='atom', | |||||
edge_label='bond_type'): | |||||
def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label, | |||||
eweight): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | """Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
walks1, walks2 : list | |||||
List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk. | |||||
G1, G2 : NetworkX graph | |||||
Graphs between which the kernel is calculated. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. The default node label is atom. | |||||
node attribute used as label. | |||||
edge_label : string | edge_label : string | ||||
edge attribute used as label. The default edge label is bond_type. | |||||
edge attribute used as label. | |||||
Return | |||||
------ | |||||
kernel : float | |||||
Kernel between 2 graphs. | |||||
""" | |||||
from control import dlyap | |||||
Kmatrix = np.zeros((len(Gn), len(Gn))) | |||||
if q == None: | |||||
# don't normalize adjacency matrices if q is a uniform vector. | |||||
A_list = [ | |||||
nx.adjacency_matrix(G, eweight).todense() for G in tqdm( | |||||
Gn, desc='compute adjacency matrices', file=sys.stdout) | |||||
] | |||||
if p == None: | |||||
pbar = tqdm( | |||||
total=(1 + len(Gn)) * len(Gn) / 2, | |||||
desc='calculating kernels', | |||||
file=sys.stdout) | |||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
A = lmda * A_list[j] | |||||
Q = A_list[i] | |||||
# use uniform distribution if there is no prior knowledge. | |||||
nb_pd = len(A_list[i]) * len(A_list[j]) | |||||
pd_uni = 1 / nb_pd | |||||
C = np.full((len(A_list[j]), len(A_list[i])), pd_uni) | |||||
try: | |||||
X = dlyap(A, Q, C) | |||||
X = np.reshape(X, (-1, 1), order='F') | |||||
# use uniform distribution if there is no prior knowledge. | |||||
q_direct = np.full((1, nb_pd), pd_uni) | |||||
Kmatrix[i][j] = np.dot(q_direct, X) | |||||
except TypeError: | |||||
# print('sth wrong.') | |||||
Kmatrix[i][j] = np.nan | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
# A_list = [] | |||||
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout): | |||||
# A_tilde = nx.adjacency_matrix(G, weight=None).todense() | |||||
# # normalized adjacency matrices | |||||
# # A_list.append(A_tilde / A_tilde.sum(axis=0)) | |||||
# A_list.append(A_tilde) | |||||
return Kmatrix | |||||
def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method. | |||||
Parameters | |||||
---------- | |||||
G1, G2 : NetworkX graph | |||||
Graphs between which the kernel is calculated. | |||||
node_label : string | |||||
node attribute used as label. | |||||
edge_label : string | |||||
edge attribute used as label. | |||||
Return | |||||
------ | |||||
kernel : float | |||||
Kernel between 2 graphs. | |||||
""" | |||||
dpg = nx.tensor_product(G1, G2) # direct product graph | |||||
import matplotlib.pyplot as plt | |||||
nx.draw_networkx(G1) | |||||
plt.show() | |||||
nx.draw_networkx(G2) | |||||
plt.show() | |||||
nx.draw_networkx(dpg) | |||||
plt.show() | |||||
X = dlyap(A, Q, C) | |||||
return kernel | |||||
def _randomwalkkernel_fp(G1, G2, node_label, edge_label): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. | |||||
Parameters | |||||
---------- | |||||
G1, G2 : NetworkX graph | |||||
Graphs between which the kernel is calculated. | |||||
node_label : string | |||||
node attribute used as label. | |||||
edge_label : string | |||||
edge attribute used as label. | |||||
Return | |||||
------ | |||||
kernel : float | |||||
Kernel between 2 graphs. | |||||
""" | |||||
dpg = nx.tensor_product(G1, G2) # direct product graph | |||||
X = dlyap(A, Q, C) | |||||
return kernel | |||||
def _randomwalkkernel_spectral(G1, G2, node_label, edge_label): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using spectral decomposition method. | |||||
Parameters | |||||
---------- | |||||
G1, G2 : NetworkX graph | |||||
Graphs between which the kernel is calculated. | |||||
node_label : string | |||||
node attribute used as label. | |||||
edge_label : string | |||||
edge attribute used as label. | |||||
Return | |||||
------ | |||||
kernel : float | |||||
Kernel between 2 graphs. | |||||
""" | |||||
dpg = nx.tensor_product(G1, G2) # direct product graph | |||||
X = dlyap(A, Q, C) | |||||
return kernel | |||||
def _randomwalkkernel_kron(G1, G2, node_label, edge_label): | |||||
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. | |||||
Parameters | |||||
---------- | |||||
G1, G2 : NetworkX graph | |||||
Graphs between which the kernel is calculated. | |||||
node_label : string | |||||
node attribute used as label. | |||||
edge_label : string | |||||
edge attribute used as label. | |||||
Return | Return | ||||
------ | ------ | ||||
kernel : float | kernel : float | ||||
Treelet Kernel between 2 graphs. | |||||
Kernel between 2 graphs. | |||||
""" | """ | ||||
dpg = nx.tensor_product(G1, G2) # direct product graph | dpg = nx.tensor_product(G1, G2) # direct product graph | ||||
@@ -8,6 +8,7 @@ import pathlib | |||||
sys.path.insert(0, "../") | sys.path.insert(0, "../") | ||||
from tqdm import tqdm | from tqdm import tqdm | ||||
import time | import time | ||||
from itertools import combinations_with_replacement, product | |||||
import networkx as nx | import networkx as nx | ||||
import numpy as np | import numpy as np | ||||
@@ -39,8 +40,6 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None): | |||||
# pre-process | # pre-process | ||||
Gn = args[0] if len(args) == 1 else [args[0], args[1]] | Gn = args[0] if len(args) == 1 else [args[0], args[1]] | ||||
Gn = [nx.to_directed(G) for G in Gn] | |||||
weight = None | weight = None | ||||
if edge_weight == None: | if edge_weight == None: | ||||
print('\n None edge weight specified. Set all weight to 1.\n') | print('\n None edge weight specified. Set all weight to 1.\n') | ||||
@@ -89,174 +88,158 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None): | |||||
# node symb and non-synb labeled | # node symb and non-synb labeled | ||||
if ds_attrs['node_attr_dim'] > 0: | if ds_attrs['node_attr_dim'] > 0: | ||||
if ds_attrs['is_directed']: | if ds_attrs['is_directed']: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['mix'] | |||||
try: | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
0]], Gn[i].nodes[e1[1]], Gn[ | |||||
j].nodes[e2[0]], Gn[j].nodes[ | |||||
e2[1]] | |||||
kn1 = kn(n11[node_label], n21[ | |||||
node_label], [n11['attributes']], | |||||
[n21['attributes']]) * kn( | |||||
n12[node_label], | |||||
n22[node_label], | |||||
[n12['attributes']], | |||||
[n22['attributes']]) | |||||
Kmatrix[i][j] += kn1 | |||||
except KeyError: # missing labels or attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement( | |||||
range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['mix'] | |||||
try: | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
j].nodes[e2[1]] | |||||
kn1 = kn(n11[node_label], n21[node_label], [ | |||||
n11['attributes'] | |||||
], [n21['attributes']]) * kn( | |||||
n12[node_label], n22[node_label], | |||||
[n12['attributes']], [n22['attributes']]) | |||||
Kmatrix[i][j] += kn1 | |||||
except KeyError: # missing labels or attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
else: | else: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['mix'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
0]], Gn[i].nodes[e1[1]], Gn[ | |||||
j].nodes[e2[0]], Gn[j].nodes[ | |||||
e2[1]] | |||||
kn1 = kn(n11[node_label], n21[ | |||||
node_label], [n11['attributes']], | |||||
[n21['attributes']]) * kn( | |||||
n12[node_label], | |||||
n22[node_label], | |||||
[n12['attributes']], | |||||
[n22['attributes']]) | |||||
kn2 = kn(n11[node_label], n22[ | |||||
node_label], [n11['attributes']], | |||||
[n22['attributes']]) * kn( | |||||
n12[node_label], | |||||
n21[node_label], | |||||
[n12['attributes']], | |||||
[n21['attributes']]) | |||||
Kmatrix[i][j] += kn1 + kn2 | |||||
except KeyError: # missing labels or attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement( | |||||
range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['mix'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
j].nodes[e2[1]] | |||||
kn1 = kn(n11[node_label], n21[node_label], [ | |||||
n11['attributes'] | |||||
], [n21['attributes']]) * kn( | |||||
n12[node_label], n22[node_label], | |||||
[n12['attributes']], [n22['attributes']]) | |||||
kn2 = kn(n11[node_label], n22[node_label], [ | |||||
n11['attributes'] | |||||
], [n22['attributes']]) * kn( | |||||
n12[node_label], n21[node_label], | |||||
[n12['attributes']], [n21['attributes']]) | |||||
Kmatrix[i][j] += kn1 + kn2 | |||||
except KeyError: # missing labels or attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
# node symb labeled | # node symb labeled | ||||
else: | else: | ||||
if ds_attrs['is_directed']: | if ds_attrs['is_directed']: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['symb'] | |||||
try: | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
0]], Gn[i].nodes[e1[1]], Gn[ | |||||
j].nodes[e2[0]], Gn[j].nodes[ | |||||
e2[1]] | |||||
kn1 = kn(n11[node_label], | |||||
n21[node_label]) * kn( | |||||
n12[node_label], | |||||
n22[node_label]) | |||||
Kmatrix[i][j] += kn1 | |||||
except KeyError: # missing labels | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement( | |||||
range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['symb'] | |||||
try: | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
j].nodes[e2[1]] | |||||
kn1 = kn(n11[node_label], | |||||
n21[node_label]) * kn( | |||||
n12[node_label], n22[node_label]) | |||||
Kmatrix[i][j] += kn1 | |||||
except KeyError: # missing labels | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
else: | else: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['symb'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
0]], Gn[i].nodes[e1[1]], Gn[ | |||||
j].nodes[e2[0]], Gn[j].nodes[ | |||||
e2[1]] | |||||
kn1 = kn(n11[node_label], | |||||
n21[node_label]) * kn( | |||||
n12[node_label], | |||||
n22[node_label]) | |||||
kn2 = kn(n11[node_label], | |||||
n22[node_label]) * kn( | |||||
n12[node_label], | |||||
n21[node_label]) | |||||
Kmatrix[i][j] += kn1 + kn2 | |||||
except KeyError: # missing labels | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement( | |||||
range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['symb'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
j].nodes[e2[1]] | |||||
kn1 = kn(n11[node_label], | |||||
n21[node_label]) * kn( | |||||
n12[node_label], n22[node_label]) | |||||
kn2 = kn(n11[node_label], | |||||
n22[node_label]) * kn( | |||||
n12[node_label], n21[node_label]) | |||||
Kmatrix[i][j] += kn1 + kn2 | |||||
except KeyError: # missing labels | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
else: | else: | ||||
# node non-synb labeled | # node non-synb labeled | ||||
if ds_attrs['node_attr_dim'] > 0: | if ds_attrs['node_attr_dim'] > 0: | ||||
if ds_attrs['is_directed']: | if ds_attrs['is_directed']: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['nsymb'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
0]], Gn[i].nodes[e1[1]], Gn[ | |||||
j].nodes[e2[0]], Gn[j].nodes[ | |||||
e2[1]] | |||||
kn1 = kn([n11['attributes']], | |||||
[n21['attributes']]) * kn( | |||||
[n12['attributes']], | |||||
[n22['attributes']]) | |||||
Kmatrix[i][j] += kn1 | |||||
except KeyError: # missing attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement( | |||||
range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['nsymb'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
j].nodes[e2[1]] | |||||
kn1 = kn([n11['attributes']], | |||||
[n21['attributes']]) * kn( | |||||
[n12['attributes']], | |||||
[n22['attributes']]) | |||||
Kmatrix[i][j] += kn1 | |||||
except KeyError: # missing attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
else: | else: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['nsymb'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[ | |||||
0]], Gn[i].nodes[e1[1]], Gn[ | |||||
j].nodes[e2[0]], Gn[j].nodes[ | |||||
e2[1]] | |||||
kn1 = kn([n11['attributes']], | |||||
[n21['attributes']]) * kn( | |||||
[n12['attributes']], | |||||
[n22['attributes']]) | |||||
kn2 = kn([n11['attributes']], | |||||
[n22['attributes']]) * kn( | |||||
[n12['attributes']], | |||||
[n21['attributes']]) | |||||
Kmatrix[i][j] += kn1 + kn2 | |||||
except KeyError: # missing attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement( | |||||
range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
kn = node_kernels['nsymb'] | |||||
try: | |||||
# each edge walk is counted twice, starting from both its extreme nodes. | |||||
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[ | |||||
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[ | |||||
j].nodes[e2[1]] | |||||
kn1 = kn([n11['attributes']], | |||||
[n21['attributes']]) * kn( | |||||
[n12['attributes']], | |||||
[n22['attributes']]) | |||||
kn2 = kn([n11['attributes']], | |||||
[n22['attributes']]) * kn( | |||||
[n12['attributes']], | |||||
[n21['attributes']]) | |||||
Kmatrix[i][j] += kn1 + kn2 | |||||
except KeyError: # missing attributes | |||||
pass | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
# node unlabeled | # node unlabeled | ||||
else: | else: | ||||
for i in range(0, len(Gn)): | |||||
for j in range(i, len(Gn)): | |||||
for e1 in Gn[i].edges(data=True): | |||||
for e2 in Gn[j].edges(data=True): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
Kmatrix[i][j] += 1 | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
for i, j in combinations_with_replacement(range(0, len(Gn)), 2): | |||||
for e1, e2 in product( | |||||
Gn[i].edges(data=True), Gn[j].edges(data=True)): | |||||
if e1[2]['cost'] == e2[2]['cost']: | |||||
Kmatrix[i][j] += 1 | |||||
Kmatrix[j][i] = Kmatrix[i][j] | |||||
pbar.update(1) | |||||
run_time = time.time() - start_time | run_time = time.time() - start_time | ||||
print( | print( | ||||
@@ -119,7 +119,7 @@ def untotterTransformation(G, node_label, edge_label): | |||||
def direct_product(G1, G2, node_label, edge_label): | def direct_product(G1, G2, node_label, edge_label): | ||||
"""Return the direct/tensor product of G1 and G2. | |||||
"""Return the direct/tensor product of directed graphs G1 and G2. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -137,7 +137,7 @@ def direct_product(G1, G2, node_label, edge_label): | |||||
Notes | Notes | ||||
----- | ----- | ||||
This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to direct product graph. | |||||
This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph. | |||||
References | References | ||||
---------- | ---------- | ||||
@@ -147,25 +147,37 @@ def direct_product(G1, G2, node_label, edge_label): | |||||
from itertools import product | from itertools import product | ||||
# G = G.to_directed() | # G = G.to_directed() | ||||
gt = nx.Graph() | |||||
gt = nx.DiGraph() | |||||
# add nodes | # add nodes | ||||
for u, v in product(G1, G2): | for u, v in product(G1, G2): | ||||
if G1.nodes[u][node_label] == G2.nodes[v][node_label]: | if G1.nodes[u][node_label] == G2.nodes[v][node_label]: | ||||
gt.add_node((u, v)) | gt.add_node((u, v)) | ||||
gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]}) | gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]}) | ||||
# add edges | |||||
for u, v in product(gt, gt): | |||||
if (u[0], v[0]) in G1.edges and ( | |||||
u[1], v[1] | |||||
) in G2.edges and G1.edges[u[0], | |||||
v[0]][edge_label] == G2.edges[u[1], | |||||
v[1]][edge_label]: | |||||
gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||||
gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||||
# add edges, faster for sparse graphs (no so many edges), which is the most case for now. | |||||
for (u1, v1), (u2, v2) in product(G1.edges, G2.edges): | |||||
if (u1, u2) in gt and ( | |||||
v1, v2 | |||||
) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2, | |||||
v2][edge_label]: | |||||
gt.add_edge((u1, u2), (v1, v2)) | |||||
gt.edges[(u1, u2), (v1, v2)].update({ | |||||
edge_label: | edge_label: | ||||
G1.edges[u[0], v[0]][edge_label] | |||||
G1.edges[u1, v1][edge_label] | |||||
}) | }) | ||||
# # add edges, faster for dense graphs (a lot of edges, complete graph would be super). | |||||
# for u, v in product(gt, gt): | |||||
# if (u[0], v[0]) in G1.edges and ( | |||||
# u[1], v[1] | |||||
# ) in G2.edges and G1.edges[u[0], | |||||
# v[0]][edge_label] == G2.edges[u[1], | |||||
# v[1]][edge_label]: | |||||
# gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||||
# gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||||
# edge_label: | |||||
# G1.edges[u[0], v[0]][edge_label] | |||||
# }) | |||||
# relabel nodes using consecutive integers for convenience of kernel calculation. | # relabel nodes using consecutive integers for convenience of kernel calculation. | ||||
# gt = nx.convert_node_labels_to_integers( | # gt = nx.convert_node_labels_to_integers( | ||||
# gt, first_label=0, label_attribute='label_orignal') | # gt, first_label=0, label_attribute='label_orignal') | ||||