Browse Source

1. add Sylvester Equation Methods for the generalized random walk kernel.

2. correct an error in the common walk kernel. DO NOT use the old one.
3. improve the method to construct fully-labeled direct product graphs, much faster for sparse graphs.
v0.1
jajupmochi 7 years ago
parent
commit
6aac5b2981
8 changed files with 4220 additions and 342 deletions
  1. +2174
    -44
      notebooks/run_commonwalkkernel.ipynb
  2. +1520
    -19
      notebooks/run_randomwalkkernel.ipynb
  3. +133
    -23
      notebooks/run_spkernel.ipynb
  4. +1
    -0
      pygraph/kernels/.#commonWalkKernel.py
  5. +21
    -50
      pygraph/kernels/commonWalkKernel.py
  6. +208
    -38
      pygraph/kernels/randomWalkKernel.py
  7. +138
    -155
      pygraph/kernels/spKernel.py
  8. +25
    -13
      pygraph/utils/utils.py

+ 2174
- 44
notebooks/run_commonwalkkernel.ipynb
File diff suppressed because it is too large
View File


+ 1520
- 19
notebooks/run_randomwalkkernel.ipynb
File diff suppressed because it is too large
View File


+ 133
- 23
notebooks/run_spkernel.ipynb
File diff suppressed because it is too large
View File


+ 1
- 0
pygraph/kernels/.#commonWalkKernel.py View File

@@ -0,0 +1 @@
ljia@ljia-Precision-7520.4716:1530265749

+ 21
- 50
pygraph/kernels/commonWalkKernel.py View File

@@ -24,7 +24,7 @@ def commonwalkkernel(*args,
edge_label='bond_type',
n=None,
weight=1,
compute_method='exp'):
compute_method=None):
"""Calculate common walk graph kernels up to depth d between graphs.
Parameters
----------
@@ -40,10 +40,11 @@ def commonwalkkernel(*args,
n : integer
Longest length of walks.
weight: integer
Weight coefficient of different lengths of walks.
Weight coefficient of different lengths of walks, which represents beta in 'exp' method and gamma in 'geo'.
compute_method : string
Method used to compute walk kernel. The Following choices are available:
'direct' : direct product graph method, as shown in reference [1]. The time complexity is O(n^6) for unlabeled graphs with n vertices.
'exp' : exponential serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices.
'geo' : geometric serial method applied on the direct product graph, as shown in reference [1]. The time complexity is O(n^6) for graphs with n vertices.
'brute' : brute force, simply search for all walks and compare them.

Return
@@ -66,6 +67,8 @@ def commonwalkkernel(*args,
if not ds_attrs['edge_labeled']:
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')
if not ds_attrs['is_directed']:
Gn = [G.to_directed() for G in Gn]

start_time = time.time()

@@ -77,7 +80,7 @@ def commonwalkkernel(*args,
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _untilnwalkkernel_exp(Gn[i], Gn[j], node_label,
Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
edge_label, weight)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
@@ -90,7 +93,7 @@ def commonwalkkernel(*args,
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _untilnwalkkernel_geo(Gn[i], Gn[j], node_label,
Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
edge_label, weight)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
@@ -106,7 +109,7 @@ def commonwalkkernel(*args,

for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _untilnwalkkernel_brute(
Kmatrix[i][j] = _commonwalkkernel_brute(
all_walks[i],
all_walks[j],
node_label=node_label,
@@ -122,7 +125,7 @@ def commonwalkkernel(*args,
return Kmatrix, run_time


def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta):
def _commonwalkkernel_exp(G1, G2, node_label, edge_label, beta):
"""Calculate walk graph kernels up to n between 2 graphs using exponential series.

Parameters
@@ -168,7 +171,7 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta):
D = np.zeros((len(ew), len(ew)))
for i in range(len(ew)):
D[i][i] = np.exp(beta * ew[i])
# print('D: ', D)
# print('D: ', D)
# print('hshs: ', T.I * D * T)

# print(np.exp(-2))
@@ -176,16 +179,16 @@ def _untilnwalkkernel_exp(G1, G2, node_label, edge_label, beta):
# print(np.exp(weight * D))
# print(ev)
# print(np.linalg.inv(ev))
exp_D = ev * D * ev.I
exp_D = ev * D * ev.T
# print(exp_D)
# print(np.exp(weight * A))
# print('-------')

return np.sum(exp_D.diagonal())
return exp_D.sum()


def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma):
"""Calculate walk graph kernels up to n between 2 graphs using geometric series.
def _commonwalkkernel_geo(G1, G2, node_label, edge_label, gamma):
"""Calculate common walk graph kernels up to n between 2 graphs using geometric series.

Parameters
----------
@@ -207,46 +210,14 @@ def _untilnwalkkernel_geo(G1, G2, node_label, edge_label, gamma):
# get tensor product / direct product
gp = direct_product(G1, G2, node_label, edge_label)
A = nx.adjacency_matrix(gp).todense()
# print(A)

# from matplotlib import pyplot as plt
# nx.draw_networkx(G1)
# plt.show()
# nx.draw_networkx(G2)
# plt.show()
# nx.draw_networkx(gp)
# plt.show()
# print(G1.nodes(data=True))
# print(G2.nodes(data=True))
# print(gp.nodes(data=True))
# print(gp.edges(data=True))

ew, ev = np.linalg.eig(A)
# print('ew: ', ew)
# print(ev)
# T = np.matrix(ev)
# print('T: ', T)
# T = ev.I
D = np.zeros((len(ew), len(ew)))
for i in range(len(ew)):
D[i][i] = np.exp(beta * ew[i])
# print('D: ', D)
# print('hshs: ', T.I * D * T)

# print(np.exp(-2))
# print(D)
# print(np.exp(weight * D))
# print(ev)
# print(np.linalg.inv(ev))
exp_D = ev * D * ev.I
# print(exp_D)
# print(np.exp(weight * A))
# print('-------')

return np.sum(exp_D.diagonal())
mat = np.identity(len(A)) - gamma * A
try:
return mat.I.sum()
except np.linalg.LinAlgError:
return np.nan


def _untilnwalkkernel_brute(walks1,
def _commonwalkkernel_brute(walks1,
walks2,
node_label='atom',
edge_label='bond_type',


+ 208
- 38
pygraph/kernels/randomWalkKernel.py View File

@@ -19,7 +19,11 @@ from pygraph.utils.graphdataset import get_dataset_attributes
def randomwalkkernel(*args,
node_label='atom',
edge_label='bond_type',
edge_weight=None,
h=10,
p=None,
q=None,
weight=None,
compute_method=''):
"""Calculate random walk graph kernels.
Parameters
@@ -33,7 +37,7 @@ def randomwalkkernel(*args,
node attribute used as label. The default node label is atom.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
n : integer
h : integer
Longest length of walks.
method : string
Method used to compute the random walk kernel. Available methods are 'sylvester', 'conjugate', 'fp', 'spectral' and 'kron'.
@@ -46,7 +50,25 @@ def randomwalkkernel(*args,
compute_method = compute_method.lower()
h = int(h)
Gn = args[0] if len(args) == 1 else [args[0], args[1]]
Kmatrix = np.zeros((len(Gn), len(Gn)))

eweight = None
if edge_weight == None:
print('\n None edge weight specified. Set all weight to 1.\n')
else:
try:
some_weight = list(
nx.get_edge_attributes(Gn[0], edge_weight).values())[0]
if isinstance(some_weight, float) or isinstance(some_weight, int):
eweight = edge_weight
else:
print(
'\n Edge weight with name %s is not float or integer. Set all weight to 1.\n'
% edge_weight)
except:
print(
'\n Edge weight with name "%s" is not found in the edge attributes. Set all weight to 1.\n'
% edge_weight)

ds_attrs = get_dataset_attributes(
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
@@ -71,76 +93,224 @@ def randomwalkkernel(*args,
# labeled=labeled) for i in range(0, len(Gn))
# ]

pbar = tqdm(
total=(1 + len(Gn)) * len(Gn) / 2,
desc='calculating kernels',
file=sys.stdout)
if compute_method == 'sylvester':
import warnings
warnings.warn(
'The Sylvester equation (rather than generalized Sylvester equation) is used; only walks of length 1 is considered.'
'The Sylvester equation (rather than generalized Sylvester equation) is used; edge label number has to smaller than 3.'
)
from control import dlyap
Kmatrix = _randomwalkkernel_sylvester(Gn, weight, p, q, node_label,
edge_label, eweight)

elif compute_method == 'conjugate':
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_sylvester(
all_walks[i],
all_walks[j],
node_label=node_label,
edge_label=edge_label)
Kmatrix[i][j] = _randomwalkkernel_conjugate(
Gn[i], Gn[j], node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

elif compute_method == 'conjugate':
pass
elif compute_method == 'fp':
pass
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_fp(Gn[i], Gn[j], node_label,
edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

elif compute_method == 'spectral':
pass
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_spectral(
Gn[i], Gn[j], node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
elif compute_method == 'kron':
pass
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_kron(Gn[i], Gn[j],
node_label, edge_label)
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
raise Exception(
'compute method name incorrect. Available methods: "sylvester", "conjugate", "fp", "spectral" and "kron".'
)

for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
Kmatrix[i][j] = _randomwalkkernel_do(
all_walks[i],
all_walks[j],
node_label=node_label,
edge_label=edge_label,
labeled=labeled)
Kmatrix[j][i] = Kmatrix[i][j]
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
# Kmatrix[i][j] = _randomwalkkernel_do(
# all_walks[i],
# all_walks[j],
# node_label=node_label,
# edge_label=edge_label,
# labeled=labeled)
# Kmatrix[j][i] = Kmatrix[i][j]

run_time = time.time() - start_time
print(
"\n --- kernel matrix of walk kernel up to %d of size %d built in %s seconds ---"
% (n, len(Gn), run_time))
"\n --- kernel matrix of random walk kernel of size %d built in %s seconds ---"
% (len(Gn), run_time))

return Kmatrix, run_time


def _randomwalkkernel_sylvester(walks1,
walks2,
node_label='atom',
edge_label='bond_type'):
def _randomwalkkernel_sylvester(Gn, lmda, p, q, node_label, edge_label,
eweight):
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method.

Parameters
----------
walks1, walks2 : list
List of walks in 2 graphs, where for unlabeled graphs, each walk is represented by a list of nodes; while for labeled graphs, each walk is represented by a string consists of labels of nodes and edges on that walk.
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label. The default node label is atom.
node attribute used as label.
edge_label : string
edge attribute used as label. The default edge label is bond_type.
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""
from control import dlyap
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
# don't normalize adjacency matrices if q is a uniform vector.
A_list = [
nx.adjacency_matrix(G, eweight).todense() for G in tqdm(
Gn, desc='compute adjacency matrices', file=sys.stdout)
]
if p == None:
pbar = tqdm(
total=(1 + len(Gn)) * len(Gn) / 2,
desc='calculating kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
A = lmda * A_list[j]
Q = A_list[i]
# use uniform distribution if there is no prior knowledge.
nb_pd = len(A_list[i]) * len(A_list[j])
pd_uni = 1 / nb_pd
C = np.full((len(A_list[j]), len(A_list[i])), pd_uni)
try:
X = dlyap(A, Q, C)
X = np.reshape(X, (-1, 1), order='F')
# use uniform distribution if there is no prior knowledge.
q_direct = np.full((1, nb_pd), pd_uni)
Kmatrix[i][j] = np.dot(q_direct, X)
except TypeError:
# print('sth wrong.')
Kmatrix[i][j] = np.nan

Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
# A_list = []
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
# A_tilde = nx.adjacency_matrix(G, weight=None).todense()
# # normalized adjacency matrices
# # A_list.append(A_tilde / A_tilde.sum(axis=0))
# A_list.append(A_tilde)

return Kmatrix


def _randomwalkkernel_conjugate(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""

dpg = nx.tensor_product(G1, G2) # direct product graph
import matplotlib.pyplot as plt
nx.draw_networkx(G1)
plt.show()
nx.draw_networkx(G2)
plt.show()
nx.draw_networkx(dpg)
plt.show()
X = dlyap(A, Q, C)

return kernel


def _randomwalkkernel_fp(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""

dpg = nx.tensor_product(G1, G2) # direct product graph
X = dlyap(A, Q, C)

return kernel


def _randomwalkkernel_spectral(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using spectral decomposition method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Kernel between 2 graphs.
"""

dpg = nx.tensor_product(G1, G2) # direct product graph
X = dlyap(A, Q, C)

return kernel


def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
node_label : string
node attribute used as label.
edge_label : string
edge attribute used as label.

Return
------
kernel : float
Treelet Kernel between 2 graphs.
Kernel between 2 graphs.
"""

dpg = nx.tensor_product(G1, G2) # direct product graph


+ 138
- 155
pygraph/kernels/spKernel.py View File

@@ -8,6 +8,7 @@ import pathlib
sys.path.insert(0, "../")
from tqdm import tqdm
import time
from itertools import combinations_with_replacement, product

import networkx as nx
import numpy as np
@@ -39,8 +40,6 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None):
# pre-process
Gn = args[0] if len(args) == 1 else [args[0], args[1]]

Gn = [nx.to_directed(G) for G in Gn]

weight = None
if edge_weight == None:
print('\n None edge weight specified. Set all weight to 1.\n')
@@ -89,174 +88,158 @@ def spkernel(*args, node_label='atom', edge_weight=None, node_kernels=None):
# node symb and non-synb labeled
if ds_attrs['node_attr_dim'] > 0:
if ds_attrs['is_directed']:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['mix']
try:
n11, n12, n21, n22 = Gn[i].nodes[e1[
0]], Gn[i].nodes[e1[1]], Gn[
j].nodes[e2[0]], Gn[j].nodes[
e2[1]]
kn1 = kn(n11[node_label], n21[
node_label], [n11['attributes']],
[n21['attributes']]) * kn(
n12[node_label],
n22[node_label],
[n12['attributes']],
[n22['attributes']])
Kmatrix[i][j] += kn1
except KeyError: # missing labels or attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(
range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['mix']
try:
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(n11[node_label], n21[node_label], [
n11['attributes']
], [n21['attributes']]) * kn(
n12[node_label], n22[node_label],
[n12['attributes']], [n22['attributes']])
Kmatrix[i][j] += kn1
except KeyError: # missing labels or attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['mix']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[
0]], Gn[i].nodes[e1[1]], Gn[
j].nodes[e2[0]], Gn[j].nodes[
e2[1]]
kn1 = kn(n11[node_label], n21[
node_label], [n11['attributes']],
[n21['attributes']]) * kn(
n12[node_label],
n22[node_label],
[n12['attributes']],
[n22['attributes']])
kn2 = kn(n11[node_label], n22[
node_label], [n11['attributes']],
[n22['attributes']]) * kn(
n12[node_label],
n21[node_label],
[n12['attributes']],
[n21['attributes']])
Kmatrix[i][j] += kn1 + kn2
except KeyError: # missing labels or attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(
range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['mix']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(n11[node_label], n21[node_label], [
n11['attributes']
], [n21['attributes']]) * kn(
n12[node_label], n22[node_label],
[n12['attributes']], [n22['attributes']])
kn2 = kn(n11[node_label], n22[node_label], [
n11['attributes']
], [n22['attributes']]) * kn(
n12[node_label], n21[node_label],
[n12['attributes']], [n21['attributes']])
Kmatrix[i][j] += kn1 + kn2
except KeyError: # missing labels or attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
# node symb labeled
else:
if ds_attrs['is_directed']:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['symb']
try:
n11, n12, n21, n22 = Gn[i].nodes[e1[
0]], Gn[i].nodes[e1[1]], Gn[
j].nodes[e2[0]], Gn[j].nodes[
e2[1]]
kn1 = kn(n11[node_label],
n21[node_label]) * kn(
n12[node_label],
n22[node_label])
Kmatrix[i][j] += kn1
except KeyError: # missing labels
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(
range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['symb']
try:
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(n11[node_label],
n21[node_label]) * kn(
n12[node_label], n22[node_label])
Kmatrix[i][j] += kn1
except KeyError: # missing labels
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['symb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[
0]], Gn[i].nodes[e1[1]], Gn[
j].nodes[e2[0]], Gn[j].nodes[
e2[1]]
kn1 = kn(n11[node_label],
n21[node_label]) * kn(
n12[node_label],
n22[node_label])
kn2 = kn(n11[node_label],
n22[node_label]) * kn(
n12[node_label],
n21[node_label])
Kmatrix[i][j] += kn1 + kn2
except KeyError: # missing labels
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(
range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['symb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn(n11[node_label],
n21[node_label]) * kn(
n12[node_label], n22[node_label])
kn2 = kn(n11[node_label],
n22[node_label]) * kn(
n12[node_label], n21[node_label])
Kmatrix[i][j] += kn1 + kn2
except KeyError: # missing labels
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
# node non-synb labeled
if ds_attrs['node_attr_dim'] > 0:
if ds_attrs['is_directed']:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['nsymb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[
0]], Gn[i].nodes[e1[1]], Gn[
j].nodes[e2[0]], Gn[j].nodes[
e2[1]]
kn1 = kn([n11['attributes']],
[n21['attributes']]) * kn(
[n12['attributes']],
[n22['attributes']])
Kmatrix[i][j] += kn1
except KeyError: # missing attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(
range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['nsymb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn([n11['attributes']],
[n21['attributes']]) * kn(
[n12['attributes']],
[n22['attributes']])
Kmatrix[i][j] += kn1
except KeyError: # missing attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['nsymb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[
0]], Gn[i].nodes[e1[1]], Gn[
j].nodes[e2[0]], Gn[j].nodes[
e2[1]]
kn1 = kn([n11['attributes']],
[n21['attributes']]) * kn(
[n12['attributes']],
[n22['attributes']])
kn2 = kn([n11['attributes']],
[n22['attributes']]) * kn(
[n12['attributes']],
[n21['attributes']])
Kmatrix[i][j] += kn1 + kn2
except KeyError: # missing attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(
range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
kn = node_kernels['nsymb']
try:
# each edge walk is counted twice, starting from both its extreme nodes.
n11, n12, n21, n22 = Gn[i].nodes[e1[0]], Gn[
i].nodes[e1[1]], Gn[j].nodes[e2[0]], Gn[
j].nodes[e2[1]]
kn1 = kn([n11['attributes']],
[n21['attributes']]) * kn(
[n12['attributes']],
[n22['attributes']])
kn2 = kn([n11['attributes']],
[n22['attributes']]) * kn(
[n12['attributes']],
[n21['attributes']])
Kmatrix[i][j] += kn1 + kn2
except KeyError: # missing attributes
pass
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

# node unlabeled
else:
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data=True):
for e2 in Gn[j].edges(data=True):
if e1[2]['cost'] == e2[2]['cost']:
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)
for i, j in combinations_with_replacement(range(0, len(Gn)), 2):
for e1, e2 in product(
Gn[i].edges(data=True), Gn[j].edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
Kmatrix[i][j] += 1
Kmatrix[j][i] = Kmatrix[i][j]
pbar.update(1)

run_time = time.time() - start_time
print(


+ 25
- 13
pygraph/utils/utils.py View File

@@ -119,7 +119,7 @@ def untotterTransformation(G, node_label, edge_label):


def direct_product(G1, G2, node_label, edge_label):
"""Return the direct/tensor product of G1 and G2.
"""Return the direct/tensor product of directed graphs G1 and G2.

Parameters
----------
@@ -137,7 +137,7 @@ def direct_product(G1, G2, node_label, edge_label):

Notes
-----
This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to direct product graph.
This method differs from networkx.tensor_product in that this method only adds nodes and edges in G1 and G2 that have the same labels to the direct product graph.

References
----------
@@ -147,25 +147,37 @@ def direct_product(G1, G2, node_label, edge_label):
from itertools import product

# G = G.to_directed()
gt = nx.Graph()
gt = nx.DiGraph()
# add nodes
for u, v in product(G1, G2):
if G1.nodes[u][node_label] == G2.nodes[v][node_label]:
gt.add_node((u, v))
gt.nodes[(u, v)].update({node_label: G1.nodes[u][node_label]})
# add edges
for u, v in product(gt, gt):
if (u[0], v[0]) in G1.edges and (
u[1], v[1]
) in G2.edges and G1.edges[u[0],
v[0]][edge_label] == G2.edges[u[1],
v[1]][edge_label]:
gt.add_edge((u[0], u[1]), (v[0], v[1]))
gt.edges[(u[0], u[1]), (v[0], v[1])].update({
# add edges, faster for sparse graphs (no so many edges), which is the most case for now.
for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
if (u1, u2) in gt and (
v1, v2
) in gt and G1.edges[u1, v1][edge_label] == G2.edges[u2,
v2][edge_label]:
gt.add_edge((u1, u2), (v1, v2))
gt.edges[(u1, u2), (v1, v2)].update({
edge_label:
G1.edges[u[0], v[0]][edge_label]
G1.edges[u1, v1][edge_label]
})

# # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
# for u, v in product(gt, gt):
# if (u[0], v[0]) in G1.edges and (
# u[1], v[1]
# ) in G2.edges and G1.edges[u[0],
# v[0]][edge_label] == G2.edges[u[1],
# v[1]][edge_label]:
# gt.add_edge((u[0], u[1]), (v[0], v[1]))
# gt.edges[(u[0], u[1]), (v[0], v[1])].update({
# edge_label:
# G1.edges[u[0], v[0]][edge_label]
# })

# relabel nodes using consecutive integers for convenience of kernel calculation.
# gt = nx.convert_node_labels_to_integers(
# gt, first_label=0, label_attribute='label_orignal')


Loading…
Cancel
Save