
Merge pull request #25 from jajupmochi/v0.2

V0.2
tags/v0.2.0
linlin (GitHub), 4 years ago
parent commit 0c01cb3e94
Signature: no known key found in database (GPG Key ID: 4AEE18F83AFDEB23)
30 changed files with 3598 additions and 706 deletions
  1. Problems.md (+21 −0)
  2. README.md (+2 −2)
  3. gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py (+54 −0)
  4. gklearn/experiments/papers/PRL_2020/synthesized_graphs_degrees.py (+54 −0)
  5. gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_el.py (+54 −0)
  6. gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nl.py (+54 −0)
  7. gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py (+54 −0)
  8. gklearn/experiments/papers/PRL_2020/utils.py (+106 −0)
  9. gklearn/ged/env/ged_env.py (+9 −1)
  10. gklearn/ged/learning/__init__.py (+9 −0)
  11. gklearn/ged/learning/cost_matrices_learner.py (+148 −0)
  12. gklearn/ged/learning/costs_learner.py (+175 −0)
  13. gklearn/ged/median/__init__.py (+1 −0)
  14. gklearn/ged/median/median_graph_estimator_cml.py (+1676 −0)
  15. gklearn/ged/util/__init__.py (+1 −1)
  16. gklearn/kernels/__init__.py (+4 −0)
  17. gklearn/kernels/common_walk.py (+282 −0)
  18. gklearn/kernels/graph_kernel.py (+1 −0)
  19. gklearn/kernels/marginalized.py (+7 −7)
  20. gklearn/kernels/random_walk.py (+94 −0)
  21. gklearn/kernels/spectral_decomposition.py (+283 −0)
  22. gklearn/kernels/sylvester_equation.py (+245 −0)
  23. gklearn/kernels/untilHPathKernel.py (+1 −1)
  24. gklearn/preimage/median_preimage_generator_cml.py (+43 −601)
  25. gklearn/tests/test_graph_kernels.py (+97 −87)
  26. gklearn/utils/graph_synthesizer.py (+53 −0)
  27. gklearn/utils/parallel.py (+4 −4)
  28. gklearn/utils/utils.py (+64 −0)
  29. requirements.txt (+1 −1)
  30. requirements_pypi.txt (+1 −1)

Problems.md (+21 −0)

@@ -0,0 +1,21 @@
# About graph kernels.

## (Random walk) Sylvester equation kernel.

### ImportError: cannot import name 'frange' from 'matplotlib.mlab'

You are using an outdated `control` package with a recent `matplotlib`. `mlab.frange` was removed in `matplotlib` 3.1.0, and `control` removed the call to it in version 0.8.2.

Update your `control` package.

### Intel MKL FATAL ERROR: Cannot load libmkl_avx2.so or libmkl_def.so.

The Intel Math Kernel Library (MKL) is missing or not properly configured. MKL is presumably required by the `control` module.

Install MKL, then add the following to your environment:

```
export PATH=/opt/intel/bin:$PATH

export LD_LIBRARY_PATH=/opt/intel/lib/intel64:/opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH
```
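
After applying either fix, a quick sanity check is to run the two imports where these errors are raised. This is a minimal sketch, assuming `control>=0.8.2` and an MKL-backed NumPy stack in the active environment:

```
# Both imports fail with the errors above when the environment is broken.
import numpy as np   # loads MKL; triggers the fatal error if libmkl_* cannot be found
import control       # triggers the frange ImportError on control<0.8.2
print(control.__version__)
```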

README.md (+2 −2)

@@ -12,12 +12,12 @@ A Python package for graph kernels, graph edit distances and graph pre-image pro
 * python>=3.5
 * numpy>=1.16.2
 * scipy>=1.1.0
-* matplotlib>=3.0.0
+* matplotlib>=3.1.0
 * networkx>=2.2
 * scikit-learn>=0.20.0
 * tabulate>=0.8.2
 * tqdm>=4.26.0
-* control==0.8.0 (for generalized random walk kernels only)
+* control>=0.8.2 (for generalized random walk kernels only)
 * slycot==0.3.3 (for generalized random walk kernels only, which requires a fortran compiler, gfortran for example)

 ## How to use?


gklearn/experiments/papers/PRL_2020/synthesized_graphs_N.py (+54 −0)

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
from utils import Graph_Kernel_List, compute_graph_kernel


def generate_graphs():
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	gsyzer = GraphSynthesizer()
	graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
	return graphs


def xp_synthesied_graphs_dataset_size():
	# Generate graphs.
	graphs = generate_graphs()

	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/synthesized_graphs_N/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	run_times = {}
	for kernel_name in Graph_Kernel_List:
		print()
		print('Kernel:', kernel_name)

		run_times[kernel_name] = []
		for num_graphs in [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]:
			print()
			print('Number of graphs:', num_graphs)

			sub_graphs = [g.copy() for g in graphs[0:num_graphs]]
			gram_matrix, run_time = compute_graph_kernel(sub_graphs, kernel_name)
			run_times[kernel_name].append(run_time)

			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_graphs) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
	return


if __name__ == '__main__':
	xp_synthesied_graphs_dataset_size()

gklearn/experiments/papers/PRL_2020/synthesized_graphs_degrees.py (+54 −0)

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
from utils import Graph_Kernel_List, compute_graph_kernel


def generate_graphs(degree):
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	gsyzer = GraphSynthesizer()
	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=int(10*degree), num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
	return graphs


def xp_synthesied_graphs_degrees():
	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/synthesized_graphs_degrees/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	run_times = {}
	for kernel_name in Graph_Kernel_List:
		print()
		print('Kernel:', kernel_name)

		run_times[kernel_name] = []
		for degree in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
			print()
			print('Degree:', degree)

			# Generate graphs.
			graphs = generate_graphs(degree)

			# Compute Gram matrix.
			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
			run_times[kernel_name].append(run_time)

			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(degree) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
	return


if __name__ == '__main__':
	xp_synthesied_graphs_degrees()

gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_el.py (+54 −0)

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
from utils import Graph_Kernel_List_ESym, compute_graph_kernel


def generate_graphs(num_el_alp):
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	gsyzer = GraphSynthesizer()
	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=num_el_alp, seed=None, directed=False)
	return graphs


def xp_synthesied_graphs_num_edge_label_alphabet():
	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/synthesized_graphs_num_edge_label_alphabet/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	run_times = {}
	for kernel_name in Graph_Kernel_List_ESym:
		print()
		print('Kernel:', kernel_name)

		run_times[kernel_name] = []
		for num_el_alp in [0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40]:
			print()
			print('Size of edge label alphabet:', num_el_alp)

			# Generate graphs.
			graphs = generate_graphs(num_el_alp)

			# Compute Gram matrix.
			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
			run_times[kernel_name].append(run_time)

			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_el_alp) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
	return


if __name__ == '__main__':
	xp_synthesied_graphs_num_edge_label_alphabet()

gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nl.py (+54 −0)

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
from utils import Graph_Kernel_List_VSym, compute_graph_kernel


def generate_graphs(num_nl_alp):
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	gsyzer = GraphSynthesizer()
	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=40, num_node_labels=num_nl_alp, num_edge_labels=0, seed=None, directed=False)
	return graphs


def xp_synthesied_graphs_num_node_label_alphabet():
	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/synthesized_graphs_num_node_label_alphabet/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	run_times = {}
	for kernel_name in Graph_Kernel_List_VSym:
		print()
		print('Kernel:', kernel_name)

		run_times[kernel_name] = []
		for num_nl_alp in [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]:
			print()
			print('Size of node label alphabet:', num_nl_alp)

			# Generate graphs.
			graphs = generate_graphs(num_nl_alp)

			# Compute Gram matrix.
			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
			run_times[kernel_name].append(run_time)

			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_nl_alp) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
	return


if __name__ == '__main__':
	xp_synthesied_graphs_num_node_label_alphabet()

gklearn/experiments/papers/PRL_2020/synthesized_graphs_num_nodes.py (+54 −0)

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 21 10:34:26 2020

@author: ljia
"""
from utils import Graph_Kernel_List, compute_graph_kernel


def generate_graphs(num_nodes):
	from gklearn.utils.graph_synthesizer import GraphSynthesizer
	gsyzer = GraphSynthesizer()
	graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=num_nodes, num_edges=int(num_nodes*2), num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
	return graphs


def xp_synthesied_graphs_num_nodes():
	# Run and save.
	import pickle
	import os
	save_dir = 'outputs/synthesized_graphs_num_nodes/'
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	run_times = {}
	for kernel_name in Graph_Kernel_List:
		print()
		print('Kernel:', kernel_name)

		run_times[kernel_name] = []
		for num_nodes in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]:
			print()
			print('Number of nodes:', num_nodes)

			# Generate graphs.
			graphs = generate_graphs(num_nodes)

			# Compute Gram matrix.
			gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name)
			run_times[kernel_name].append(run_time)

			pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_nodes) + '.pkl', 'wb'))

	# Save all.
	pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb'))
	return


if __name__ == '__main__':
	xp_synthesied_graphs_num_nodes()

gklearn/experiments/papers/PRL_2020/utils.py (+106 −0)

@@ -0,0 +1,106 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 22 11:33:28 2020

@author: ljia
"""
Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk']
# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']


Graph_Kernel_List_VSym = ['PathUpToH', 'WLSubtree', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'StructuralSP', 'CommonWalk']


Graph_Kernel_List_ESym = ['PathUpToH', 'Marginalized', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'StructuralSP', 'CommonWalk']


Graph_Kernel_List_VCon = ['ShortestPath', 'ConjugateGradient', 'FixedPoint', 'StructuralSP']


Graph_Kernel_List_ECon = ['ConjugateGradient', 'FixedPoint', 'StructuralSP']


def compute_graph_kernel(graphs, kernel_name):
	import multiprocessing

	if kernel_name == 'CommonWalk':
		from gklearn.kernels.commonWalkKernel import commonwalkkernel
		estimator = commonwalkkernel
		params = {'compute_method': 'geo', 'weight': 0.1}

	elif kernel_name == 'Marginalized':
		from gklearn.kernels.marginalizedKernel import marginalizedkernel
		estimator = marginalizedkernel
		params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False}

	elif kernel_name == 'SylvesterEquation':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		params = {'compute_method': 'sylvester', 'weight': 0.1}

	elif kernel_name == 'ConjugateGradient':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
		import functools
		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
		params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}

	elif kernel_name == 'FixedPoint':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
		import functools
		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
		params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}

	elif kernel_name == 'SpectralDecomposition':
		from gklearn.kernels.randomWalkKernel import randomwalkkernel
		estimator = randomwalkkernel
		params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1}

	elif kernel_name == 'ShortestPath':
		from gklearn.kernels.spKernel import spkernel
		estimator = spkernel
		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
		import functools
		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
		params = {'node_kernels': sub_kernel}

	elif kernel_name == 'StructuralSP':
		from gklearn.kernels.structuralspKernel import structuralspkernel
		estimator = structuralspkernel
		from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
		import functools
		mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
		sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
		params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel}

	elif kernel_name == 'PathUpToH':
		from gklearn.kernels.untilHPathKernel import untilhpathkernel
		estimator = untilhpathkernel
		params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'}

	elif kernel_name == 'Treelet':
		from gklearn.kernels.treeletKernel import treeletkernel
		estimator = treeletkernel
		from gklearn.utils.kernels import polynomialkernel
		import functools
		sub_kernel = functools.partial(polynomialkernel, d=4, c=1e+8)
		params = {'sub_kernel': sub_kernel}

	elif kernel_name == 'WLSubtree':
		from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
		estimator = weisfeilerlehmankernel
		params = {'base_kernel': 'subtree', 'height': 5}

#	params['parallel'] = None
	params['n_jobs'] = multiprocessing.cpu_count()
	params['verbose'] = True
	results = estimator(graphs, **params)

	return results[0], results[1]
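
As a usage sketch of this helper (it assumes the `GraphSynthesizer` shipped in this PR; the dataset parameters are illustrative, not from the original experiments):

```
from gklearn.utils.graph_synthesizer import GraphSynthesizer
from utils import compute_graph_kernel

# Synthesize a small unlabeled dataset.
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=10, num_nodes=20, num_edges=40,
                               num_node_labels=0, num_edge_labels=0,
                               seed=None, directed=False)

# Gram matrix and wall-clock run time for one kernel from Graph_Kernel_List.
gram_matrix, run_time = compute_graph_kernel(graphs, 'PathUpToH')
print(gram_matrix.shape, run_time)
```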

gklearn/ged/env/ged_env.py (+9 −1)

@@ -637,6 +637,10 @@ class GEDEnv(object):
 		return [i for i in self.__internal_to_original_node_ids[graph_id].values()]
+	def get_node_cost(self, node_label_1, node_label_2):
+		return self.__ged_data.node_cost(node_label_1, node_label_2)
 	def get_node_rel_cost(self, node_label_1, node_label_2):
 		"""
 		/*!
@@ -650,7 +654,7 @@ class GEDEnv(object):
 			node_label_1 = tuple(sorted(node_label_1.items(), key=lambda kv: kv[0]))
 		if isinstance(node_label_2, dict):
 			node_label_2 = tuple(sorted(node_label_2.items(), key=lambda kv: kv[0]))
-		return self.__ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2)
+		return self.__ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2)  # @todo: may need to use node_cost() instead (or change node_cost() and modify ged_method for pre-defined cost matrices.)
@@ -677,6 +681,10 @@ class GEDEnv(object):
 		if isinstance(node_label, dict):
 			node_label = tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
 		return self.__ged_data._edit_cost.node_ins_cost_fun(node_label)
+	def get_edge_cost(self, edge_label_1, edge_label_2):
+		return self.__ged_data.edge_cost(edge_label_1, edge_label_2)
 	def get_edge_rel_cost(self, edge_label_1, edge_label_2):
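
The dict-to-tuple conversion in `get_node_rel_cost` makes node labels hashable and independent of key order before they reach the edit cost function. A minimal illustration outside of `GEDEnv` (the labels are hypothetical):

```
# Hypothetical node labels given as attribute dicts with different key orders.
label_1 = {'chem': 'C', 'charge': '0'}
label_2 = {'charge': '0', 'chem': 'C'}

# The same canonical tuple is produced regardless of insertion order.
t1 = tuple(sorted(label_1.items(), key=lambda kv: kv[0]))
t2 = tuple(sorted(label_2.items(), key=lambda kv: kv[0]))
assert t1 == t2 == (('charge', '0'), ('chem', 'C'))
```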


gklearn/ged/learning/__init__.py (+9 −0)

@@ -0,0 +1,9 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 16:07:25 2020

@author: ljia
"""

from gklearn.ged.learning.cost_matrices_learner import CostMatricesLearner

gklearn/ged/learning/cost_matrices_learner.py (+148 −0)

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 11:42:48 2020

@author: ljia
"""
import numpy as np
import cvxpy as cp
import time
from gklearn.ged.learning.costs_learner import CostsLearner
from gklearn.ged.util import compute_geds_cml


class CostMatricesLearner(CostsLearner):

	def __init__(self, edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=False, verbose=2):
		super().__init__(parallel, verbose)
		self._edit_cost = edit_cost
		self._triangle_rule = triangle_rule
		self._allow_zeros = allow_zeros


	def fit(self, X, y):
		if self._edit_cost == 'LETTER':
			raise Exception('Cannot compute for cost "LETTER".')
		elif self._edit_cost == 'LETTER2':
			raise Exception('Cannot compute for cost "LETTER2".')
		elif self._edit_cost == 'NON_SYMBOLIC':
			raise Exception('Cannot compute for cost "NON_SYMBOLIC".')
		elif self._edit_cost == 'CONSTANT':  # @todo: node/edge may not be labeled.
			if not self._triangle_rule and self._allow_zeros:
				# Non-negative least squares: min ||X @ w - y||^2 s.t. w >= 0.
				w = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ w - y)
				constraints = [w >= [0.0 for i in range(X.shape[1])]]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = w.value
				residual = np.sqrt(prob.value)
			elif self._triangle_rule and self._allow_zeros:  # @todo
				x = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ x - y)
				# Bound single-operation costs away from zero and enforce the
				# triangle rule (insertion + deletion >= substitution) for node
				# and edge costs.
				constraints = [x >= [0.0 for i in range(X.shape[1])],
							   np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T @ x >= 0.01,
							   np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T @ x >= 0.01,
							   np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T @ x >= 0.01,
							   np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T @ x >= 0.01,
							   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T @ x >= 0.0,
							   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T @ x >= 0.0]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = x.value
				residual = np.sqrt(prob.value)
			elif not self._triangle_rule and not self._allow_zeros:  # @todo
				x = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ x - y)
				constraints = [x >= [0.01 for i in range(X.shape[1])]]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = x.value
				residual = np.sqrt(prob.value)
			elif self._triangle_rule and not self._allow_zeros:  # @todo
				x = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ x - y)
				constraints = [x >= [0.01 for i in range(X.shape[1])],
							   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T @ x >= 0.0,
							   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T @ x >= 0.0]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = x.value
				residual = np.sqrt(prob.value)
		else:
			raise Exception('The edit cost "%s" is not supported for update progress.' % self._edit_cost)

		self._cost_list.append(edit_costs_new)


	def init_geds_and_nb_eo(self, y, graphs):
		time0 = time.time()
		self._cost_list.append(np.concatenate((self._ged_options['node_label_costs'],
											   self._ged_options['edge_label_costs'])))
		ged_vec, self._nb_eo = self.compute_geds_and_nb_eo(graphs)
		self._residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - y))))
		self._runtime_list.append(time.time() - time0)

		if self._verbose >= 2:
			print('Current node label costs:', self._cost_list[-1][0:len(self._ged_options['node_label_costs'])])
			print('Current edge label costs:', self._cost_list[-1][len(self._ged_options['node_label_costs']):])
			print('Residual list:', self._residual_list)


	def update_geds_and_nb_eo(self, y, graphs, time0):
		self._ged_options['node_label_costs'] = self._cost_list[-1][0:len(self._ged_options['node_label_costs'])]
		self._ged_options['edge_label_costs'] = self._cost_list[-1][len(self._ged_options['node_label_costs']):]
		ged_vec, self._nb_eo = self.compute_geds_and_nb_eo(graphs)
		self._residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - y))))
		self._runtime_list.append(time.time() - time0)


	def compute_geds_and_nb_eo(self, graphs):
		ged_vec, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=self._ged_options, parallel=self._parallel, verbose=(self._verbose > 1))
		return ged_vec, np.array(n_edit_operations)


	def check_convergency(self):
		self._ec_changed = False
		for i, cost in enumerate(self._cost_list[-1]):
			if cost == 0:
				if self._cost_list[-2][i] > self._epsilon_ec:
					self._ec_changed = True
					break
			elif abs(cost - self._cost_list[-2][i]) / cost > self._epsilon_ec:
				self._ec_changed = True
				break
#			if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
#				ec_changed = True
#				break

		self._residual_changed = False
		if self._residual_list[-1] == 0:
			if self._residual_list[-2] > self._epsilon_residual:
				self._residual_changed = True
		elif abs(self._residual_list[-1] - self._residual_list[-2]) / self._residual_list[-1] > self._epsilon_residual:
			self._residual_changed = True

		self._converged = not (self._ec_changed or self._residual_changed)
		if self._converged:
			self._itrs_without_update += 1
		else:
			self._itrs_without_update = 0
			self._num_updates_ecs += 1


	def print_current_states(self):
		print()
		print('-------------------------------------------------------------------------')
		print('States of iteration', self._itrs + 1)
		print('-------------------------------------------------------------------------')
#		print('Time spend:', self.__runtime_optimize_ec)
		print('Total number of iterations for optimizing:', self._itrs + 1)
		print('Total number of updating edit costs:', self._num_updates_ecs)
		print('Was optimization of edit costs converged:', self._converged)
		print('Did edit costs change:', self._ec_changed)
		print('Did residual change:', self._residual_changed)
		print('Iterations without update:', self._itrs_without_update)
		print('Current node label costs:', self._cost_list[-1][0:len(self._ged_options['node_label_costs'])])
		print('Current edge label costs:', self._cost_list[-1][len(self._ged_options['node_label_costs']):])
		print('Residual list:', self._residual_list)
		print('-------------------------------------------------------------------------')
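
For reference, the unconstrained branch of `fit` above is a plain non-negative least-squares problem. A self-contained sketch with synthetic data (all shapes and values are illustrative, not from the PR):

```
import numpy as np
import cvxpy as cp

# Synthetic stand-ins: 50 graph pairs, 6 constant edit costs.
rng = np.random.default_rng(0)
X = rng.integers(0, 10, size=(50, 6)).astype(float)   # numbers of edit operations
y = X @ np.array([1.0, 1.0, 0.5, 1.0, 1.0, 0.5]) + rng.normal(0, 0.1, 50)  # target distances

# min ||X @ w - y||^2  subject to  w >= 0.
w = cp.Variable(6)
prob = cp.Problem(cp.Minimize(cp.sum_squares(X @ w - y)), [w >= 0.0])
prob.solve()
print(np.round(w.value, 3), 'residual:', np.sqrt(prob.value))
```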

gklearn/ged/learning/costs_learner.py (+175 −0)

@@ -0,0 +1,175 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 7 11:30:31 2020

@author: ljia
"""
import numpy as np
import cvxpy as cp
import time
from gklearn.utils import Timer


class CostsLearner(object):

	def __init__(self, parallel, verbose):
		### To set.
		self._parallel = parallel
		self._verbose = verbose
		# For update().
		self._time_limit_in_sec = 0
		self._max_itrs = 100
		self._max_itrs_without_update = 3
		self._epsilon_residual = 0.01
		self._epsilon_ec = 0.1
		### To compute.
		self._residual_list = []
		self._runtime_list = []
		self._cost_list = []
		self._nb_eo = None
		# For update().
		self._itrs = 0
		self._converged = False
		self._num_updates_ecs = 0
		self._ec_changed = None
		self._residual_changed = None
		self._itrs_without_update = 0
		### Both set and get.
		self._ged_options = None


	def fit(self, X, y):
		pass


	def preprocess(self):
		pass  # @todo: remove the zero numbers of edit costs.


	def postprocess(self):
		# Clamp numerical noise around zero and reject negative costs.
		for i in range(len(self._cost_list[-1])):
			if -1e-9 <= self._cost_list[-1][i] <= 1e-9:
				self._cost_list[-1][i] = 0
			if self._cost_list[-1][i] < 0:
				raise ValueError('The edit cost is negative.')


	def set_update_params(self, **kwargs):
		self._time_limit_in_sec = kwargs.get('time_limit_in_sec', self._time_limit_in_sec)
		self._max_itrs = kwargs.get('max_itrs', self._max_itrs)
		self._max_itrs_without_update = kwargs.get('max_itrs_without_update', self._max_itrs_without_update)
		self._epsilon_residual = kwargs.get('epsilon_residual', self._epsilon_residual)
		self._epsilon_ec = kwargs.get('epsilon_ec', self._epsilon_ec)


	def update(self, y, graphs, ged_options, **kwargs):
		# Set parameters.
		self._ged_options = ged_options
		if kwargs != {}:
			self.set_update_params(**kwargs)

		# The initial iteration.
		if self._verbose >= 2:
			print('\ninitial:')
		self.init_geds_and_nb_eo(y, graphs)

		self._converged = False
		self._itrs_without_update = 0
		self._itrs = 0
		self._num_updates_ecs = 0
		timer = Timer(self._time_limit_in_sec)

		# Run iterations from initial edit costs.
		while not self.termination_criterion_met(self._converged, timer, self._itrs, self._itrs_without_update):
			if self._verbose >= 2:
				print('\niteration', self._itrs + 1)
			time0 = time.time()

			# Fit GED space to the target space.
			self.preprocess()
			self.fit(self._nb_eo, y)
			self.postprocess()

			# Compute new GEDs and numbers of edit operations.
			self.update_geds_and_nb_eo(y, graphs, time0)

			# Check convergence.
			self.check_convergency()

			# Print current states.
			if self._verbose >= 2:
				self.print_current_states()

			self._itrs += 1


	def init_geds_and_nb_eo(self, y, graphs):
		pass


	def update_geds_and_nb_eo(self, y, graphs, time0):
		pass


	def compute_geds_and_nb_eo(self, graphs):
		pass


	def check_convergency(self):
		pass


	def print_current_states(self):
		pass


	def termination_criterion_met(self, converged, timer, itr, itrs_without_update):
		if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
#			if self.__state == AlgorithmState.TERMINATED:
#				self.__state = AlgorithmState.INITIALIZED
			return True
		return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)


	def execute_cvx(self, prob):
		try:
			prob.solve(verbose=(self._verbose >= 2))
		except MemoryError as error0:
			if self._verbose >= 2:
				print('\nUsing solver "OSQP" caused a memory error.')
				print('the original error message is\n', error0)
				print('solver status: ', prob.status)
				print('trying solver "CVXOPT" instead...\n')
			try:
				prob.solve(solver=cp.CVXOPT, verbose=(self._verbose >= 2))
			except Exception as error1:
				if self._verbose >= 2:
					print('\nAn error occurred when using solver "CVXOPT".')
					print('the original error message is\n', error1)
					print('solver status: ', prob.status)
					print('trying solver "MOSEK" instead. Notice this solver is commercial and a license is required.\n')
				prob.solve(solver=cp.MOSEK, verbose=(self._verbose >= 2))
			else:
				if self._verbose >= 2:
					print('solver status: ', prob.status)
		else:
			if self._verbose >= 2:
				print('solver status: ', prob.status)

		if self._verbose >= 2:
			print()


	def get_results(self):
		results = {}
		results['residual_list'] = self._residual_list
		results['runtime_list'] = self._runtime_list
		results['cost_list'] = self._cost_list
		results['nb_eo'] = self._nb_eo
		results['itrs'] = self._itrs
		results['converged'] = self._converged
		results['num_updates_ecs'] = self._num_updates_ecs
		results['ec_changed'] = self._ec_changed
		results['residual_changed'] = self._residual_changed
		results['itrs_without_update'] = self._itrs_without_update
		return results
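
A minimal sketch of configuring the stopping parameters of the update loop on the concrete learner from this PR (the option values are illustrative):

```
from gklearn.ged.learning import CostMatricesLearner

learner = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False,
                              allow_zeros=True, parallel=True, verbose=2)
learner.set_update_params(time_limit_in_sec=0,        # 0 = no wall-clock limit
                          max_itrs=100,               # cap on optimization iterations
                          max_itrs_without_update=3,  # early stop when costs stall
                          epsilon_residual=0.01,
                          epsilon_ec=0.1)
# learner.update(y, graphs, ged_options) would then run the fit/GED loop
# and learner.get_results() would return the recorded histories.
```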

gklearn/ged/median/__init__.py (+1 −0)

@@ -1,3 +1,4 @@
 from gklearn.ged.median.median_graph_estimator import MedianGraphEstimator
 from gklearn.ged.median.median_graph_estimator_py import MedianGraphEstimatorPy
+from gklearn.ged.median.median_graph_estimator_cml import MedianGraphEstimatorCML
 from gklearn.ged.median.utils import constant_node_costs, mge_options_to_string

gklearn/ged/median/median_graph_estimator_cml.py (+1676 −0)

(File diff suppressed because it is too large.)


gklearn/ged/util/__init__.py (+1 −1)

@@ -1,3 +1,3 @@
 from gklearn.ged.util.lsape_solver import LSAPESolver
 from gklearn.ged.util.util import compute_geds, ged_options_to_string
-from gklearn.ged.util.util import compute_geds_cml
+from gklearn.ged.util.util import compute_geds_cml, label_costs_to_matrix

gklearn/kernels/__init__.py (+4 −0)

@@ -8,7 +8,11 @@ __author__ = "Linlin Jia"
 __date__ = "November 2018"

 from gklearn.kernels.graph_kernel import GraphKernel
+from gklearn.kernels.common_walk import CommonWalk
 from gklearn.kernels.marginalized import Marginalized
+from gklearn.kernels.random_walk import RandomWalk
+from gklearn.kernels.sylvester_equation import SylvesterEquation
+from gklearn.kernels.spectral_decomposition import SpectralDecomposition
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.structural_sp import StructuralSP
 from gklearn.kernels.path_up_to_h import PathUpToH


gklearn/kernels/common_walk.py (+282 −0)

@@ -0,0 +1,282 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 18 11:21:31 2020

@author: ljia

@references:

	[1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels:
	Hardness results and efficient alternatives. Learning Theory and Kernel
	Machines, pages 129–143, 2003.
"""

import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import direct_product_graph
from gklearn.kernels import GraphKernel


class CommonWalk(GraphKernel):

	def __init__(self, **kwargs):
		GraphKernel.__init__(self)
		self.__node_labels = kwargs.get('node_labels', [])
		self.__edge_labels = kwargs.get('edge_labels', [])
		self.__weight = kwargs.get('weight', 1)
		self.__compute_method = kwargs.get('compute_method', None)
		self.__ds_infos = kwargs.get('ds_infos', {})
		self.__compute_method = self.__compute_method.lower()


	def _compute_gm_series(self):
		self.__check_graphs(self._graphs)
		self.__add_dummy_labels(self._graphs)
		if not self.__ds_infos['directed']:  # convert to directed graphs.
			self._graphs = [G.to_directed() for G in self._graphs]

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		from itertools import combinations_with_replacement
		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
		if self._verbose >= 2:
			iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
		else:
			iterator = itr

		# direct product graph method - exponential
		if self.__compute_method == 'exp':
			for i, j in iterator:
				kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
				gram_matrix[i][j] = kernel
				gram_matrix[j][i] = kernel
		# direct product graph method - geometric
		elif self.__compute_method == 'geo':
			for i, j in iterator:
				kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
				gram_matrix[i][j] = kernel
				gram_matrix[j][i] = kernel

		return gram_matrix


	def _compute_gm_imap_unordered(self):
		self.__check_graphs(self._graphs)
		self.__add_dummy_labels(self._graphs)
		if not self.__ds_infos['directed']:  # convert to directed graphs.
			self._graphs = [G.to_directed() for G in self._graphs]

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		def init_worker(gn_toshare):
			global G_gn
			G_gn = gn_toshare

		# direct product graph method - exponential
		if self.__compute_method == 'exp':
			do_fun = self._wrapper_kernel_do_exp
		# direct product graph method - geometric
		elif self.__compute_method == 'geo':
			do_fun = self._wrapper_kernel_do_geo

		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)

		return gram_matrix


	def _compute_kernel_list_series(self, g1, g_list):
		self.__check_graphs(g_list + [g1])
		self.__add_dummy_labels(g_list + [g1])
		if not self.__ds_infos['directed']:  # convert to directed graphs.
			g1 = g1.to_directed()
			g_list = [G.to_directed() for G in g_list]

		# compute kernel list.
		kernel_list = [None] * len(g_list)
		if self._verbose >= 2:
			iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
		else:
			iterator = range(len(g_list))

		# direct product graph method - exponential
		if self.__compute_method == 'exp':
			for i in iterator:
				kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
				kernel_list[i] = kernel
		# direct product graph method - geometric
		elif self.__compute_method == 'geo':
			for i in iterator:
				kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
				kernel_list[i] = kernel

		return kernel_list


	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		self.__check_graphs(g_list + [g1])
		self.__add_dummy_labels(g_list + [g1])
		if not self.__ds_infos['directed']:  # convert to directed graphs.
			g1 = g1.to_directed()
			g_list = [G.to_directed() for G in g_list]

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		def init_worker(g1_toshare, g_list_toshare):
			global G_g1, G_g_list
			G_g1 = g1_toshare
			G_g_list = g_list_toshare

		# direct product graph method - exponential
		if self.__compute_method == 'exp':
			do_fun = self._wrapper_kernel_list_do_exp
		# direct product graph method - geometric
		elif self.__compute_method == 'geo':
			do_fun = self._wrapper_kernel_list_do_geo

		def func_assign(result, var_to_assign):
			var_to_assign[result[0]] = result[1]
		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
					init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
					n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)

		return kernel_list


	def _wrapper_kernel_list_do_exp(self, itr):
		return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)


	def _wrapper_kernel_list_do_geo(self, itr):
		return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)


	def _compute_single_kernel_series(self, g1, g2):
		self.__check_graphs([g1] + [g2])
		self.__add_dummy_labels([g1] + [g2])
		if not self.__ds_infos['directed']:  # convert to directed graphs.
			g1 = g1.to_directed()
			g2 = g2.to_directed()

		# direct product graph method - exponential
		if self.__compute_method == 'exp':
			kernel = self.__kernel_do_exp(g1, g2, self.__weight)
		# direct product graph method - geometric
		elif self.__compute_method == 'geo':
			kernel = self.__kernel_do_geo(g1, g2, self.__weight)

		return kernel


	def __kernel_do_exp(self, g1, g2, beta):
		"""Calculate the common walk graph kernel between two graphs using the
		exponential series.

		Parameters
		----------
		g1, g2 : NetworkX graphs
			Graphs between which the kernel is calculated.
		beta : float
			Weight.

		Return
		------
		kernel : float
			The common walk kernel between the two graphs.
		"""
		# get tensor product / direct product
		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
		# return 0 if the direct product graph has no more than 1 node.
		if nx.number_of_nodes(gp) < 2:
			return 0
		A = nx.adjacency_matrix(gp).todense()
		ew, ev = np.linalg.eig(A)
#		# remove imaginary part if possible.
#		# @todo: don't know if it is necessary.
#		for i in range(len(ew)):
#			if np.abs(ew[i].imag) < 1e-9:
#				ew[i] = ew[i].real
#		for i in range(ev.shape[0]):
#			for j in range(ev.shape[1]):
#				if np.abs(ev[i, j].imag) < 1e-9:
#					ev[i, j] = ev[i, j].real

		D = np.zeros((len(ew), len(ew)), dtype=complex)  # @todo: use complex?
		for i in range(len(ew)):
			D[i][i] = np.exp(beta * ew[i])

		exp_D = ev * D * ev.T
		kernel = exp_D.sum()
		if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9:
			kernel = kernel.real

		return kernel


	def _wrapper_kernel_do_exp(self, itr):
		i = itr[0]
		j = itr[1]
		return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)


	def __kernel_do_geo(self, g1, g2, gamma):
		"""Calculate the common walk graph kernel between two graphs using the
		geometric series.

		Parameters
		----------
		g1, g2 : NetworkX graphs
			Graphs between which the kernel is calculated.
		gamma : float
			Weight.

		Return
		------
		kernel : float
			The common walk kernel between the two graphs.
		"""
		# get tensor product / direct product
		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
		# return 0 if the direct product graph has no more than 1 node.
		if nx.number_of_nodes(gp) < 2:
			return 0
		A = nx.adjacency_matrix(gp).todense()
		mat = np.identity(len(A)) - gamma * A
#		try:
		return mat.I.sum()
#		except np.linalg.LinAlgError:
#			return np.nan


	def _wrapper_kernel_do_geo(self, itr):
		i = itr[0]
		j = itr[1]
		return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)


	def __check_graphs(self, Gn):
		for g in Gn:
			if nx.number_of_nodes(g) == 1:
				raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')


	def __add_dummy_labels(self, Gn):
		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self.__node_labels = [SpecialLabel.DUMMY]
		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self.__edge_labels = [SpecialLabel.DUMMY]
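
A usage sketch for the class above (it assumes the `compute` entry point of the `GraphKernel` base class; the toy graphs and parameter values are illustrative):

```
import networkx as nx
from gklearn.kernels import CommonWalk

g1 = nx.path_graph(4)   # toy graphs with a dummy node label.
g2 = nx.cycle_graph(5)
for g in (g1, g2):
	nx.set_node_attributes(g, '0', 'atom')

# A small gamma keeps the geometric series (I - gamma * A)^-1 well defined.
kernel = CommonWalk(node_labels=['atom'], edge_labels=[], weight=0.01,
                    compute_method='geo', ds_infos={'directed': False})
gram_matrix, run_time = kernel.compute([g1, g2], parallel=None, n_jobs=1, verbose=0)
```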

gklearn/kernels/graph_kernel.py (+1 −0)

@@ -10,6 +10,7 @@ import networkx as nx
import multiprocessing
import time


class GraphKernel(object):

	def __init__(self):


gklearn/kernels/marginalized.py (+7 −7)

@@ -51,7 +51,7 @@ class Marginalized(GraphKernel):
 			else:
 				iterator = self._graphs
 			# @todo: this may not work.
-			self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+			self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -108,13 +108,13 @@ class Marginalized(GraphKernel):
 		self.__add_dummy_labels(g_list + [g1])
 		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label)  # @todo: this may not work.
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
 			if self._verbose >= 2:
 				iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
 			else:
 				iterator = g_list
 			# @todo: this may not work.
-			g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+			g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]

 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -133,7 +133,7 @@ class Marginalized(GraphKernel):
 		self.__add_dummy_labels(g_list + [g1])
 		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label)  # @todo: this may not work.
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
 		pool = Pool(self._n_jobs)
 		itr = range(0, len(g_list))
 		if len(g_list) < 100 * self._n_jobs:
@@ -177,8 +177,8 @@ class Marginalized(GraphKernel):
 	def _compute_single_kernel_series(self, g1, g2):
 		self.__add_dummy_labels([g1] + [g2])
 		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label)  # @todo: this may not work.
-			g2 = untotterTransformation(g2, self.__node_label, self.__edge_label)
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+			g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
 		kernel = self.__kernel_do(g1, g2)

 		return kernel
@@ -324,7 +324,7 @@ class Marginalized(GraphKernel):

 	def _wrapper_untotter(self, i):
-		return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label)  # @todo: this may not work.
+		return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels)  # @todo: this may not work.

 	def __add_dummy_labels(self, Gn):


gklearn/kernels/random_walk.py (+94 −0)

@@ -0,0 +1,94 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 16:55:17 2020

@author: ljia

@references:

	[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import direct_product_graph
from gklearn.kernels import GraphKernel


class RandomWalk(GraphKernel):

	def __init__(self, **kwargs):
		GraphKernel.__init__(self)
		self._compute_method = kwargs.get('compute_method', None)
		self._weight = kwargs.get('weight', 1)
		self._p = kwargs.get('p', None)
		self._q = kwargs.get('q', None)
		self._edge_weight = kwargs.get('edge_weight', None)
		self._node_labels = kwargs.get('node_labels', [])  # used by _add_dummy_labels.
		self._edge_labels = kwargs.get('edge_labels', [])  # used by _add_dummy_labels.
		self._ds_infos = kwargs.get('ds_infos', {})
		self._compute_method = self._compute_method.lower()


	def _compute_gm_series(self):
		pass


	def _compute_gm_imap_unordered(self):
		pass


	def _compute_kernel_list_series(self, g1, g_list):
		pass


	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		pass


	def _compute_single_kernel_series(self, g1, g2):
		pass


	def _check_graphs(self, Gn):
		# remove graphs with no edges, as no walk can be found in their structures,
		# so the weight matrix between such a graph and itself might be zero.
		for g in Gn:
			if nx.number_of_edges(g) == 0:
				raise Exception('Graphs must contain edges to construct weight matrices.')


	def _check_edge_weight(self, G0, verbose):
		# Check the edge weight attribute on a sample graph G0.
		eweight = None
		if self._edge_weight is None:
			if verbose >= 2:
				print('\n No edge weight is specified. Set all weights to 1.\n')
		else:
			try:
				some_weight = list(nx.get_edge_attributes(G0, self._edge_weight).values())[0]
				if isinstance(some_weight, (float, int)):
					eweight = self._edge_weight
				else:
					if verbose >= 2:
						print('\n Edge weight with name %s is not float or integer. Set all weights to 1.\n' % self._edge_weight)
			except Exception:
				if verbose >= 2:
					print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n' % self._edge_weight)

		self._edge_weight = eweight


	def _add_dummy_labels(self, Gn):
		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self._node_labels = [SpecialLabel.DUMMY]
		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self._edge_labels = [SpecialLabel.DUMMY]

gklearn/kernels/spectral_decomposition.py (+283 −0)

@@ -0,0 +1,283 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 20 16:12:45 2020

@author: ljia

@references:

	[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
from scipy.sparse import kron
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import RandomWalk


class SpectralDecomposition(RandomWalk):

	def __init__(self, **kwargs):
		RandomWalk.__init__(self, **kwargs)
		self._sub_kernel = kwargs.get('sub_kernel', None)


	def _compute_gm_series(self):
		self._check_edge_weight(self._graphs[0], self._verbose)
		self._check_graphs(self._graphs)
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			P_list = []
			D_list = []
			if self._verbose >= 2:
				iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout)
			else:
				iterator = self._graphs
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev)
#			P_inv_list = [p.T for p in P_list]  # @todo: also works for directed graphs?

			if self._p is None:  # p is uniform distribution as default.
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs]
#				q_T_list = [q.T for q in q_list]

				from itertools import combinations_with_replacement
				itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
				if self._verbose >= 2:
					iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
				else:
					iterator = itr

				for i, j in iterator:
					kernel = self.__kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
					gram_matrix[i][j] = kernel
					gram_matrix[j][i] = kernel
			else:  # @todo
				pass
		else:  # @todo
			pass

		return gram_matrix


	def _compute_gm_imap_unordered(self):
		self._check_edge_weight(self._graphs[0], self._verbose)
		self._check_graphs(self._graphs)
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			P_list = []
			D_list = []
			if self._verbose >= 2:
				iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout)
			else:
				iterator = self._graphs
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev)  # @todo: parallel?

			if self._p is None:  # p is uniform distribution as default.
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs]  # @todo: parallel?

				def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare):
					global G_q_T_list, G_P_list, G_D_list
					G_q_T_list = q_T_list_toshare
					G_P_list = P_list_toshare
					G_D_list = D_list_toshare

				do_fun = self._wrapper_kernel_do
				parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
							glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose)
			else:  # @todo
				pass
		else:  # @todo
			pass

		return gram_matrix


	def _compute_kernel_list_series(self, g1, g_list):
		self._check_edge_weight(g1, self._verbose)
		self._check_graphs(g_list + [g1])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			D1, P1 = np.linalg.eig(A1)
			P_list = []
			D_list = []
			if self._verbose >= 2:
				iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
			else:
				iterator = g_list
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev)

			if self._p is None:  # p is uniform distribution as default.
				q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1))
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list]
				if self._verbose >= 2:
					iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
				else:
					iterator = range(len(g_list))

				for i in iterator:
					kernel = self.__kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
					kernel_list[i] = kernel
			else:  # @todo
				pass
		else:  # @todo
			pass

		return kernel_list


	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		self._check_edge_weight(g1, self._verbose)
		self._check_graphs(g_list + [g1])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		# compute kernel list.
		kernel_list = [None] * len(g_list)

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			D1, P1 = np.linalg.eig(A1)
			P_list = []
			D_list = []
			if self._verbose >= 2:
				iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
			else:
				iterator = g_list
			for G in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev)  # @todo: parallel?

			if self._p is None:  # p is uniform distribution as default.
				q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1))
				q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list]  # @todo: parallel?

				def init_worker(q_T1_toshare, P1_toshare, D1_toshare, q_T_list_toshare, P_list_toshare, D_list_toshare):
					global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list
					G_q_T1 = q_T1_toshare
					G_P1 = P1_toshare
					G_D1 = D1_toshare
					G_q_T_list = q_T_list_toshare
					G_P_list = P_list_toshare
					G_D_list = D_list_toshare

				do_fun = self._wrapper_kernel_list_do

				def func_assign(result, var_to_assign):
					var_to_assign[result[0]] = result[1]
				itr = range(len(g_list))
				len_itr = len(g_list)
				parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
							init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list),
							method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
			else:  # @todo
				pass
		else:  # @todo
			pass

		return kernel_list


	def _wrapper_kernel_list_do(self, itr):
		return itr, self.__kernel_do(G_q_T1, G_q_T_list[itr], G_P1, G_P_list[itr], G_D1, G_D_list[itr], self._weight, self._sub_kernel)


	def _compute_single_kernel_series(self, g1, g2):
		self._check_edge_weight(g1, self._verbose)
		self._check_graphs([g1] + [g2])
		if self._verbose >= 2:
			import warnings
			warnings.warn('All labels are ignored. Only works for undirected graphs.')

		if self._q is None:
			# precompute the spectral decomposition of each graph.
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
			D1, P1 = np.linalg.eig(A1)
			A2 = nx.adjacency_matrix(g2, weight=self._edge_weight).todense().transpose()
			D2, P2 = np.linalg.eig(A2)

			if self._p is None:  # p is uniform distribution as default.
				q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1))
				q_T2 = np.full((1, nx.number_of_nodes(g2)), 1 / nx.number_of_nodes(g2))
				kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
			else:  # @todo
				pass
		else:  # @todo
			pass

		return kernel


	def __kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
		# use uniform distribution if there is no prior knowledge.
		kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
		# @todo: this is not needed when p = q (kr = kl.T) for undirected graphs.
#		kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense()
		if sub_kernel == 'exp':
			D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
			kmiddle = np.diag(np.exp(weight * D_diag))
		elif sub_kernel == 'geo':
			# geometric series: (I - weight * diag(D1 ⊗ D2))^{-1}.
			D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
			kmiddle = np.identity(len(D_diag)) - np.diag(weight * D_diag)
			kmiddle = np.linalg.inv(kmiddle)
		return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0]


	def _wrapper_kernel_do(self, itr):
		i = itr[0]
		j = itr[1]
		return i, j, self.__kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel)
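
`__kernel_do` evaluates the closed form k(G1, G2) = (q1ᵀP1 ⊗ q2ᵀP2) · f(D1 ⊗ D2) · (q1ᵀP1 ⊗ q2ᵀP2)ᵀ, where f applies the exponential or geometric series to the pairwise eigenvalue products. A self-contained numeric sketch, with random symmetric matrices standing in for (transposed) adjacency matrices:

```
import numpy as np
from scipy.sparse import kron

rng = np.random.default_rng(1)
A1 = rng.random((4, 4)); A1 = (A1 + A1.T) / 2   # stand-ins for adjacency matrices
A2 = rng.random((3, 3)); A2 = (A2 + A2.T) / 2
D1, P1 = np.linalg.eig(A1)
D2, P2 = np.linalg.eig(A2)

q_T1 = np.full((1, 4), 1 / 4)                   # uniform start/stop distributions
q_T2 = np.full((1, 3), 1 / 3)
kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()

weight = 0.05
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2])
kmiddle = np.diag(np.exp(weight * D_diag))      # 'exp' sub-kernel
print(np.dot(np.dot(kl, kmiddle), kl.T)[0, 0])
```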

gklearn/kernels/sylvester_equation.py (+245 −0)

@@ -0,0 +1,245 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 19 17:24:46 2020

@author: ljia

@references:

[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
"""

import sys
from tqdm import tqdm
import numpy as np
import networkx as nx
from control import dlyap
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import RandomWalk


class SylvesterEquation(RandomWalk):
def __init__(self, **kwargs):
RandomWalk.__init__(self, **kwargs)

def _compute_gm_series(self):
self._check_edge_weight(self._graphs)
self._check_graphs(self._graphs)
if self._verbose >= 2:
import warnings
warnings.warn('All labels are ignored.')
lmda = self._weight
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self._q == None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
if self._verbose >= 2:
iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout)
else:
iterator = self._graphs
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]
# # normalized adjacency matrices
# A_wave_list = []
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
# A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
# norm = A_tilde.sum(axis=0)
# norm[norm == 0] = 1
# A_wave_list.append(A_tilde / norm)

if self._p == None: # p is uniform distribution as default.
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
else:
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(A_wave_list[i], A_wave_list[j], lmda)
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
else: # @todo
pass
else: # @todo
pass
return gram_matrix
def _compute_gm_imap_unordered(self):
self._check_edge_weight(self._graphs)
self._check_graphs(self._graphs)
if self._verbose >= 2:
import warnings
warnings.warn('All labels are ignored.')
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self._q == None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
if self._verbose >= 2:
iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout)
else:
iterator = self._graphs
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?

if self._p == None: # p is uniform distribution as default.
def init_worker(A_wave_list_toshare):
global G_A_wave_list
G_A_wave_list = A_wave_list_toshare
do_fun = self._wrapper_kernel_do
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose)

else: # @todo
pass
else: # @todo
pass
return gram_matrix
def _compute_kernel_list_series(self, g1, g_list):
self._check_edge_weight(g_list + [g1])
self._check_graphs(g_list + [g1])
if self._verbose >= 2:
import warnings
warnings.warn('All labels are ignored.')
lmda = self._weight
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._q == None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
else:
iterator = range(len(g_list))
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]

if self._p == None: # p is uniform distribution as default.
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(A_wave_1, A_wave_list[i], lmda)
kernel_list[i] = kernel
else: # @todo
pass
else: # @todo
pass
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self._check_edge_weight(g_list + [g1])
self._check_graphs(g_list + [g1])
if self._verbose >= 2:
import warnings
warnings.warn('All labels are ignored.')
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._q == None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
else:
iterator = range(len(g_list))
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?

if self._p == None: # p is uniform distribution as default.
def init_worker(A_wave_1_toshare, A_wave_list_toshare):
global G_A_wave_1, G_A_wave_list
G_A_wave_1 = A_wave_1_toshare
G_A_wave_list = A_wave_list_toshare

do_fun = self._wrapper_kernel_list_do
def func_assign(result, var_to_assign):
var_to_assign[result[0]] = result[1]
itr = range(len(g_list))
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
else: # @todo
pass
else: # @todo
pass
return kernel_list


def _wrapper_kernel_list_do(self, itr):
return itr, self._kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight)
def _compute_single_kernel_series(self, g1, g2):
self._check_edge_weight([g1] + [g2])
self._check_graphs([g1] + [g2])
if self._verbose >= 2:
import warnings
warnings.warn('All labels are ignored.')
lmda = self._weight
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
A_wave_2 = nx.adjacency_matrix(g2, self._edge_weight).todense().transpose()
if self._p is None: # p is uniform distribution as default.
kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda)
else: # @todo
pass
else: # @todo
pass
return kernel


def __kernel_do(self, A_wave1, A_wave2, lmda):
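# control.dlyap(S, T_t, M0) solves a discrete Sylvester equation of the
# form S @ X @ T_t - X + M0 = 0 (see the control docs for the exact
# convention), i.e. X = lmda * A_wave2 @ X @ A_wave1 + M0; the kernel is
# then the uniformly weighted sum q_times @ vec(X) computed below.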
S = lmda * A_wave2
T_t = A_wave1
# use uniform distribution if there is no prior knowledge.
nb_pd = len(A_wave1) * len(A_wave2)
p_times_uni = 1 / nb_pd
M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
X = dlyap(S, T_t, M0)
X = np.reshape(X, (-1, 1), order='F')
# use uniform distribution if there is no prior knowledge.
q_times = np.full((1, nb_pd), p_times_uni)
return np.dot(q_times, X)


def _wrapper_kernel_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight)
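For reference, here is a minimal standalone sketch of the same Sylvester-equation computation on two small unlabeled graphs. It assumes `control>=0.8.2` with `slycot` installed (as listed in the requirements) and uses toy graphs in place of a dataset:

```
import numpy as np
import networkx as nx
from control import dlyap

lmda = 0.01  # walk weight, the role played by self._weight above.
g1, g2 = nx.path_graph(3), nx.cycle_graph(4)
A1_t = nx.adjacency_matrix(g1).todense().transpose()
A2_t = nx.adjacency_matrix(g2).todense().transpose()

# Uniform start/stop distributions over the product graph's node pairs.
nb_pd = len(A1_t) * len(A2_t)
M0 = np.full((len(A2_t), len(A1_t)), 1 / nb_pd)

# Solve the discrete Sylvester equation, then sum vec(X) uniformly.
X = dlyap(lmda * A2_t, A1_t, M0)
X = np.reshape(X, (-1, 1), order='F')
kernel = np.full((1, nb_pd), 1 / nb_pd) @ X
print(kernel[0, 0])
```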

+ 1
- 1
gklearn/kernels/untilHPathKernel.py View File

@@ -649,7 +649,7 @@ def paths2labelseqs(plist, G, ds_attrs, node_label, edge_label):
# path_strs.append(tuple(strlist))
else:
path_strs = [
tuple([G.node[node][node_label] for node in path])
tuple([G.nodes[node][node_label] for node in path])
for path in plist
]
return path_strs
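(`G.node` was deprecated in networkx 2.1 and removed in networkx 2.4; `G.nodes` is the supported accessor.)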


+ 43
- 601
gklearn/preimage/median_preimage_generator_cml.py View File

@@ -10,16 +10,14 @@ import time
import random
import multiprocessing
import networkx as nx
import cvxpy as cp
import itertools
from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis
from gklearn.ged.util import compute_geds_cml
from gklearn.ged.env import GEDEnv
from gklearn.ged.median import MedianGraphEstimatorPy
from gklearn.ged.learning import CostMatricesLearner
from gklearn.ged.median import MedianGraphEstimatorCML
from gklearn.ged.median import constant_node_costs, mge_options_to_string
from gklearn.utils import Timer, SpecialLabel
from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.ged.util import label_costs_to_matrix


class MedianPreimageGeneratorCML(PreimageGenerator):
@@ -28,7 +26,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
# arguments to set.
### arguments to set.
self.__mge = None
self.__ged_options = {}
self.__mge_options = {}
@@ -38,6 +36,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__ds_name = None
# for cml.
self.__time_limit_in_sec = 0
self.__max_itrs = 100
self.__max_itrs_without_update = 3
@@ -45,7 +44,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__epsilon_ec = 0.1
self.__allow_zeros = True
# self.__triangle_rule = True
# values to compute.
### values to compute.
self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None
self.__runtime_total = None
@@ -57,12 +56,13 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__k_dis_set_median = None
self.__k_dis_gen_median = None
self.__k_dis_dataset = None
self.__itrs = 0
self.__converged = False
self.__num_updates_ecc = 0
self.__node_label_costs = None
self.__edge_label_costs = None
# values that can be set or to be computed.
# for cml.
self.__itrs = 0
self.__converged = False
self.__num_updates_ecs = 0
### values that can be set or to be computed.
self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None
@@ -154,7 +154,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
print('================================================================================')
print('Finished generation of preimages.')
print('--------------------------------------------------------------------------------')
print('The optimized edit cost constants:', self.__edit_cost_constants)
print('The optimized edit costs:', self.__edit_cost_constants)
print('SOD of the set median:', self.__sod_set_median)
print('SOD of the generalized median:', self.__sod_gen_median)
print('Distance in kernel space for set median:', self.__k_dis_set_median)
@@ -165,7 +165,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating edit costs:', self.__num_updates_ecc)
print('Total number of updating edit costs:', self.__num_updates_ecs)
print('Did the optimization of edit costs converge:', self.__converged)
print('================================================================================')
print()
@@ -185,7 +185,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
results['k_dis_dataset'] = self.__k_dis_dataset
results['itrs'] = self.__itrs
results['converged'] = self.__converged
results['num_updates_ecc'] = self.__num_updates_ecc
results['num_updates_ecc'] = self.__num_updates_ecs
results['mge'] = {}
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
@@ -302,598 +302,33 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
dis_k_vec.append(dis_k_mat[i, j])
dis_k_vec = np.array(dis_k_vec)
# init ged.
if self._verbose >= 2:
print('\ninitial:')
time0 = time.time()
graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
self.__edit_cost_constants = self.__init_ecc
# Set GEDEnv options.
# graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
# self.__edit_cost_constants = self.__init_ecc
options = self.__ged_options.copy()
options['edit_cost_constants'] = self.__edit_cost_constants # @todo
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed.
options['node_labels'] = self._dataset.node_labels
options['edge_labels'] = self._dataset.edge_labels
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
# options['node_attrs'] = self._dataset.node_attrs
# options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
options['edge_label_costs'] = self.__edge_label_costs
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0]
edit_cost_list = [self.__init_ecc]
nb_cost_mat = np.array(n_edit_operations)
nb_cost_mat_list = [nb_cost_mat]
if self._verbose >= 2:
print('Current edit cost constants:', self.__edit_cost_constants)
print('Residual list:', residual_list)
# run iteration from initial edit costs.
self.__converged = False
itrs_without_update = 0
self.__itrs = 0
self.__num_updates_ecc = 0
timer = Timer(self.__time_limit_in_sec)
while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update):
if self._verbose >= 2:
print('\niteration', self.__itrs + 1)
time0 = time.time()
# "fit" geds to distances in feature space by tuning edit costs using theLeast Squares Method.
# np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm',
# nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
# n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
# ged_mat=ged_mat)
self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec)
for i in range(len(self.__edit_cost_constants)):
if -1e-9 <= self.__edit_cost_constants[i] <= 1e-9:
self.__edit_cost_constants[i] = 0
if self.__edit_cost_constants[i] < 0:
raise ValueError('The edit cost is negative.')
# for i in range(len(self.__edit_cost_constants)):
# if self.__edit_cost_constants[i] < 0:
# self.__edit_cost_constants[i] = 0
# compute new GEDs and numbers of edit operations.
options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
options['edit_cost_constants'] = self.__edit_cost_constants # @todo
options['node_labels'] = self._dataset.node_labels
options['edge_labels'] = self._dataset.edge_labels
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
ged_vec, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
time_list.append(time.time() - time0)
edit_cost_list.append(self.__edit_cost_constants)
nb_cost_mat = np.array(n_edit_operations)
nb_cost_mat_list.append(nb_cost_mat)
# check convergence.
ec_changed = False
for i, cost in enumerate(self.__edit_cost_constants):
if cost == 0:
if edit_cost_list[-2][i] > self.__epsilon_ec:
ec_changed = True
break
elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec:
ec_changed = True
break
# if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
# ec_changed = True
# break
residual_changed = False
if residual_list[-1] == 0:
if residual_list[-2] > self.__epsilon_residual:
residual_changed = True
elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual:
residual_changed = True
self.__converged = not (ec_changed or residual_changed)
if self.__converged:
itrs_without_update += 1
else:
itrs_without_update = 0
self.__num_updates_ecc += 1
# print current states.
if self._verbose >= 2:
print()
print('-------------------------------------------------------------------------')
print('States of iteration', self.__itrs + 1)
print('-------------------------------------------------------------------------')
# print('Time spend:', self.__runtime_optimize_ec)
print('Total number of iterations for optimizing:', self.__itrs + 1)
print('Total number of updating edit costs:', self.__num_updates_ecc)
print('Did the optimization of edit costs converge:', self.__converged)
print('Did edit costs change:', ec_changed)
print('Did residual change:', residual_changed)
print('Iterations without update:', itrs_without_update)
print('Current edit cost constants:', self.__edit_cost_constants)
print('Residual list:', residual_list)
print('-------------------------------------------------------------------------')
self.__itrs += 1


def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
# if self.__state == AlgorithmState.TERMINATED:
# self.__state = AlgorithmState.INITIALIZED
return True
return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)


def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'):
# if self.__ds_name == 'Letter-high':
if self.__ged_options['edit_cost'] == 'LETTER':
raise Exception('Cannot compute for cost "LETTER".')
pass
# # method 1: set alpha automatically, just tune c_vir and c_eir by
# # LMS using cvxpy.
# alpha = 0.5
# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
## alpha = 0.75
## else:
## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
## alpha = alpha * 0.99
# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
# nb_cost_mat_new = np.column_stack((param_vir, param_eir))
# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
#
# x = cp.Variable(nb_cost_mat_new.shape[1])
# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
# prob = cp.Problem(cp.Minimize(cost), constraints)
# prob.solve()
# edit_costs_new = x.value
# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
# residual = np.sqrt(prob.value)
# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
# # scipy.optimize.minimize.
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
# w2 = nb_cost_mat[:,3]
# w3 = dis_k_vec
# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
# + w2 * x[2] - w3 * x[3]) ** 2)
# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
# edit_costs_new = res.x[0:3]
# residual = res.fun
# method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
# # method 4: tune c_vir, c_eir and alpha by QP function
# # scipy.optimize.least_squares. An initial guess is required.
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
# w2 = nb_cost_mat[:,3]
# w3 = dis_k_vec
# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
# + w2 * x[2] - w3 * x[3]) ** 2
# res = optimize.root(func, [0.9, 1.7, 0.75, 100])
# edit_costs_new = res.x
# residual = None
elif self.__ged_options['edit_cost'] == 'LETTER2':
# # 1. if c_vi != c_vr, c_ei != c_er.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
# x = cp.Variable(nb_cost_mat_new.shape[1])
# cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
## # 1.1 no constraints.
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
# # 1.2 c_vs <= c_vi + c_vr.
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
## # 2. if c_vi == c_vr, c_ei == c_er.
## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
## x = cp.Variable(nb_cost_mat_new.shape[1])
## cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
## # 2.1 no constraints.
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
### # 2.2 c_vs <= c_vi + c_vr.
### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
### np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
#
# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
# prob.solve()
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
# edit_costs_new = np.array(edit_costs_new)
# residual = np.sqrt(prob.value)
if not self.__triangle_rule and self.__allow_zeros:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif self.__triangle_rule and self.__allow_zeros:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01,
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif not self.__triangle_rule and not self.__allow_zeros:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
edit_costs_new = x.value
residual = np.sqrt(prob.value)
# elif method == 'inequality_modified':
# # c_vs <= c_vi + c_vr.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
# x = cp.Variable(nb_cost_mat_new.shape[1])
# cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
# prob = cp.Problem(cp.Minimize(cost_fun), constraints)
# prob.solve()
# # use same costs for insertion and removal rather than the fitted costs.
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
# edit_costs_new = np.array(edit_costs_new)
# residual = np.sqrt(prob.value)
elif self.__triangle_rule and not self.__allow_zeros:
# c_vs <= c_vi + c_vr.
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif rw_constraints == '2constraints': # @todo: rearrange it later.
# c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
edit_costs_new = x.value
residual = np.sqrt(prob.value)

elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
is_e_attr = np.count_nonzero(nb_cost_mat[:,5])
if self.__ds_name == 'SYNTHETICnew': # @todo: rearrange this later.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
# constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
np.array([0.0, 1.0, -1.0]).T@x == 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
# print(x.value)
edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
np.array([0.0])))
residual = np.sqrt(prob.value)
elif not self.__triangle_rule and self.__allow_zeros:
if is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif is_n_attr and not is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value, np.array([0.0])))
residual = np.sqrt(prob.value)
elif not is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
residual = np.sqrt(prob.value)
else:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
x.value[2:], np.array([0.0])))
residual = np.sqrt(prob.value)
elif self.__triangle_rule and self.__allow_zeros:
if is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif is_n_attr and not is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01,
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value, np.array([0.0])))
residual = np.sqrt(prob.value)
elif not is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
residual = np.sqrt(prob.value)
else:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
x.value[2:], np.array([0.0])))
residual = np.sqrt(prob.value)
elif not self.__triangle_rule and not self.__allow_zeros:
if is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif is_n_attr and not is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value, np.array([0.0])))
residual = np.sqrt(prob.value)
elif not is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
residual = np.sqrt(prob.value)
else:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
x.value[2:], np.array([0.0])))
residual = np.sqrt(prob.value)
elif self.__triangle_rule and not self.__allow_zeros:
# c_vs <= c_vi + c_vr.
if is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif is_n_attr and not is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value, np.array([0.0])))
residual = np.sqrt(prob.value)
elif not is_n_attr and is_e_attr:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
residual = np.sqrt(prob.value)
else:
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
x = cp.Variable(nb_cost_mat_new.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
x.value[2:], np.array([0.0])))
residual = np.sqrt(prob.value)

elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled.
if not self.__triangle_rule and self.__allow_zeros:
x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif self.__triangle_rule and self.__allow_zeros:
x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif not self.__triangle_rule and not self.__allow_zeros:
x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
elif self.__triangle_rule and not self.__allow_zeros:
x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
else:
raise Exception('The edit cost "%s" is not supported for the update process.' % self.__ged_options['edit_cost'])
# # method 1: simple least square method.
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
# rcond=None)
# # method 2: least square method with x_i >= 0.
# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
# method 3: solve as a quadratic program with constraints.
# P = np.dot(nb_cost_mat.T, nb_cost_mat)
# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
# G = -1 * np.identity(nb_cost_mat.shape[1])
# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
# A = np.array([1 for i in range(nb_cost_mat.shape[1])])
# b = 1
# x = cp.Variable(nb_cost_mat.shape[1])
# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
# [G@x <= h])
# prob.solve()
# edit_costs_new = x.value
# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
# G = -1 * np.identity(nb_cost_mat.shape[1])
# h = np.array([0 for i in range(nb_cost_mat.shape[1])])
x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
self.__execute_cvx(prob)
edit_costs_new = x.value
residual = np.sqrt(prob.value)
# method 4:
return edit_costs_new, residual
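Every branch of `__update_ecc` above instantiates the same pattern: a nonnegative least-squares fit of the edit costs to the kernel distances, optionally with triangle-rule constraints. A minimal sketch of that pattern on synthetic data (the six columns stand for c_vi, c_vr, c_vs, c_ei, c_er, c_es, as in the 'CONSTANT' case above):

```
import numpy as np
import cvxpy as cp

np.random.seed(0)
nb_cost_mat = np.random.randint(0, 10, size=(20, 6))  # counts of edit operations.
dis_k_vec = np.random.rand(20)  # target distances in kernel space.

x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [x >= 0.0,
               # triangle rules: c_vi + c_vr >= c_vs and c_ei + c_er >= c_es.
               np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T @ x >= 0.0,
               np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T @ x >= 0.0]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
edit_costs_new, residual = x.value, np.sqrt(prob.value)
print(edit_costs_new, residual)
```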
def __execute_cvx(self, prob):
try:
prob.solve(verbose=(self._verbose>=2))
except MemoryError as error0:
if self._verbose >= 2:
print('\nUsing solver "OSQP" caused a memory error.')
print('the original error message is\n', error0)
print('solver status: ', prob.status)
print('trying solver "CVXOPT" instead...\n')
try:
prob.solve(solver=cp.CVXOPT, verbose=(self._verbose>=2))
except Exception as error1:
if self._verbose >= 2:
print('\nAn error occurred when using solver "CVXOPT".')
print('the original error message is\n', error1)
print('solver status: ', prob.status)
print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
prob.solve(solver=cp.MOSEK, verbose=(self._verbose>=2))
else:
if self._verbose >= 2:
print('solver status: ', prob.status)
else:
if self._verbose >= 2:
print('solver status: ', prob.status)
if self._verbose >= 2:
print()
# Learn cost matrices.
# Initialize cost learner.
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec)
# Run cost learner.
cml.update(dis_k_vec, self._dataset.graphs, options)
# Get results.
results = cml.get_results()
self.__converged = results['converged']
self.__itrs = results['itrs']
self.__num_updates_ecs = results['num_updates_ecs']
cost_list = results['cost_list']
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)]
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):]

def __gmg_bcu(self):
@@ -913,12 +348,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
for g in graphs:
ged_env.add_nx_graph(g, '')
graph_ids = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels))
ged_env.set_label_costs(node_label_costs, edge_label_costs)
set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median')
ged_env.init(init_type=self.__ged_options['init_option'])
# Set up the median graph estimator.
self.__mge = MedianGraphEstimatorPy(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy()
if 'seed' not in options:


+ 97
- 87
gklearn/tests/test_graph_kernels.py View File

@@ -52,94 +52,104 @@ def chooseDataset(ds_name):
return dataset


# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_commonwalkkernel(ds_name, weight, compute_method):
# """Test common walk kernel.
# """
# from gklearn.kernels.commonWalkKernel import commonwalkkernel
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_CommonWalk(ds_name, parallel, weight, compute_method):
"""Test common walk kernel.
"""
from gklearn.kernels import CommonWalk
import networkx as nx
dataset = chooseDataset(ds_name)
dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
# Gn, y = chooseDataset(ds_name)
try:
graph_kernel = CommonWalk(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
weight=weight,
compute_method=compute_method)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

# try:
# Kmatrix, run_time, idx = commonwalkkernel(Gn,
# node_label='atom',
# edge_label='bond_type',
# weight=weight,
# compute_method=compute_method,
# # parallel=parallel,
# n_jobs=multiprocessing.cpu_count(),
# verbose=True)
# except Exception as exception:
# assert False, exception
except Exception as exception:
assert False, exception
# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
# @pytest.mark.parametrize('remove_totters', [True, False])
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_marginalizedkernel(ds_name, remove_totters):
# """Test marginalized kernel.
# """
# from gklearn.kernels.marginalizedKernel import marginalizedkernel
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@pytest.mark.parametrize('remove_totters', [False]) #[True, False])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_Marginalized(ds_name, parallel, remove_totters):
"""Test marginalized kernel.
"""
from gklearn.kernels import Marginalized
# Gn, y = chooseDataset(ds_name)
dataset = chooseDataset(ds_name)
try:
graph_kernel = Marginalized(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
p_quit=0.5,
n_iteration=2,
remove_totters=remove_totters)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

# try:
# Kmatrix, run_time = marginalizedkernel(Gn,
# node_label='atom',
# edge_label='bond_type',
# p_quit=0.5,
# n_iteration=2,
# remove_totters=remove_totters,
# # parallel=parallel,
# n_jobs=multiprocessing.cpu_count(),
# verbose=True)
# except Exception as exception:
# assert False, exception
except Exception as exception:
assert False, exception
# @pytest.mark.parametrize(
# 'compute_method,ds_name,sub_kernel',
# [
# 'compute_method,ds_name,sub_kernel',
# [
# # ('sylvester', 'Alkane', None),
# # ('conjugate', 'Alkane', None),
# # ('conjugate', 'AIDS', None),
# # ('fp', 'Alkane', None),
# # ('fp', 'AIDS', None),
# ('spectral', 'Alkane', 'exp'),
# ('spectral', 'Alkane', 'geo'),
# ]
# ('spectral', 'Alkane', 'exp'),
# ('spectral', 'Alkane', 'geo'),
# ]
# )
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
# """Test random walk kernel kernel.
# """
# from gklearn.kernels.randomWalkKernel import randomwalkkernel
# from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
# import functools
# """Test random walk kernel kernel.
# """
# from gklearn.kernels.randomWalkKernel import randomwalkkernel
# from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
# import functools
# Gn, y = chooseDataset(ds_name)
# Gn, y = chooseDataset(ds_name)

# mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
# sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
# try:
# Kmatrix, run_time, idx = randomwalkkernel(Gn,
# compute_method=compute_method,
# weight=1e-3,
# p=None,
# q=None,
# edge_weight=None,
# node_kernels=sub_kernels,
# edge_kernels=sub_kernels,
# node_label='atom',
# edge_label='bond_type',
# sub_kernel=sub_kernel,
# # parallel=parallel,
# n_jobs=multiprocessing.cpu_count(),
# verbose=True)
# except Exception as exception:
# assert False, exception
# mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
# sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
# try:
# Kmatrix, run_time, idx = randomwalkkernel(Gn,
# compute_method=compute_method,
# weight=1e-3,
# p=None,
# q=None,
# edge_weight=None,
# node_kernels=sub_kernels,
# edge_kernels=sub_kernels,
# node_label='atom',
# edge_label='bond_type',
# sub_kernel=sub_kernel,
# # parallel=parallel,
# n_jobs=multiprocessing.cpu_count(),
# verbose=True)
# except Exception as exception:
# assert False, exception

@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint'])
@@ -157,9 +167,9 @@ def test_ShortestPath(ds_name, parallel):
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
try:
graph_kernel = ShortestPath(node_labels=dataset.node_labels,
node_attrs=dataset.node_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
node_kernels=sub_kernels)
node_attrs=dataset.node_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
node_kernels=sub_kernels)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -187,12 +197,12 @@ def test_StructuralSP(ds_name, parallel):
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
try:
graph_kernel = StructuralSP(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs,
edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
node_kernels=sub_kernels,
edge_kernels=sub_kernels)
edge_labels=dataset.edge_labels,
node_attrs=dataset.node_attrs,
edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
node_kernels=sub_kernels,
edge_kernels=sub_kernels)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -218,9 +228,9 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
try:
graph_kernel = PathUpToH(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
depth=2, k_func=k_func, compute_method=compute_method)
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
depth=2, k_func=k_func, compute_method=compute_method)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -245,9 +255,9 @@ def test_Treelet(ds_name, parallel):
pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
try:
graph_kernel = Treelet(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
sub_kernel=pkernel)
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
sub_kernel=pkernel)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
@@ -271,9 +281,9 @@ def test_WLSubtree(ds_name, parallel):

try:
graph_kernel = WLSubtree(node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
height=2)
edge_labels=dataset.edge_labels,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
height=2)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],


+ 53
- 0
gklearn/utils/graph_synthesizer.py View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 11 18:10:06 2020

@author: ljia
"""
import numpy as np
import networkx as nx
import random


class GraphSynthesizer(object):
def __init__(self):
pass
def random_graph(self, num_nodes, num_edges, num_node_labels=0, num_edge_labels=0, seed=None, directed=False, max_num_edges=None, all_edges=None):
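# Note: "seed" and "directed" are accepted but not yet used in this method.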
g = nx.Graph()
if num_node_labels > 0:
node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes)
for i in range(0, num_nodes):
g.add_node(str(i), atom=node_labels[i]) # @todo: update "atom".
else:
for i in range(0, num_nodes):
g.add_node(str(i))

if num_edge_labels > 0:
edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges)
for idx, i in enumerate(random.sample(range(0, max_num_edges), num_edges)):
node1, node2 = all_edges[i]
g.add_edge(str(node1), str(node2), bond_type=edge_labels[idx]) # @todo: update "bond_type".
else:
for i in random.sample(range(0, max_num_edges), num_edges):
node1, node2 = all_edges[i]
g.add_edge(str(node1), str(node2))
return g
def unified_graphs(self, num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False):
max_num_edges = int((num_nodes - 1) * num_nodes / 2)
if num_edges > max_num_edges:
raise Exception('Too many edges.')
all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)] # @todo: optimize. No directed graphs.
graphs = []
for idx in range(0, num_graphs):
graphs.append(self.random_graph(num_nodes, num_edges, num_node_labels=num_node_labels, num_edge_labels=num_edge_labels, seed=seed, directed=directed, max_num_edges=max_num_edges, all_edges=all_edges))
return graphs
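A quick usage sketch of the synthesizer (assuming the module is importable as `gklearn.utils.graph_synthesizer`, per the file header above; the label counts are arbitrary):

```
from gklearn.utils.graph_synthesizer import GraphSynthesizer

gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=10, num_nodes=20, num_edges=40,
                               num_node_labels=3, num_edge_labels=2)
print(len(graphs))
print(graphs[0].nodes(data=True))
```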

+ 4
- 4
gklearn/utils/parallel.py View File

@@ -12,7 +12,7 @@ import sys

def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker=None,
glbv=None, method=None, n_jobs=None, chunksize=None, itr_desc='',
verbose=2):
verbose=True):
'''
'''
if method == 'imap_unordered':
@@ -30,7 +30,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
else:
chunksize = 100
for result in (tqdm(pool.imap_unordered(func, itr, chunksize),
desc=itr_desc, file=sys.stdout) if verbose == 2 else
desc=itr_desc, file=sys.stdout) if verbose else
pool.imap_unordered(func, itr, chunksize)):
func_assign(result, var_to_assign)
pool.close()
@@ -45,7 +45,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
else:
chunksize = 100
for result in (tqdm(pool.imap_unordered(func, itr, chunksize),
desc=itr_desc, file=sys.stdout) if verbose == 2 else
desc=itr_desc, file=sys.stdout) if verbose else
pool.imap_unordered(func, itr, chunksize)):
func_assign(result, var_to_assign)
pool.close()
@@ -54,7 +54,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker

def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None,
method='imap_unordered', n_jobs=None, chunksize=None,
verbose=True):
verbose=True): # @todo: Gn seems not necessary.
from itertools import combinations_with_replacement
def func_assign(result, var_to_assign):
var_to_assign[result[0]][result[1]] = result[2]


+ 64
- 0
gklearn/utils/utils.py View File

@@ -222,6 +222,70 @@ def direct_product(G1, G2, node_label, edge_label):
return gt


def direct_product_graph(G1, G2, node_labels, edge_labels):
"""Return the direct/tensor product of directed graphs G1 and G2.

Parameters
----------
G1, G2 : NetworkX graph
The original graphs.
node_labels : list
A list of node attributes used as labels.
edge_labels : list
A list of edge attributes used as labels.
Returns
-------
gt : NetworkX graph
The direct product graph of G1 and G2.

Notes
-----
This method differs from networkx.tensor_product in that it only adds a node or edge to the product graph when the corresponding nodes or edges of G1 and G2 share the same labels.

References
----------
.. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003.
"""
from itertools import product
gt = nx.DiGraph()
# add nodes
for u, v in product(G1, G2):
label1 = tuple(G1.nodes[u][nl] for nl in node_labels)
label2 = tuple(G2.nodes[v][nl] for nl in node_labels)
if label1 == label2:
gt.add_node((u, v), node_label=label1)

# Add edges. This is faster for sparse graphs (not too many edges), which is the most common case for now.
for (u1, v1), (u2, v2) in product(G1.edges, G2.edges):
if (u1, u2) in gt and (v1, v2) in gt:
label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels)
label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels)
if label1 == label2:
gt.add_edge((u1, u2), (v1, v2), edge_label=label1)


# # add edges, faster for dense graphs (a lot of edges, complete graph would be super).
# for u, v in product(gt, gt):
# if (u[0], v[0]) in G1.edges and (
# u[1], v[1]
# ) in G2.edges and G1.edges[u[0],
# v[0]][edge_label] == G2.edges[u[1],
# v[1]][edge_label]:
# gt.add_edge((u[0], u[1]), (v[0], v[1]))
# gt.edges[(u[0], u[1]), (v[0], v[1])].update({
# edge_label:
# G1.edges[u[0], v[0]][edge_label]
# })

# relabel nodes using consecutive integers for convenience of kernel calculation.
# gt = nx.convert_node_labels_to_integers(
# gt, first_label=0, label_attribute='label_orignal')
return gt
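A small worked example of the labeled direct product (a sketch assuming the function is importable from `gklearn.utils.utils`, per the file header; the 'atom'/'bond_type' label names are illustrative):

```
import networkx as nx
from gklearn.utils.utils import direct_product_graph

g1 = nx.Graph()
g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'})])
g1.add_edge(0, 1, bond_type='single')

g2 = nx.Graph()
g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'N'})])
g2.add_edge(0, 1, bond_type='single')

gt = direct_product_graph(g1, g2, ['atom'], ['bond_type'])
print(gt.nodes(data=True))  # only label-matching node pairs survive.
print(gt.edges(data=True))
```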


def graph_deepcopy(G):
"""Deep copy a graph, including deep copy of all nodes, edges and
attributes of the graph, nodes and edges.


+ 1
- 1
requirements.txt View File

@@ -1,6 +1,6 @@
numpy>=1.16.2
scipy>=1.1.0
matplotlib>=3.0.0
matplotlib>=3.1.0
networkx>=2.2
scikit-learn>=0.20.0
tabulate>=0.8.2


+ 1
- 1
requirements_pypi.txt View File

@@ -1,6 +1,6 @@
numpy>=1.16.2
scipy>=1.1.0
matplotlib>=3.0.0
matplotlib>=3.1.0
networkx>=2.2
scikit-learn>=0.20.0
tabulate>=0.8.2

