@@ -0,0 +1,21 @@ | |||
# About graph kernels.
## (Random walk) Sylvester equation kernel. | |||
### ImportError: cannot import name 'frange' from 'matplotlib.mlab' | |||
You are using an outdated `control` with a recent `matplotlib`. `mlab.frange` was removed in `matplotlib-3.1.0`, and `control` removed the call in `control-0.8.2`. | |||
Update your `control` package. | |||
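A quick way to check which versions are installed (a minimal sketch, assuming both packages expose `__version__`):
```
import control
import matplotlib

# frange was removed in matplotlib 3.1.0; control stopped calling it in 0.8.2.
print('control:', control.__version__)
print('matplotlib:', matplotlib.__version__)
```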
### Intel MKL FATAL ERROR: Cannot load libmkl_avx2.so or libmkl_def.so. | |||
The Intel Math Kernel Library (MKL) is missing or not properly configured. The MKL appears to be required by the `control` module.
Install MKL, then add the following to your environment:
``` | |||
export PATH=/opt/intel/bin:$PATH | |||
export LD_LIBRARY_PATH=/opt/intel/lib/intel64:/opt/intel/mkl/lib/intel64:$LD_LIBRARY_PATH | |||
``` |
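One way to sanity-check the setup afterwards (a sketch, assuming a NumPy build linked against MKL, e.g. from conda) is to inspect NumPy's build configuration:
```
import numpy

# With an MKL-backed stack, the BLAS/LAPACK sections printed here should
# list mkl libraries; if not, the MKL paths above are not being picked up.
numpy.show_config()
```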
@@ -12,12 +12,12 @@ A Python package for graph kernels, graph edit distances and graph pre-image pro | |||
* python>=3.5 | |||
* numpy>=1.16.2 | |||
* scipy>=1.1.0 | |||
-* matplotlib>=3.0.0
+* matplotlib>=3.1.0
* networkx>=2.2 | |||
* scikit-learn>=0.20.0 | |||
* tabulate>=0.8.2 | |||
* tqdm>=4.26.0 | |||
-* control==0.8.0 (for generalized random walk kernels only)
+* control>=0.8.2 (for generalized random walk kernels only)
* slycot==0.3.3 (for generalized random walk kernels only, which requires a fortran compiler, gfortran for example) | |||
## How to use? | |||
@@ -0,0 +1,54 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Sep 21 10:34:26 2020 | |||
@author: ljia | |||
""" | |||
from utils import Graph_Kernel_List, compute_graph_kernel | |||
def generate_graphs(): | |||
from gklearn.utils.graph_synthesizer import GraphSynthesizer | |||
gsyzer = GraphSynthesizer() | |||
graphs = gsyzer.unified_graphs(num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False) | |||
return graphs | |||
def xp_synthesized_graphs_dataset_size():
# Generate graphs. | |||
graphs = generate_graphs() | |||
# Run and save. | |||
import pickle | |||
import os | |||
save_dir = 'outputs/synthesized_graphs_N/' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
run_times = {} | |||
for kernel_name in Graph_Kernel_List: | |||
print() | |||
print('Kernel:', kernel_name) | |||
run_times[kernel_name] = [] | |||
for num_graphs in [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]: | |||
print() | |||
print('Number of graphs:', num_graphs) | |||
sub_graphs = [g.copy() for g in graphs[0:num_graphs]] | |||
gram_matrix, run_time = compute_graph_kernel(sub_graphs, kernel_name) | |||
run_times[kernel_name].append(run_time) | |||
pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_graphs) + '.pkl', 'wb')) | |||
# Save all. | |||
pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb')) | |||
return | |||
if __name__ == '__main__': | |||
	xp_synthesized_graphs_dataset_size()
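Once the script finishes, the pickled timings can be reloaded for inspection or plotting; a minimal sketch, assuming the default `save_dir` above:
```
import pickle

with open('outputs/synthesized_graphs_N/run_times.pkl', 'rb') as f:
	run_times = pickle.load(f)
# run_times maps each kernel name to a list of runtimes, one per dataset size.
print(run_times['WLSubtree'])
```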
@@ -0,0 +1,54 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Sep 21 10:34:26 2020 | |||
@author: ljia | |||
""" | |||
from utils import Graph_Kernel_List, compute_graph_kernel | |||
def generate_graphs(degree): | |||
from gklearn.utils.graph_synthesizer import GraphSynthesizer | |||
gsyzer = GraphSynthesizer() | |||
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=int(10*degree), num_node_labels=0, num_edge_labels=0, seed=None, directed=False) | |||
return graphs | |||
def xp_synthesized_graphs_degrees():
# Run and save. | |||
import pickle | |||
import os | |||
save_dir = 'outputs/synthesized_graphs_degrees/' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
run_times = {} | |||
for kernel_name in Graph_Kernel_List: | |||
print() | |||
print('Kernel:', kernel_name) | |||
run_times[kernel_name] = [] | |||
for degree in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]: | |||
print() | |||
print('Degree:', degree) | |||
# Generate graphs. | |||
graphs = generate_graphs(degree) | |||
# Compute Gram matrix. | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
run_times[kernel_name].append(run_time) | |||
pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(degree) + '.pkl', 'wb')) | |||
# Save all. | |||
pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb')) | |||
return | |||
if __name__ == '__main__': | |||
	xp_synthesized_graphs_degrees()
@@ -0,0 +1,54 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Sep 21 10:34:26 2020 | |||
@author: ljia | |||
""" | |||
from utils import Graph_Kernel_List_ESym, compute_graph_kernel | |||
def generate_graphs(num_el_alp): | |||
from gklearn.utils.graph_synthesizer import GraphSynthesizer | |||
gsyzer = GraphSynthesizer() | |||
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=num_el_alp, seed=None, directed=False) | |||
return graphs | |||
def xp_synthesized_graphs_num_edge_label_alphabet():
# Run and save. | |||
import pickle | |||
import os | |||
save_dir = 'outputs/synthesized_graphs_num_edge_label_alphabet/' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
run_times = {} | |||
for kernel_name in Graph_Kernel_List_ESym: | |||
print() | |||
print('Kernel:', kernel_name) | |||
run_times[kernel_name] = [] | |||
for num_el_alp in [0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40]: | |||
print() | |||
			print('Size of the edge label alphabet:', num_el_alp)
# Generate graphs. | |||
graphs = generate_graphs(num_el_alp) | |||
# Compute Gram matrix. | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
run_times[kernel_name].append(run_time) | |||
pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_el_alp) + '.pkl', 'wb')) | |||
# Save all. | |||
pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb')) | |||
return | |||
if __name__ == '__main__': | |||
	xp_synthesized_graphs_num_edge_label_alphabet()
@@ -0,0 +1,54 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Sep 21 10:34:26 2020 | |||
@author: ljia | |||
""" | |||
from utils import Graph_Kernel_List_VSym, compute_graph_kernel | |||
def generate_graphs(num_nl_alp): | |||
from gklearn.utils.graph_synthesizer import GraphSynthesizer | |||
gsyzer = GraphSynthesizer() | |||
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=20, num_edges=40, num_node_labels=num_nl_alp, num_edge_labels=0, seed=None, directed=False) | |||
return graphs | |||
def xp_synthesized_graphs_num_node_label_alphabet():
# Run and save. | |||
import pickle | |||
import os | |||
save_dir = 'outputs/synthesized_graphs_num_node_label_alphabet/' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
run_times = {} | |||
for kernel_name in Graph_Kernel_List_VSym: | |||
print() | |||
print('Kernel:', kernel_name) | |||
run_times[kernel_name] = [] | |||
for num_nl_alp in [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20]: | |||
print() | |||
			print('Size of the node label alphabet:', num_nl_alp)
# Generate graphs. | |||
graphs = generate_graphs(num_nl_alp) | |||
# Compute Gram matrix. | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
run_times[kernel_name].append(run_time) | |||
pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_nl_alp) + '.pkl', 'wb')) | |||
# Save all. | |||
pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb')) | |||
return | |||
if __name__ == '__main__': | |||
	xp_synthesized_graphs_num_node_label_alphabet()
@@ -0,0 +1,54 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Sep 21 10:34:26 2020 | |||
@author: ljia | |||
""" | |||
from utils import Graph_Kernel_List, compute_graph_kernel | |||
def generate_graphs(num_nodes): | |||
from gklearn.utils.graph_synthesizer import GraphSynthesizer | |||
gsyzer = GraphSynthesizer() | |||
graphs = gsyzer.unified_graphs(num_graphs=100, num_nodes=num_nodes, num_edges=int(num_nodes*2), num_node_labels=0, num_edge_labels=0, seed=None, directed=False) | |||
return graphs | |||
def xp_synthesized_graphs_num_nodes():
# Run and save. | |||
import pickle | |||
import os | |||
save_dir = 'outputs/synthesized_graphs_num_nodes/' | |||
if not os.path.exists(save_dir): | |||
os.makedirs(save_dir) | |||
run_times = {} | |||
for kernel_name in Graph_Kernel_List: | |||
print() | |||
print('Kernel:', kernel_name) | |||
run_times[kernel_name] = [] | |||
for num_nodes in [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]: | |||
print() | |||
print('Number of nodes:', num_nodes) | |||
# Generate graphs. | |||
graphs = generate_graphs(num_nodes) | |||
# Compute Gram matrix. | |||
gram_matrix, run_time = compute_graph_kernel(graphs, kernel_name) | |||
run_times[kernel_name].append(run_time) | |||
pickle.dump(run_times, open(save_dir + 'run_time.' + kernel_name + '.' + str(num_nodes) + '.pkl', 'wb')) | |||
# Save all. | |||
pickle.dump(run_times, open(save_dir + 'run_times.pkl', 'wb')) | |||
return | |||
if __name__ == '__main__': | |||
	xp_synthesized_graphs_num_nodes()
@@ -0,0 +1,106 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Tue Sep 22 11:33:28 2020 | |||
@author: ljia | |||
""" | |||
# All kernels covered by the synthetic-graph experiments.
Graph_Kernel_List = ['PathUpToH', 'WLSubtree', 'SylvesterEquation', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'StructuralSP', 'CommonWalk']
# Graph_Kernel_List = ['CommonWalk', 'Marginalized', 'SylvesterEquation', 'ConjugateGradient', 'FixedPoint', 'SpectralDecomposition', 'ShortestPath', 'StructuralSP', 'PathUpToH', 'Treelet', 'WLSubtree']
# Kernels that can use symbolic node labels.
Graph_Kernel_List_VSym = ['PathUpToH', 'WLSubtree', 'Marginalized', 'ShortestPath', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'StructuralSP', 'CommonWalk']
# Kernels that can use symbolic edge labels.
Graph_Kernel_List_ESym = ['PathUpToH', 'Marginalized', 'Treelet', 'ConjugateGradient', 'FixedPoint', 'StructuralSP', 'CommonWalk']
# Kernels that can use non-symbolic (continuous) node attributes.
Graph_Kernel_List_VCon = ['ShortestPath', 'ConjugateGradient', 'FixedPoint', 'StructuralSP']
# Kernels that can use non-symbolic (continuous) edge attributes.
Graph_Kernel_List_ECon = ['ConjugateGradient', 'FixedPoint', 'StructuralSP']
def compute_graph_kernel(graphs, kernel_name): | |||
import multiprocessing | |||
if kernel_name == 'CommonWalk': | |||
from gklearn.kernels.commonWalkKernel import commonwalkkernel | |||
estimator = commonwalkkernel | |||
params = {'compute_method': 'geo', 'weight': 0.1} | |||
elif kernel_name == 'Marginalized': | |||
from gklearn.kernels.marginalizedKernel import marginalizedkernel | |||
estimator = marginalizedkernel | |||
params = {'p_quit': 0.5, 'n_iteration': 5, 'remove_totters': False} | |||
elif kernel_name == 'SylvesterEquation': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
params = {'compute_method': 'sylvester', 'weight': 0.1} | |||
elif kernel_name == 'ConjugateGradient': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
params = {'compute_method': 'conjugate', 'weight': 0.1, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel} | |||
elif kernel_name == 'FixedPoint': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
params = {'compute_method': 'fp', 'weight': 1e-3, 'node_kernels': sub_kernel, 'edge_kernels': sub_kernel} | |||
elif kernel_name == 'SpectralDecomposition': | |||
from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
estimator = randomwalkkernel | |||
params = {'compute_method': 'spectral', 'sub_kernel': 'geo', 'weight': 0.1} | |||
elif kernel_name == 'ShortestPath': | |||
from gklearn.kernels.spKernel import spkernel | |||
estimator = spkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
params = {'node_kernels': sub_kernel} | |||
elif kernel_name == 'StructuralSP': | |||
from gklearn.kernels.structuralspKernel import structuralspkernel | |||
estimator = structuralspkernel | |||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
import functools | |||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
params = {'node_kernels': sub_kernel, 'edge_kernels': sub_kernel} | |||
elif kernel_name == 'PathUpToH': | |||
from gklearn.kernels.untilHPathKernel import untilhpathkernel | |||
estimator = untilhpathkernel | |||
params = {'depth': 5, 'k_func': 'MinMax', 'compute_method': 'trie'} | |||
elif kernel_name == 'Treelet': | |||
from gklearn.kernels.treeletKernel import treeletkernel | |||
estimator = treeletkernel | |||
from gklearn.utils.kernels import polynomialkernel | |||
import functools | |||
sub_kernel = functools.partial(polynomialkernel, d=4, c=1e+8) | |||
params = {'sub_kernel': sub_kernel} | |||
elif kernel_name == 'WLSubtree': | |||
from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel | |||
estimator = weisfeilerlehmankernel | |||
		params = {'base_kernel': 'subtree', 'height': 5}
	else:
		raise ValueError('Unknown kernel name: %s.' % kernel_name)

	# params['parallel'] = None
	params['n_jobs'] = multiprocessing.cpu_count()
	params['verbose'] = True
results = estimator(graphs, **params) | |||
return results[0], results[1] |
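For reference, a minimal sketch of how these helpers combine, mirroring the experiment scripts above:
```
from utils import Graph_Kernel_List, compute_graph_kernel
from gklearn.utils.graph_synthesizer import GraphSynthesizer

# Synthesize a few small unlabeled graphs and time one kernel on them.
gsyzer = GraphSynthesizer()
graphs = gsyzer.unified_graphs(num_graphs=10, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False)
gram_matrix, run_time = compute_graph_kernel(graphs, 'WLSubtree')
print(gram_matrix.shape, run_time)
```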
@@ -637,6 +637,10 @@ class GEDEnv(object): | |||
return [i for i in self.__internal_to_original_node_ids[graph_id].values()] | |||
def get_node_cost(self, node_label_1, node_label_2): | |||
return self.__ged_data.node_cost(node_label_1, node_label_2) | |||
def get_node_rel_cost(self, node_label_1, node_label_2): | |||
""" | |||
/*! | |||
@@ -650,7 +654,7 @@ class GEDEnv(object): | |||
node_label_1 = tuple(sorted(node_label_1.items(), key=lambda kv: kv[0])) | |||
if isinstance(node_label_2, dict): | |||
node_label_2 = tuple(sorted(node_label_2.items(), key=lambda kv: kv[0])) | |||
-		return self.__ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2)
+		return self.__ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2) # @todo: may need to use node_cost() instead (or change node_cost() and modify ged_method for pre-defined cost matrices.)
def get_node_del_cost(self, node_label): | |||
@@ -677,6 +681,10 @@ class GEDEnv(object): | |||
if isinstance(node_label, dict): | |||
node_label = tuple(sorted(node_label.items(), key=lambda kv: kv[0])) | |||
return self.__ged_data._edit_cost.node_ins_cost_fun(node_label) | |||
def get_edge_cost(self, edge_label_1, edge_label_2): | |||
return self.__ged_data.edge_cost(edge_label_1, edge_label_2) | |||
def get_edge_rel_cost(self, edge_label_1, edge_label_2): | |||
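Both `get_node_rel_cost` and `get_edge_rel_cost` normalize `dict` labels into tuples sorted by key before the cost lookup; a small illustration of that conversion (hypothetical label values):
```
node_label = {'chem': 'C', 'charge': '0'}  # hypothetical attribute dict
normalized = tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
print(normalized)  # (('charge', '0'), ('chem', 'C'))
```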
@@ -0,0 +1,9 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Tue Jul 7 16:07:25 2020 | |||
@author: ljia | |||
""" | |||
from gklearn.ged.learning.cost_matrices_learner import CostMatricesLearner |
@@ -0,0 +1,148 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Tue Jul 7 11:42:48 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import cvxpy as cp | |||
import time | |||
from gklearn.ged.learning.costs_learner import CostsLearner | |||
from gklearn.ged.util import compute_geds_cml | |||
class CostMatricesLearner(CostsLearner): | |||
def __init__(self, edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=False, verbose=2): | |||
super().__init__(parallel, verbose) | |||
self._edit_cost = edit_cost | |||
self._triangle_rule = triangle_rule | |||
self._allow_zeros = allow_zeros | |||
def fit(self, X, y): | |||
if self._edit_cost == 'LETTER': | |||
raise Exception('Cannot compute for cost "LETTER".') | |||
elif self._edit_cost == 'LETTER2': | |||
raise Exception('Cannot compute for cost "LETTER2".') | |||
elif self._edit_cost == 'NON_SYMBOLIC': | |||
raise Exception('Cannot compute for cost "NON_SYMBOLIC".') | |||
		elif self._edit_cost == 'CONSTANT': # @todo: nodes/edges may not be labeled.
if not self._triangle_rule and self._allow_zeros: | |||
w = cp.Variable(X.shape[1]) | |||
cost_fun = cp.sum_squares(X @ w - y) | |||
constraints = [w >= [0.0 for i in range(X.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.execute_cvx(prob) | |||
edit_costs_new = w.value | |||
residual = np.sqrt(prob.value) | |||
			elif self._triangle_rule and self._allow_zeros: # @todo
				x = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ x - y)
				constraints = [x >= [0.0 for i in range(X.shape[1])],
							np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
							np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01,
							np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
							np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01,
							np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
							np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = x.value
				residual = np.sqrt(prob.value)
			elif not self._triangle_rule and not self._allow_zeros: # @todo
				x = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ x - y)
				constraints = [x >= [0.01 for i in range(X.shape[1])]]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = x.value
				residual = np.sqrt(prob.value)
			elif self._triangle_rule and not self._allow_zeros: # @todo
				x = cp.Variable(X.shape[1])
				cost_fun = cp.sum_squares(X @ x - y)
				constraints = [x >= [0.01 for i in range(X.shape[1])],
							np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
							np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
				self.execute_cvx(prob)
				edit_costs_new = x.value
				residual = np.sqrt(prob.value)
		else:
			raise Exception('The edit cost "%s" is not supported for update progress.' % self._edit_cost)
self._cost_list.append(edit_costs_new) | |||
def init_geds_and_nb_eo(self, y, graphs): | |||
time0 = time.time() | |||
self._cost_list.append(np.concatenate((self._ged_options['node_label_costs'], | |||
self._ged_options['edge_label_costs']))) | |||
ged_vec, self._nb_eo = self.compute_geds_and_nb_eo(graphs) | |||
self._residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - y)))) | |||
self._runtime_list.append(time.time() - time0) | |||
if self._verbose >= 2: | |||
print('Current node label costs:', self._cost_list[-1][0:len(self._ged_options['node_label_costs'])]) | |||
print('Current edge label costs:', self._cost_list[-1][len(self._ged_options['node_label_costs']):]) | |||
print('Residual list:', self._residual_list) | |||
def update_geds_and_nb_eo(self, y, graphs, time0): | |||
self._ged_options['node_label_costs'] = self._cost_list[-1][0:len(self._ged_options['node_label_costs'])] | |||
self._ged_options['edge_label_costs'] = self._cost_list[-1][len(self._ged_options['node_label_costs']):] | |||
ged_vec, self._nb_eo = self.compute_geds_and_nb_eo(graphs) | |||
self._residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - y)))) | |||
self._runtime_list.append(time.time() - time0) | |||
def compute_geds_and_nb_eo(self, graphs): | |||
ged_vec, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=self._ged_options, parallel=self._parallel, verbose=(self._verbose > 1)) | |||
return ged_vec, np.array(n_edit_operations) | |||
def check_convergency(self): | |||
self._ec_changed = False | |||
for i, cost in enumerate(self._cost_list[-1]): | |||
if cost == 0: | |||
if self._cost_list[-2][i] > self._epsilon_ec: | |||
self._ec_changed = True | |||
break | |||
elif abs(cost - self._cost_list[-2][i]) / cost > self._epsilon_ec: | |||
self._ec_changed = True | |||
break | |||
# if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec: | |||
# ec_changed = True | |||
# break | |||
self._residual_changed = False | |||
if self._residual_list[-1] == 0: | |||
if self._residual_list[-2] > self._epsilon_residual: | |||
self._residual_changed = True | |||
elif abs(self._residual_list[-1] - self._residual_list[-2]) / self._residual_list[-1] > self._epsilon_residual: | |||
self._residual_changed = True | |||
self._converged = not (self._ec_changed or self._residual_changed) | |||
if self._converged: | |||
self._itrs_without_update += 1 | |||
else: | |||
self._itrs_without_update = 0 | |||
self._num_updates_ecs += 1 | |||
def print_current_states(self): | |||
print() | |||
print('-------------------------------------------------------------------------') | |||
print('States of iteration', self._itrs + 1) | |||
print('-------------------------------------------------------------------------') | |||
# print('Time spend:', self.__runtime_optimize_ec) | |||
print('Total number of iterations for optimizing:', self._itrs + 1) | |||
print('Total number of updating edit costs:', self._num_updates_ecs) | |||
		print('Has the optimization of edit costs converged:', self._converged)
print('Did edit costs change:', self._ec_changed) | |||
print('Did residual change:', self._residual_changed) | |||
print('Iterations without update:', self._itrs_without_update) | |||
print('Current node label costs:', self._cost_list[-1][0:len(self._ged_options['node_label_costs'])]) | |||
print('Current edge label costs:', self._cost_list[-1][len(self._ged_options['node_label_costs']):]) | |||
print('Residual list:', self._residual_list) | |||
print('-------------------------------------------------------------------------') |
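A minimal usage sketch with hypothetical data: `fit` expects a matrix of edit-operation counts and a vector of target distances, and solves a non-negative least-squares problem in CVXPY:
```
import numpy as np
from gklearn.ged.learning.cost_matrices_learner import CostMatricesLearner

X = np.random.rand(50, 6)  # hypothetical: 50 graph pairs x 6 edit-operation counts
y = np.random.rand(50)     # hypothetical: target distances to fit
learner = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=False, verbose=0)
learner.fit(X, y)
print(learner.get_results()['cost_list'][-1])  # fitted edit costs
```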
@@ -0,0 +1,175 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Tue Jul 7 11:30:31 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import cvxpy as cp | |||
import time | |||
from gklearn.utils import Timer | |||
class CostsLearner(object): | |||
def __init__(self, parallel, verbose): | |||
### To set. | |||
self._parallel = parallel | |||
self._verbose = verbose | |||
# For update(). | |||
self._time_limit_in_sec = 0 | |||
self._max_itrs = 100 | |||
self._max_itrs_without_update = 3 | |||
self._epsilon_residual = 0.01 | |||
self._epsilon_ec = 0.1 | |||
### To compute. | |||
self._residual_list = [] | |||
self._runtime_list = [] | |||
self._cost_list = [] | |||
self._nb_eo = None | |||
# For update(). | |||
self._itrs = 0 | |||
self._converged = False | |||
self._num_updates_ecs = 0 | |||
self._ec_changed = None | |||
self._residual_changed = None | |||
self._itrs_without_update = 0 | |||
### Both set and get. | |||
self._ged_options = None | |||
def fit(self, X, y): | |||
pass | |||
def preprocess(self): | |||
		pass # @todo: remove edit costs that are zero.
def postprocess(self): | |||
for i in range(len(self._cost_list[-1])): | |||
if -1e-9 <= self._cost_list[-1][i] <= 1e-9: | |||
self._cost_list[-1][i] = 0 | |||
if self._cost_list[-1][i] < 0: | |||
raise ValueError('The edit cost is negative.') | |||
def set_update_params(self, **kwargs): | |||
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', self._time_limit_in_sec) | |||
self._max_itrs = kwargs.get('max_itrs', self._max_itrs) | |||
self._max_itrs_without_update = kwargs.get('max_itrs_without_update', self._max_itrs_without_update) | |||
self._epsilon_residual = kwargs.get('epsilon_residual', self._epsilon_residual) | |||
self._epsilon_ec = kwargs.get('epsilon_ec', self._epsilon_ec) | |||
def update(self, y, graphs, ged_options, **kwargs): | |||
# Set parameters. | |||
self._ged_options = ged_options | |||
if kwargs != {}: | |||
self.set_update_params(**kwargs) | |||
# The initial iteration. | |||
if self._verbose >= 2: | |||
print('\ninitial:') | |||
self.init_geds_and_nb_eo(y, graphs) | |||
self._converged = False | |||
self._itrs_without_update = 0 | |||
self._itrs = 0 | |||
self._num_updates_ecs = 0 | |||
timer = Timer(self._time_limit_in_sec) | |||
# Run iterations from initial edit costs. | |||
while not self.termination_criterion_met(self._converged, timer, self._itrs, self._itrs_without_update): | |||
if self._verbose >= 2: | |||
print('\niteration', self._itrs + 1) | |||
time0 = time.time() | |||
# Fit GED space to the target space. | |||
self.preprocess() | |||
self.fit(self._nb_eo, y) | |||
self.postprocess() | |||
# Compute new GEDs and numbers of edit operations. | |||
self.update_geds_and_nb_eo(y, graphs, time0) | |||
# Check convergency. | |||
self.check_convergency() | |||
# Print current states. | |||
if self._verbose >= 2: | |||
self.print_current_states() | |||
self._itrs += 1 | |||
def init_geds_and_nb_eo(self, y, graphs): | |||
pass | |||
def update_geds_and_nb_eo(self, y, graphs, time0): | |||
pass | |||
def compute_geds_and_nb_eo(self, graphs): | |||
pass | |||
def check_convergency(self): | |||
pass | |||
def print_current_states(self): | |||
pass | |||
def termination_criterion_met(self, converged, timer, itr, itrs_without_update): | |||
if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False): | |||
# if self.__state == AlgorithmState.TERMINATED: | |||
# self.__state = AlgorithmState.INITIALIZED | |||
return True | |||
return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False) | |||
def execute_cvx(self, prob): | |||
try: | |||
prob.solve(verbose=(self._verbose>=2)) | |||
except MemoryError as error0: | |||
if self._verbose >= 2: | |||
print('\nUsing solver "OSQP" caused a memory error.') | |||
print('the original error message is\n', error0) | |||
print('solver status: ', prob.status) | |||
print('trying solver "CVXOPT" instead...\n') | |||
try: | |||
prob.solve(solver=cp.CVXOPT, verbose=(self._verbose>=2)) | |||
except Exception as error1: | |||
if self._verbose >= 2: | |||
					print('\nAn error occurred when using solver "CVXOPT".')
					print('the original error message is\n', error1)
					print('solver status: ', prob.status)
					print('trying solver "MOSEK" instead. Note that this solver is commercial and a license is required.\n')
prob.solve(solver=cp.MOSEK, verbose=(self._verbose>=2)) | |||
else: | |||
if self._verbose >= 2: | |||
print('solver status: ', prob.status) | |||
else: | |||
if self._verbose >= 2: | |||
print('solver status: ', prob.status) | |||
if self._verbose >= 2: | |||
print() | |||
def get_results(self): | |||
results = {} | |||
results['residual_list'] = self._residual_list | |||
results['runtime_list'] = self._runtime_list | |||
results['cost_list'] = self._cost_list | |||
results['nb_eo'] = self._nb_eo | |||
results['itrs'] = self._itrs | |||
results['converged'] = self._converged | |||
results['num_updates_ecs'] = self._num_updates_ecs | |||
results['ec_changed'] = self._ec_changed | |||
results['residual_changed'] = self._residual_changed | |||
results['itrs_without_update'] = self._itrs_without_update | |||
return results |
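A sketch of wiring the stopping parameters, assuming the `CostMatricesLearner` subclass shown earlier; `update` then alternates `fit` and GED re-computation until `termination_criterion_met` fires:
```
from gklearn.ged.learning.cost_matrices_learner import CostMatricesLearner

learner = CostMatricesLearner(edit_cost='CONSTANT', parallel=False, verbose=0)
learner.set_update_params(time_limit_in_sec=600, max_itrs=50, max_itrs_without_update=3, epsilon_residual=0.01, epsilon_ec=0.1)
# learner.update(y, graphs, ged_options) would run the optimization loop;
# the collected states are then available through:
results = learner.get_results()
print(results['itrs'], results['converged'])
```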
@@ -1,3 +1,4 @@ | |||
from gklearn.ged.median.median_graph_estimator import MedianGraphEstimator | |||
from gklearn.ged.median.median_graph_estimator_py import MedianGraphEstimatorPy | |||
from gklearn.ged.median.median_graph_estimator_cml import MedianGraphEstimatorCML | |||
from gklearn.ged.median.utils import constant_node_costs, mge_options_to_string |
@@ -1,3 +1,3 @@ | |||
from gklearn.ged.util.lsape_solver import LSAPESolver | |||
from gklearn.ged.util.util import compute_geds, ged_options_to_string | |||
-from gklearn.ged.util.util import compute_geds_cml
+from gklearn.ged.util.util import compute_geds_cml, label_costs_to_matrix
@@ -8,7 +8,11 @@ __author__ = "Linlin Jia" | |||
__date__ = "November 2018" | |||
from gklearn.kernels.graph_kernel import GraphKernel | |||
from gklearn.kernels.common_walk import CommonWalk | |||
from gklearn.kernels.marginalized import Marginalized | |||
from gklearn.kernels.random_walk import RandomWalk | |||
from gklearn.kernels.sylvester_equation import SylvesterEquation | |||
from gklearn.kernels.spectral_decomposition import SpectralDecomposition | |||
from gklearn.kernels.shortest_path import ShortestPath | |||
from gklearn.kernels.structural_sp import StructuralSP | |||
from gklearn.kernels.path_up_to_h import PathUpToH | |||
@@ -0,0 +1,282 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Tue Aug 18 11:21:31 2020 | |||
@author: ljia | |||
@references: | |||
[1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: | |||
Hardness results and efficient alternatives. Learning Theory and Kernel | |||
Machines, pages 129–143, 2003. | |||
""" | |||
import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.utils.utils import direct_product_graph | |||
from gklearn.kernels import GraphKernel | |||
class CommonWalk(GraphKernel): | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
self.__node_labels = kwargs.get('node_labels', []) | |||
self.__edge_labels = kwargs.get('edge_labels', []) | |||
self.__weight = kwargs.get('weight', 1) | |||
self.__compute_method = kwargs.get('compute_method', None) | |||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||
self.__compute_method = self.__compute_method.lower() | |||
def _compute_gm_series(self): | |||
self.__check_graphs(self._graphs) | |||
self.__add_dummy_labels(self._graphs) | |||
if not self.__ds_infos['directed']: # convert | |||
self._graphs = [G.to_directed() for G in self._graphs] | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
# direct product graph method - exponential | |||
if self.__compute_method == 'exp': | |||
for i, j in iterator: | |||
kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
# direct product graph method - geometric | |||
elif self.__compute_method == 'geo': | |||
for i, j in iterator: | |||
kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
return gram_matrix | |||
def _compute_gm_imap_unordered(self): | |||
self.__check_graphs(self._graphs) | |||
self.__add_dummy_labels(self._graphs) | |||
if not self.__ds_infos['directed']: # convert | |||
self._graphs = [G.to_directed() for G in self._graphs] | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
def init_worker(gn_toshare): | |||
global G_gn | |||
G_gn = gn_toshare | |||
# direct product graph method - exponential | |||
if self.__compute_method == 'exp': | |||
do_fun = self._wrapper_kernel_do_exp | |||
# direct product graph method - geometric | |||
elif self.__compute_method == 'geo': | |||
do_fun = self._wrapper_kernel_do_geo | |||
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||
glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) | |||
return gram_matrix | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self.__check_graphs(g_list + [g1]) | |||
self.__add_dummy_labels(g_list + [g1]) | |||
if not self.__ds_infos['directed']: # convert | |||
g1 = g1.to_directed() | |||
g_list = [G.to_directed() for G in g_list] | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
# direct product graph method - exponential | |||
if self.__compute_method == 'exp': | |||
for i in iterator: | |||
kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight) | |||
kernel_list[i] = kernel | |||
# direct product graph method - geometric | |||
elif self.__compute_method == 'geo': | |||
for i in iterator: | |||
kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight) | |||
kernel_list[i] = kernel | |||
return kernel_list | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self.__check_graphs(g_list + [g1]) | |||
self.__add_dummy_labels(g_list + [g1]) | |||
if not self.__ds_infos['directed']: # convert | |||
g1 = g1.to_directed() | |||
g_list = [G.to_directed() for G in g_list] | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
def init_worker(g1_toshare, g_list_toshare): | |||
global G_g1, G_g_list | |||
G_g1 = g1_toshare | |||
G_g_list = g_list_toshare | |||
# direct product graph method - exponential | |||
if self.__compute_method == 'exp': | |||
do_fun = self._wrapper_kernel_list_do_exp | |||
# direct product graph method - geometric | |||
elif self.__compute_method == 'geo': | |||
do_fun = self._wrapper_kernel_list_do_geo | |||
def func_assign(result, var_to_assign): | |||
var_to_assign[result[0]] = result[1] | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
return kernel_list | |||
def _wrapper_kernel_list_do_exp(self, itr): | |||
return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight) | |||
def _wrapper_kernel_list_do_geo(self, itr): | |||
return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight) | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self.__check_graphs([g1] + [g2]) | |||
self.__add_dummy_labels([g1] + [g2]) | |||
if not self.__ds_infos['directed']: # convert | |||
g1 = g1.to_directed() | |||
g2 = g2.to_directed() | |||
# direct product graph method - exponential | |||
if self.__compute_method == 'exp': | |||
kernel = self.__kernel_do_exp(g1, g2, self.__weight) | |||
# direct product graph method - geometric | |||
elif self.__compute_method == 'geo': | |||
kernel = self.__kernel_do_geo(g1, g2, self.__weight) | |||
return kernel | |||
def __kernel_do_exp(self, g1, g2, beta): | |||
"""Calculate common walk graph kernel between 2 graphs using exponential | |||
series. | |||
Parameters | |||
---------- | |||
g1, g2 : NetworkX graphs | |||
Graphs between which the kernels are calculated. | |||
		beta : float
			Weight parameter of the exponential series.
Return | |||
------ | |||
kernel : float | |||
The common walk Kernel between 2 graphs. | |||
""" | |||
# get tensor product / direct product | |||
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels) | |||
		# return 0 if the direct product graph has fewer than 2 nodes.
if nx.number_of_nodes(gp) < 2: | |||
return 0 | |||
A = nx.adjacency_matrix(gp).todense() | |||
ew, ev = np.linalg.eig(A) | |||
# # remove imaginary part if possible. | |||
# # @todo: don't know if it is necessary. | |||
# for i in range(len(ew)): | |||
# if np.abs(ew[i].imag) < 1e-9: | |||
# ew[i] = ew[i].real | |||
# for i in range(ev.shape[0]): | |||
# for j in range(ev.shape[1]): | |||
# if np.abs(ev[i, j].imag) < 1e-9: | |||
# ev[i, j] = ev[i, j].real | |||
D = np.zeros((len(ew), len(ew)), dtype=complex) # @todo: use complex? | |||
for i in range(len(ew)): | |||
D[i][i] = np.exp(beta * ew[i]) | |||
exp_D = ev * D * ev.T | |||
kernel = exp_D.sum() | |||
if (kernel.real == 0 and np.abs(kernel.imag) < 1e-9) or np.abs(kernel.imag / kernel.real) < 1e-9: | |||
kernel = kernel.real | |||
return kernel | |||
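	# For reference: since ev * D * ev.T reconstructs exp(beta * A) for a
	# symmetric adjacency matrix A of the direct product graph, the value
	# returned above amounts to
	#   k(G1, G2) = sum_{i,j} [exp(beta * A)]_{ij},
	# the exponential-series common walk kernel of [1].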
def _wrapper_kernel_do_exp(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight) | |||
def __kernel_do_geo(self, g1, g2, gamma): | |||
"""Calculate common walk graph kernel between 2 graphs using geometric | |||
series. | |||
Parameters | |||
---------- | |||
g1, g2 : NetworkX graphs | |||
Graphs between which the kernels are calculated. | |||
		gamma : float
			Weight parameter of the geometric series.
Return | |||
------ | |||
kernel : float | |||
The common walk Kernel between 2 graphs. | |||
""" | |||
# get tensor product / direct product | |||
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels) | |||
		# return 0 if the direct product graph has fewer than 2 nodes.
if nx.number_of_nodes(gp) < 2: | |||
return 0 | |||
A = nx.adjacency_matrix(gp).todense() | |||
mat = np.identity(len(A)) - gamma * A | |||
# try: | |||
return mat.I.sum() | |||
# except np.linalg.LinAlgError: | |||
# return np.nan | |||
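	# For reference: the value above is the geometric-series common walk kernel
	#   k(G1, G2) = sum_{i,j} [(I - gamma * A)^{-1}]_{ij} = sum_k gamma^k * (#common walks of length k),
	# which only converges when gamma is smaller than the reciprocal of the
	# spectral radius of A; otherwise the matrix inverse is not the series sum.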
def _wrapper_kernel_do_geo(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight) | |||
def __check_graphs(self, Gn): | |||
for g in Gn: | |||
if nx.number_of_nodes(g) == 1: | |||
				raise Exception('Graphs must contain more than one node to construct adjacency matrices.')
def __add_dummy_labels(self, Gn): | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__edge_labels = [SpecialLabel.DUMMY] |
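For reference, a minimal usage sketch of the class-based API. The exact signature and return values of the `GraphKernel` base class's `compute` method are assumed here (Gram matrix plus runtime); parameter names follow the constructor above:
```
import networkx as nx
from gklearn.kernels import CommonWalk

graphs = [nx.path_graph(4), nx.cycle_graph(5)]  # toy unlabeled graphs
kernel = CommonWalk(node_labels=[], edge_labels=[], weight=0.01, compute_method='geo', ds_infos={'directed': False})
gram_matrix, run_time = kernel.compute(graphs, parallel=None, verbose=0)
```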
@@ -10,6 +10,7 @@ import networkx as nx | |||
import multiprocessing | |||
import time | |||
class GraphKernel(object): | |||
def __init__(self): | |||
@@ -51,7 +51,7 @@ class Marginalized(GraphKernel): | |||
else: | |||
iterator = self._graphs | |||
# @todo: this may not work. | |||
-			self._graphs = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+			self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
@@ -108,13 +108,13 @@ class Marginalized(GraphKernel): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
if self.__remove_totters: | |||
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
if self._verbose >= 2: | |||
iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout) | |||
else: | |||
iterator = g_list | |||
# @todo: this may not work. | |||
-			g_list = [untotterTransformation(G, self.__node_label, self.__edge_label) for G in iterator]
+			g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
@@ -133,7 +133,7 @@ class Marginalized(GraphKernel): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
if self.__remove_totters: | |||
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
pool = Pool(self._n_jobs) | |||
itr = range(0, len(g_list)) | |||
if len(g_list) < 100 * self._n_jobs: | |||
@@ -177,8 +177,8 @@ class Marginalized(GraphKernel): | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self.__add_dummy_labels([g1] + [g2]) | |||
if self.__remove_totters: | |||
-			g1 = untotterTransformation(g1, self.__node_label, self.__edge_label) # @todo: this may not work.
-			g2 = untotterTransformation(g2, self.__node_label, self.__edge_label)
+			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
+			g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
kernel = self.__kernel_do(g1, g2) | |||
return kernel | |||
@@ -324,7 +324,7 @@ class Marginalized(GraphKernel): | |||
def _wrapper_untotter(self, i): | |||
-		return i, untotterTransformation(self._graphs[i], self.__node_label, self.__edge_label) # @todo: this may not work.
+		return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels) # @todo: this may not work.
def __add_dummy_labels(self, Gn): | |||
@@ -0,0 +1,94 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Aug 19 16:55:17 2020 | |||
@author: ljia | |||
@references: | |||
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | |||
""" | |||
import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.utils import SpecialLabel | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.utils.utils import direct_product_graph | |||
from gklearn.kernels import GraphKernel | |||
class RandomWalk(GraphKernel): | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
		self._compute_method = kwargs.get('compute_method', None)
		self._weight = kwargs.get('weight', 1)
		self._p = kwargs.get('p', None)
		self._q = kwargs.get('q', None)
		self._edge_weight = kwargs.get('edge_weight', None)
		self._node_labels = kwargs.get('node_labels', [])
		self._edge_labels = kwargs.get('edge_labels', [])
		self._ds_infos = kwargs.get('ds_infos', {})
		self._compute_method = self._compute_method.lower()
def _compute_gm_series(self): | |||
pass | |||
def _compute_gm_imap_unordered(self): | |||
pass | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
pass | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
pass | |||
def _compute_single_kernel_series(self, g1, g2): | |||
pass | |||
def _check_graphs(self, Gn): | |||
# remove graphs with no edges, as no walk can be found in their structures, | |||
# so the weight matrix between such a graph and itself might be zero. | |||
for g in Gn: | |||
if nx.number_of_edges(g) == 0: | |||
raise Exception('Graphs must contain edges to construct weight matrices.') | |||
	def _check_edge_weight(self, Gn, verbose=2):
		eweight = None
		if self._edge_weight is None:
			if verbose >= 2:
				print('\n No edge weight is specified. Set all weights to 1.\n')
		else:
			try:
				some_weight = list(nx.get_edge_attributes(Gn[0], self._edge_weight).values())[0]
				if isinstance(some_weight, (float, int)):
					eweight = self._edge_weight
				else:
					if verbose >= 2:
						print('\n Edge weight with name %s is not float or integer. Set all weights to 1.\n' % self._edge_weight)
			except Exception:
				if verbose >= 2:
					print('\n Edge weight with name "%s" is not found in the edge attributes. Set all weights to 1.\n' % self._edge_weight)
		self._edge_weight = eweight
	def _add_dummy_labels(self, Gn):
		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self._node_labels = [SpecialLabel.DUMMY]
		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self._edge_labels = [SpecialLabel.DUMMY]
@@ -0,0 +1,283 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Aug 20 16:12:45 2020 | |||
@author: ljia | |||
@references: | |||
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | |||
""" | |||
import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from scipy.sparse import kron | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import RandomWalk | |||
class SpectralDecomposition(RandomWalk): | |||
def __init__(self, **kwargs): | |||
RandomWalk.__init__(self, **kwargs) | |||
self._sub_kernel = kwargs.get('sub_kernel', None) | |||
def _compute_gm_series(self): | |||
		self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
# precompute the spectral decomposition of each graph. | |||
P_list = [] | |||
D_list = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
for G in iterator: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A actually is the transpose of the adjacency matrix. | |||
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
ew, ev = np.linalg.eig(A) | |||
D_list.append(ew) | |||
P_list.append(ev) | |||
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? | |||
if self._p == None: # p is uniform distribution as default. | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] | |||
# q_T_list = [q.T for q in q_list] | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
kernel = self.__kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return gram_matrix | |||
def _compute_gm_imap_unordered(self): | |||
		self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
# precompute the spectral decomposition of each graph. | |||
P_list = [] | |||
D_list = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(self._graphs, desc='spectral decompose', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
for G in iterator: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A actually is the transpose of the adjacency matrix. | |||
				A = nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose()
ew, ev = np.linalg.eig(A) | |||
D_list.append(ew) | |||
P_list.append(ev) # @todo: parallel? | |||
if self._p == None: # p is uniform distribution as default. | |||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in self._graphs] # @todo: parallel? | |||
def init_worker(q_T_list_toshare, P_list_toshare, D_list_toshare): | |||
global G_q_T_list, G_P_list, G_D_list | |||
G_q_T_list = q_T_list_toshare | |||
G_P_list = P_list_toshare | |||
G_D_list = D_list_toshare | |||
do_fun = self._wrapper_kernel_do | |||
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||
glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return gram_matrix | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
		self._check_edge_weight(g_list + [g1], self._verbose)
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q == None: | |||
# precompute the spectral decomposition of each graph. | |||
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) | |||
P_list = [] | |||
D_list = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='spectral decompose', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
			for i in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(g_list[i], weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev)
if self._p == None: # p is uniform distribution as default. | |||
				q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1))
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
kernel = self.__kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel) | |||
kernel_list[i] = kernel | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return kernel_list | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
		self._check_edge_weight(g_list + [g1], self._verbose)
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q == None: | |||
# precompute the spectral decomposition of each graph. | |||
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) | |||
P_list = [] | |||
D_list = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='spectral decompose', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
			for i in iterator:
				# don't normalize adjacency matrices if q is a uniform vector. Note
				# A actually is the transpose of the adjacency matrix.
				A = nx.adjacency_matrix(g_list[i], weight=self._edge_weight).todense().transpose()
				ew, ev = np.linalg.eig(A)
				D_list.append(ew)
				P_list.append(ev) # @todo: parallel?
if self._p == None: # p is uniform distribution as default. | |||
				q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1))
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list] # @todo: parallel? | |||
def init_worker(q_T1_toshare, P1_toshare, D1_toshare, q_T_list_toshare, P_list_toshare, D_list_toshare): | |||
global G_q_T1, G_P1, G_D1, G_q_T_list, G_P_list, G_D_list | |||
G_q_T1 = q_T1_toshare | |||
G_P1 = P1_toshare | |||
G_D1 = D1_toshare | |||
G_q_T_list = q_T_list_toshare | |||
G_P_list = P_list_toshare | |||
G_D_list = D_list_toshare | |||
do_fun = self._wrapper_kernel_list_do | |||
def func_assign(result, var_to_assign): | |||
var_to_assign[result[0]] = result[1] | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return kernel_list | |||
	def _wrapper_kernel_list_do(self, itr):
		return itr, self.__kernel_do(G_q_T1, G_q_T_list[itr], G_P1, G_P_list[itr], G_D1, G_D_list[itr], self._weight, self._sub_kernel)
def _compute_single_kernel_series(self, g1, g2): | |||
		self._check_edge_weight([g1] + [g2], self._verbose)
self._check_graphs([g1] + [g2]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored. Only works for undirected graphs.') | |||
if self._q == None: | |||
# precompute the spectral decomposition of each graph. | |||
			A1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
D1, P1 = np.linalg.eig(A1) | |||
			A2 = nx.adjacency_matrix(g2, weight=self._edge_weight).todense().transpose()
D2, P2 = np.linalg.eig(A2) | |||
if self._p == None: # p is uniform distribution as default. | |||
				q_T1 = np.full((1, nx.number_of_nodes(g1)), 1 / nx.number_of_nodes(g1))
				q_T2 = np.full((1, nx.number_of_nodes(g2)), 1 / nx.number_of_nodes(g2))
kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return kernel | |||
def __kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): | |||
# use uniform distribution if there is no prior knowledge. | |||
kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense() | |||
# @todo: this is not needed when p = q (kr = kl.T) for undirected graphs. | |||
# kr = kron(np.dot(P_inv_list[i], q_list[i]), np.dot(P_inv_list[j], q_list[j])).todense() | |||
if sub_kernel == 'exp': | |||
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2]) | |||
kmiddle = np.diag(np.exp(weight * D_diag)) | |||
elif sub_kernel == 'geo': | |||
D_diag = np.array([d1 * d2 for d1 in D1 for d2 in D2]) | |||
			kmiddle = np.diag(D_diag)
			kmiddle = np.identity(len(kmiddle)) - weight * kmiddle
kmiddle = np.linalg.inv(kmiddle) | |||
return np.dot(np.dot(kl, kmiddle), kl.T)[0, 0] | |||
def _wrapper_kernel_do(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel) |
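For reference, `__kernel_do` above evaluates the spectral form of the random-walk kernel from [1]: given eigendecompositions $A_k = P_k D_k P_k^{-1}$ and uniform starting/stopping distributions (a sketch; the code uses $P^T$ in place of $P^{-1}$, which is valid for symmetric adjacency matrices),

$$k(G_1, G_2) = \left[(q_1^T P_1) \otimes (q_2^T P_2)\right] \, f_\lambda(D_1 \otimes D_2) \, \left[(q_1^T P_1) \otimes (q_2^T P_2)\right]^T,$$

where $f_\lambda(\Lambda) = \operatorname{diag}(e^{\lambda \Lambda_{ii}})$ for the `'exp'` sub-kernel and $f_\lambda(\Lambda) = (I - \lambda \Lambda)^{-1}$ for `'geo'`.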
@@ -0,0 +1,245 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Aug 19 17:24:46 2020 | |||
@author: ljia | |||
@references: | |||
[1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. | |||
""" | |||
import sys | |||
from tqdm import tqdm | |||
import numpy as np | |||
import networkx as nx | |||
from control import dlyap | |||
from gklearn.utils.parallel import parallel_gm, parallel_me | |||
from gklearn.kernels import RandomWalk | |||
class SylvesterEquation(RandomWalk): | |||
def __init__(self, **kwargs): | |||
RandomWalk.__init__(self, **kwargs) | |||
def _compute_gm_series(self): | |||
		self._check_edge_weight(self._graphs, self._verbose)
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored.') | |||
lmda = self._weight | |||
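		# Per [1], with uniform p and q the geometric random-walk kernel
		#   k(G1, G2) = q_x^T (I - lmda * A_x)^{-1} p_x,
		# where A_x is the adjacency matrix of the direct product graph, can be
		# computed without building A_x explicitly by solving a discrete
		# Sylvester (Lyapunov) equation; control.dlyap (imported above) serves
		# that purpose.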
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q == None: | |||
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
if self._verbose >= 2: | |||
iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
			A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator]
# # normalized adjacency matrices | |||
# A_wave_list = [] | |||
# for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout): | |||
# A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose() | |||
# norm = A_tilde.sum(axis=0) | |||
# norm[norm == 0] = 1 | |||
# A_wave_list.append(A_tilde / norm) | |||
if self._p == None: # p is uniform distribution as default. | |||
from itertools import combinations_with_replacement | |||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | |||
if self._verbose >= 2: | |||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
kernel = self.__kernel_do(A_wave_list[i], A_wave_list[j], lmda) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return gram_matrix | |||
def _compute_gm_imap_unordered(self): | |||
self._check_edge_weight(self._graphs) | |||
self._check_graphs(self._graphs) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored.') | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
if self._verbose >= 2: | |||
iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?
if self._p is None: # p is uniform distribution as default.
def init_worker(A_wave_list_toshare): | |||
global G_A_wave_list | |||
G_A_wave_list = A_wave_list_toshare | |||
do_fun = self._wrapper_kernel_do | |||
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||
glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return gram_matrix | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1]) | |||
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored.') | |||
lmda = self._weight | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
A_wave_list = [nx.adjacency_matrix(g_list[i], weight=self._edge_weight).todense().transpose() for i in iterator]
if self._p is None: # p is uniform distribution as default.
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
kernel = self.__kernel_do(A_wave_1, A_wave_list[i], lmda) | |||
kernel_list[i] = kernel | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return kernel_list | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self._check_edge_weight(g_list + [g1]) | |||
self._check_graphs(g_list + [g1]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored.') | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
if self._verbose >= 2: | |||
iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
A_wave_list = [nx.adjacency_matrix(g_list[i], weight=self._edge_weight).todense().transpose() for i in iterator] # @todo: parallel?
if self._p is None: # p is uniform distribution as default.
def init_worker(A_wave_1_toshare, A_wave_list_toshare): | |||
global G_A_wave_1, G_A_wave_list | |||
G_A_wave_1 = A_wave_1_toshare | |||
G_A_wave_list = A_wave_list_toshare | |||
do_fun = self._wrapper_kernel_list_do | |||
def func_assign(result, var_to_assign): | |||
var_to_assign[result[0]] = result[1] | |||
itr = range(len(g_list)) | |||
len_itr = len(g_list) | |||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | |||
init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', | |||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return kernel_list | |||
def _wrapper_kernel_list_do(self, itr): | |||
return itr, self.__kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight)
def _compute_single_kernel_series(self, g1, g2): | |||
self._check_edge_weight([g1] + [g2]) | |||
self._check_graphs([g1] + [g2]) | |||
if self._verbose >= 2: | |||
import warnings | |||
warnings.warn('All labels are ignored.') | |||
lmda = self._weight | |||
if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note | |||
# A_wave_list actually contains the transposes of the adjacency matrices. | |||
A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
A_wave_2 = nx.adjacency_matrix(g2, weight=self._edge_weight).todense().transpose()
if self._p is None: # p is uniform distribution as default.
kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda) | |||
else: # @todo | |||
pass | |||
else: # @todo | |||
pass | |||
return kernel | |||
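# Annotation: __kernel_do below uses the three-argument form of control.dlyap,
# which solves the discrete Sylvester equation S X T_t^T - X + M0 = 0, i.e.
# X = lmda * A_wave2 X A_wave1^T + M0. Vectorized, this reads
# vec(X) = (I - lmda * (A_wave1 kron A_wave2))^(-1) vec(M0), the resolvent
# (I - lmda * W_x)^(-1) p_x of the random walk kernel [1], with W_x the
# adjacency matrix of the direct product graph; np.dot(q_times, X) then
# yields q_x^T (I - lmda * W_x)^(-1) p_x.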
def __kernel_do(self, A_wave1, A_wave2, lmda): | |||
S = lmda * A_wave2 | |||
T_t = A_wave1 | |||
# use uniform distribution if there is no prior knowledge. | |||
nb_pd = len(A_wave1) * len(A_wave2) | |||
p_times_uni = 1 / nb_pd | |||
M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni) | |||
X = dlyap(S, T_t, M0) | |||
X = np.reshape(X, (-1, 1), order='F') | |||
# use uniform distribution if there is no prior knowledge. | |||
q_times = np.full((1, nb_pd), p_times_uni) | |||
return np.dot(q_times, X) | |||
def _wrapper_kernel_do(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight) |
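# A minimal usage sketch (illustration, not part of the original file). The
# constructor keywords (weight, p, q, edge_weight, ds_infos) are assumptions
# modeled on how the random walk kernels are invoked elsewhere in this
# document; compute() returning (gram_matrix, run_time) follows the test suite.
if __name__ == '__main__':
	g1 = nx.path_graph(4)
	g2 = nx.cycle_graph(5)
	kernel = SylvesterEquation(weight=1e-3, p=None, q=None, edge_weight=None, ds_infos={'directed': False})
	gram_matrix, run_time = kernel.compute([g1, g2], parallel=None, n_jobs=1, verbose=False)
	print(gram_matrix)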
@@ -649,7 +649,7 @@ def paths2labelseqs(plist, G, ds_attrs, node_label, edge_label): | |||
# path_strs.append(tuple(strlist)) | |||
else: | |||
path_strs = [ | |||
tuple([G.nodes[node][node_label] for node in path]) | |||
for path in plist | |||
] | |||
return path_strs | |||
@@ -10,16 +10,14 @@ import time | |||
import random | |||
import multiprocessing | |||
import networkx as nx | |||
import cvxpy as cp | |||
import itertools | |||
from gklearn.preimage import PreimageGenerator | |||
from gklearn.preimage.utils import compute_k_dis | |||
from gklearn.ged.util import compute_geds_cml | |||
from gklearn.ged.env import GEDEnv | |||
from gklearn.ged.learning import CostMatricesLearner | |||
from gklearn.ged.median import MedianGraphEstimatorCML | |||
from gklearn.ged.median import constant_node_costs, mge_options_to_string | |||
from gklearn.utils import Timer, SpecialLabel | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
from gklearn.ged.util import label_costs_to_matrix | |||
class MedianPreimageGeneratorCML(PreimageGenerator): | |||
@@ -28,7 +26,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
def __init__(self, dataset=None): | |||
PreimageGenerator.__init__(self, dataset=dataset) | |||
### arguments to set. | |||
self.__mge = None | |||
self.__ged_options = {} | |||
self.__mge_options = {} | |||
@@ -38,6 +36,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__parallel = True | |||
self.__n_jobs = multiprocessing.cpu_count() | |||
self.__ds_name = None | |||
# for cml. | |||
self.__time_limit_in_sec = 0 | |||
self.__max_itrs = 100 | |||
self.__max_itrs_without_update = 3 | |||
@@ -45,7 +44,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__epsilon_ec = 0.1 | |||
self.__allow_zeros = True | |||
# self.__triangle_rule = True | |||
### values to compute. | |||
self.__runtime_optimize_ec = None | |||
self.__runtime_generate_preimage = None | |||
self.__runtime_total = None | |||
@@ -57,12 +56,13 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__k_dis_set_median = None | |||
self.__k_dis_gen_median = None | |||
self.__k_dis_dataset = None | |||
self.__node_label_costs = None
self.__edge_label_costs = None
# for cml. | |||
self.__itrs = 0 | |||
self.__converged = False | |||
self.__num_updates_ecs = 0 | |||
### values that can be set or to be computed. | |||
self.__edit_cost_constants = [] | |||
self.__gram_matrix_unnorm = None | |||
self.__runtime_precompute_gm = None | |||
@@ -154,7 +154,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
print('================================================================================') | |||
print('Finished generation of preimages.') | |||
print('--------------------------------------------------------------------------------') | |||
print('The optimized edit costs:', self.__edit_cost_constants) | |||
print('SOD of the set median:', self.__sod_set_median) | |||
print('SOD of the generalized median:', self.__sod_gen_median) | |||
print('Distance in kernel space for set median:', self.__k_dis_set_median) | |||
@@ -165,7 +165,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||
print('Total time:', self.__runtime_total) | |||
print('Total number of iterations for optimizing:', self.__itrs) | |||
print('Total number of updating edit costs:', self.__num_updates_ecs) | |||
print('Is optimization of edit costs converged:', self.__converged) | |||
print('================================================================================') | |||
print() | |||
@@ -185,7 +185,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
results['k_dis_dataset'] = self.__k_dis_dataset | |||
results['itrs'] = self.__itrs | |||
results['converged'] = self.__converged | |||
results['num_updates_ecc'] = self.__num_updates_ecs | |||
results['mge'] = {} | |||
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() | |||
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() | |||
@@ -302,598 +302,33 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
dis_k_vec.append(dis_k_mat[i, j]) | |||
dis_k_vec = np.array(dis_k_vec) | |||
# init ged. | |||
if self._verbose >= 2: | |||
print('\ninitial:') | |||
time0 = time.time() | |||
graphs = [self.__clean_graph(g) for g in self._dataset.graphs] | |||
self.__edit_cost_constants = self.__init_ecc | |||
# Set GEDEnv options. | |||
options = self.__ged_options.copy() | |||
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed. | |||
options['node_labels'] = self._dataset.node_labels | |||
options['edge_labels'] = self._dataset.edge_labels | |||
options['node_attrs'] = self._dataset.node_attrs | |||
options['edge_attrs'] = self._dataset.edge_attrs | |||
options['node_label_costs'] = self.__node_label_costs | |||
options['edge_label_costs'] = self.__edge_label_costs | |||
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) | |||
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] | |||
time_list = [time.time() - time0] | |||
edit_cost_list = [self.__init_ecc] | |||
nb_cost_mat = np.array(n_edit_operations) | |||
nb_cost_mat_list = [nb_cost_mat] | |||
if self._verbose >= 2: | |||
print('Current edit cost constants:', self.__edit_cost_constants) | |||
print('Residual list:', residual_list) | |||
# run iteration from initial edit costs. | |||
self.__converged = False | |||
itrs_without_update = 0 | |||
self.__itrs = 0 | |||
self.__num_updates_ecs = 0
timer = Timer(self.__time_limit_in_sec) | |||
while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update): | |||
if self._verbose >= 2: | |||
print('\niteration', self.__itrs + 1) | |||
time0 = time.time() | |||
# "fit" geds to distances in feature space by tuning edit costs using theLeast Squares Method. | |||
# np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm', | |||
# nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec, | |||
# n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init, | |||
# ged_mat=ged_mat) | |||
self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec) | |||
for i in range(len(self.__edit_cost_constants)): | |||
if -1e-9 <= self.__edit_cost_constants[i] <= 1e-9: | |||
self.__edit_cost_constants[i] = 0 | |||
if self.__edit_cost_constants[i] < 0: | |||
raise ValueError('The edit cost is negative.') | |||
# for i in range(len(self.__edit_cost_constants)): | |||
# if self.__edit_cost_constants[i] < 0: | |||
# self.__edit_cost_constants[i] = 0 | |||
# compute new GEDs and numbers of edit operations. | |||
options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75]) | |||
options['edit_cost_constants'] = self.__edit_cost_constants # @todo | |||
options['node_labels'] = self._dataset.node_labels | |||
options['edge_labels'] = self._dataset.edge_labels | |||
options['node_attrs'] = self._dataset.node_attrs | |||
options['edge_attrs'] = self._dataset.edge_attrs | |||
ged_vec, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) | |||
residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) | |||
time_list.append(time.time() - time0) | |||
edit_cost_list.append(self.__edit_cost_constants) | |||
nb_cost_mat = np.array(n_edit_operations) | |||
nb_cost_mat_list.append(nb_cost_mat) | |||
# check convergence.
ec_changed = False | |||
for i, cost in enumerate(self.__edit_cost_constants): | |||
if cost == 0: | |||
if edit_cost_list[-2][i] > self.__epsilon_ec: | |||
ec_changed = True | |||
break | |||
elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec: | |||
ec_changed = True | |||
break | |||
# if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec: | |||
# ec_changed = True | |||
# break | |||
residual_changed = False | |||
if residual_list[-1] == 0: | |||
if residual_list[-2] > self.__epsilon_residual: | |||
residual_changed = True | |||
elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual: | |||
residual_changed = True | |||
self.__converged = not (ec_changed or residual_changed) | |||
if self.__converged: | |||
itrs_without_update += 1 | |||
else: | |||
itrs_without_update = 0 | |||
self.__num_updates_ecs += 1
# print current states. | |||
if self._verbose >= 2: | |||
print() | |||
print('-------------------------------------------------------------------------') | |||
print('States of iteration', self.__itrs + 1) | |||
print('-------------------------------------------------------------------------') | |||
# print('Time spend:', self.__runtime_optimize_ec) | |||
print('Total number of iterations for optimizing:', self.__itrs + 1) | |||
print('Total number of updating edit costs:', self.__num_updates_ecs)
print('Was optimization of edit costs converged:', self.__converged) | |||
print('Did edit costs change:', ec_changed) | |||
print('Did residual change:', residual_changed) | |||
print('Iterations without update:', itrs_without_update) | |||
print('Current edit cost constants:', self.__edit_cost_constants) | |||
print('Residual list:', residual_list) | |||
print('-------------------------------------------------------------------------') | |||
self.__itrs += 1 | |||
def __termination_criterion_met(self, converged, timer, itr, itrs_without_update): | |||
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): | |||
# if self.__state == AlgorithmState.TERMINATED: | |||
# self.__state = AlgorithmState.INITIALIZED | |||
return True | |||
return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) | |||
def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): | |||
# if self.__ds_name == 'Letter-high': | |||
if self.__ged_options['edit_cost'] == 'LETTER': | |||
raise Exception('Cannot compute for cost "LETTER".') | |||
# # method 1: set alpha automatically, just tune c_vir and c_eir by | |||
# # LMS using cvxpy. | |||
# alpha = 0.5 | |||
# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec) | |||
## if np.count_nonzero(nb_cost_mat[:,4]) == 0: | |||
## alpha = 0.75 | |||
## else: | |||
## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0]) | |||
## alpha = alpha * 0.99 | |||
# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1]) | |||
# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5]) | |||
# nb_cost_mat_new = np.column_stack((param_vir, param_eir)) | |||
# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3] | |||
# | |||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new) | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | |||
# prob = cp.Problem(cp.Minimize(cost), constraints) | |||
# prob.solve() | |||
# edit_costs_new = x.value | |||
# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha]) | |||
# residual = np.sqrt(prob.value) | |||
# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by | |||
# # scipy.optimize.minimize. | |||
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1] | |||
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5] | |||
# w2 = nb_cost_mat[:,3] | |||
# w3 = dis_k_vec | |||
# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \ | |||
# + w2 * x[2] - w3 * x[3]) ** 2) | |||
# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None)) | |||
# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds) | |||
# edit_costs_new = res.x[0:3] | |||
# residual = res.fun | |||
# method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy. | |||
# # method 4: tune c_vir, c_eir and alpha by QP function | |||
# # scipy.optimize.least_squares. An initial guess is required. | |||
# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1] | |||
# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5] | |||
# w2 = nb_cost_mat[:,3] | |||
# w3 = dis_k_vec | |||
# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \ | |||
# + w2 * x[2] - w3 * x[3]) ** 2 | |||
# res = optimize.root(func, [0.9, 1.7, 0.75, 100]) | |||
# edit_costs_new = res.x | |||
# residual = None | |||
elif self.__ged_options['edit_cost'] == 'LETTER2': | |||
# # 1. if c_vi != c_vr, c_ei != c_er. | |||
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
# cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
## # 1.1 no constraints. | |||
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | |||
# # 1.2 c_vs <= c_vi + c_vr. | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
## # 2. if c_vi == c_vr, c_ei == c_er. | |||
## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]] | |||
## nb_cost_mat_new[:,0] += nb_cost_mat[:,1] | |||
## nb_cost_mat_new[:,2] += nb_cost_mat[:,5] | |||
## x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
## cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
## # 2.1 no constraints. | |||
## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] | |||
### # 2.2 c_vs <= c_vi + c_vr. | |||
### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
### np.array([2.0, -1.0, 0.0]).T@x >= 0.0] | |||
# | |||
# prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
# prob.solve() | |||
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | |||
# edit_costs_new = np.array(edit_costs_new) | |||
# residual = np.sqrt(prob.value) | |||
if not self.__triangle_rule and self.__allow_zeros: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif self.__triangle_rule and self.__allow_zeros: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif not self.__triangle_rule and not self.__allow_zeros: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
# elif method == 'inequality_modified': | |||
# # c_vs <= c_vi + c_vr. | |||
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
# x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
# cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
# prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
# prob.solve() | |||
# # use same costs for insertion and removal rather than the fitted costs. | |||
# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] | |||
# edit_costs_new = np.array(edit_costs_new) | |||
# residual = np.sqrt(prob.value) | |||
elif self.__triangle_rule and not self.__allow_zeros: | |||
# c_vs <= c_vi + c_vr. | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif rw_constraints == '2constraints': # @todo: rearrange it later. | |||
# c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er. | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | |||
is_n_attr = np.count_nonzero(nb_cost_mat[:,2]) | |||
is_e_attr = np.count_nonzero(nb_cost_mat[:,5]) | |||
if self.__ds_name == 'SYNTHETICnew': # @todo: rearrange this later.
# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||
nb_cost_mat_new = nb_cost_mat[:,[2,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] | |||
# constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]] | |||
constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([0.0, 1.0, -1.0]).T@x == 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
prob.solve() | |||
# print(x.value) | |||
edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value, | |||
np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not self.__triangle_rule and self.__allow_zeros: | |||
if is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif is_n_attr and not is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||
residual = np.sqrt(prob.value) | |||
else: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||
x.value[2:], np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif self.__triangle_rule and self.__allow_zeros: | |||
if is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif is_n_attr and not is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||
residual = np.sqrt(prob.value) | |||
else: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||
x.value[2:], np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not self.__triangle_rule and not self.__allow_zeros: | |||
if is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif is_n_attr and not is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||
residual = np.sqrt(prob.value) | |||
else: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||
x.value[2:], np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif self.__triangle_rule and not self.__allow_zeros: | |||
# c_vs <= c_vi + c_vr. | |||
if is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif is_n_attr and not is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value, np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif not is_n_attr and is_e_attr: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], | |||
np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) | |||
residual = np.sqrt(prob.value) | |||
else: | |||
nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] | |||
x = cp.Variable(nb_cost_mat_new.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), | |||
x.value[2:], np.array([0.0]))) | |||
residual = np.sqrt(prob.value) | |||
elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. | |||
if not self.__triangle_rule and self.__allow_zeros: | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif self.__triangle_rule and self.__allow_zeros: | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | |||
np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, | |||
np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif not self.__triangle_rule and not self.__allow_zeros: | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
elif self.__triangle_rule and not self.__allow_zeros: | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||
constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
else: | |||
raise Exception('The edit cost "', self.__ged_options['edit_cost'], '" is not supported for update progress.') | |||
# # method 1: simple least square method. | |||
# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, | |||
# rcond=None) | |||
# # method 2: least square method with x_i >= 0. | |||
# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) | |||
# method 3: solve as a quadratic program with constraints. | |||
# P = np.dot(nb_cost_mat.T, nb_cost_mat) | |||
# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) | |||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | |||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||
# A = np.array([1 for i in range(nb_cost_mat.shape[1])]) | |||
# b = 1 | |||
# x = cp.Variable(nb_cost_mat.shape[1]) | |||
# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), | |||
# [G@x <= h]) | |||
# prob.solve() | |||
# edit_costs_new = x.value | |||
# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) | |||
# G = -1 * np.identity(nb_cost_mat.shape[1]) | |||
# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) | |||
x = cp.Variable(nb_cost_mat.shape[1]) | |||
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) | |||
constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], | |||
# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] | |||
np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, | |||
np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] | |||
prob = cp.Problem(cp.Minimize(cost_fun), constraints) | |||
self.__execute_cvx(prob) | |||
edit_costs_new = x.value | |||
residual = np.sqrt(prob.value) | |||
# method 4: | |||
return edit_costs_new, residual | |||
def __execute_cvx(self, prob): | |||
try: | |||
prob.solve(verbose=(self._verbose>=2)) | |||
except MemoryError as error0: | |||
if self._verbose >= 2: | |||
print('\nUsing solver "OSQP" caused a memory error.') | |||
print('the original error message is\n', error0) | |||
print('solver status: ', prob.status) | |||
print('trying solver "CVXOPT" instead...\n') | |||
try: | |||
prob.solve(solver=cp.CVXOPT, verbose=(self._verbose>=2)) | |||
except Exception as error1: | |||
if self._verbose >= 2: | |||
print('\nAn error occurred when using solver "CVXOPT".')
print('the original error message is\n', error1) | |||
print('solver status: ', prob.status) | |||
print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n') | |||
prob.solve(solver=cp.MOSEK, verbose=(self._verbose>=2)) | |||
else: | |||
if self._verbose >= 2: | |||
print('solver status: ', prob.status) | |||
else: | |||
if self._verbose >= 2: | |||
print('solver status: ', prob.status) | |||
if self._verbose >= 2: | |||
print() | |||
# Learn cost matrices.
# Initialize cost learner. | |||
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo | |||
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec) | |||
# Run cost learner. | |||
cml.update(dis_k_vec, self._dataset.graphs, options) | |||
# Get results. | |||
results = cml.get_results() | |||
self.__converged = results['converged'] | |||
self.__itrs = results['itrs'] | |||
self.__num_updates_ecs = results['num_updates_ecs'] | |||
cost_list = results['cost_list'] | |||
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)] | |||
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):] | |||
def __gmg_bcu(self): | |||
@@ -913,12 +348,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
for g in graphs: | |||
ged_env.add_nx_graph(g, '') | |||
graph_ids = ged_env.get_all_graph_ids() | |||
node_labels = ged_env.get_all_node_labels() | |||
edge_labels = ged_env.get_all_edge_labels() | |||
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels)) | |||
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels)) | |||
ged_env.set_label_costs(node_label_costs, edge_label_costs) | |||
set_median_id = ged_env.add_graph('set_median') | |||
gen_median_id = ged_env.add_graph('gen_median') | |||
ged_env.init(init_type=self.__ged_options['init_option']) | |||
# Set up the median graph estimator.
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options) | |||
options = self.__mge_options.copy() | |||
if 'seed' not in options:
@@ -52,94 +52,104 @@ def chooseDataset(ds_name): | |||
return dataset | |||
# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
# @pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')]) | |||
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
# def test_commonwalkkernel(ds_name, weight, compute_method): | |||
# """Test common walk kernel. | |||
# """ | |||
# from gklearn.kernels.commonWalkKernel import commonwalkkernel | |||
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
@pytest.mark.parametrize('weight,compute_method', [(0.01, 'geo'), (1, 'exp')]) | |||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
def test_CommonWalk(ds_name, parallel, weight, compute_method): | |||
"""Test common walk kernel. | |||
""" | |||
from gklearn.kernels import CommonWalk | |||
import networkx as nx | |||
dataset = chooseDataset(ds_name) | |||
dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1]) | |||
# Gn, y = chooseDataset(ds_name) | |||
try: | |||
graph_kernel = CommonWalk(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
weight=weight, | |||
compute_method=compute_method) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
# try: | |||
# Kmatrix, run_time, idx = commonwalkkernel(Gn, | |||
# node_label='atom', | |||
# edge_label='bond_type', | |||
# weight=weight, | |||
# compute_method=compute_method, | |||
# # parallel=parallel, | |||
# n_jobs=multiprocessing.cpu_count(), | |||
# verbose=True) | |||
# except Exception as exception: | |||
# assert False, exception | |||
except Exception as exception: | |||
assert False, exception | |||
# @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
# @pytest.mark.parametrize('remove_totters', [True, False]) | |||
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
# def test_marginalizedkernel(ds_name, remove_totters): | |||
# """Test marginalized kernel. | |||
# """ | |||
# from gklearn.kernels.marginalizedKernel import marginalizedkernel | |||
@pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS']) | |||
@pytest.mark.parametrize('remove_totters', [False]) #[True, False]) | |||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
def test_Marginalized(ds_name, parallel, remove_totters): | |||
"""Test marginalized kernel. | |||
""" | |||
from gklearn.kernels import Marginalized | |||
# Gn, y = chooseDataset(ds_name) | |||
dataset = chooseDataset(ds_name) | |||
try: | |||
graph_kernel = Marginalized(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
p_quit=0.5, | |||
n_iteration=2, | |||
remove_totters=remove_totters) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
# try: | |||
# Kmatrix, run_time = marginalizedkernel(Gn, | |||
# node_label='atom', | |||
# edge_label='bond_type', | |||
# p_quit=0.5, | |||
# n_iteration=2, | |||
# remove_totters=remove_totters, | |||
# # parallel=parallel, | |||
# n_jobs=multiprocessing.cpu_count(), | |||
# verbose=True) | |||
# except Exception as exception: | |||
# assert False, exception | |||
except Exception as exception: | |||
assert False, exception | |||
# @pytest.mark.parametrize( | |||
# 'compute_method,ds_name,sub_kernel', | |||
# [ | |||
# # ('sylvester', 'Alkane', None), | |||
# # ('conjugate', 'Alkane', None), | |||
# # ('conjugate', 'AIDS', None), | |||
# # ('fp', 'Alkane', None), | |||
# # ('fp', 'AIDS', None), | |||
# ('spectral', 'Alkane', 'exp'), | |||
# ('spectral', 'Alkane', 'geo'), | |||
# ] | |||
# ) | |||
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel): | |||
# """Test random walk kernel kernel. | |||
# """ | |||
# from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
# from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
# import functools | |||
# """Test random walk kernel kernel. | |||
# """ | |||
# from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||
# from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||
# import functools | |||
# Gn, y = chooseDataset(ds_name) | |||
# mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||
# sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}] | |||
# try: | |||
# Kmatrix, run_time, idx = randomwalkkernel(Gn, | |||
# compute_method=compute_method, | |||
# weight=1e-3, | |||
# p=None, | |||
# q=None, | |||
# edge_weight=None, | |||
# node_kernels=sub_kernels, | |||
# edge_kernels=sub_kernels, | |||
# node_label='atom', | |||
# edge_label='bond_type', | |||
# sub_kernel=sub_kernel, | |||
# # parallel=parallel, | |||
# n_jobs=multiprocessing.cpu_count(), | |||
# verbose=True) | |||
# except Exception as exception: | |||
# assert False, exception | |||
@pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) | |||
@@ -157,9 +167,9 @@ def test_ShortestPath(ds_name, parallel): | |||
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
try: | |||
graph_kernel = ShortestPath(node_labels=dataset.node_labels, | |||
node_attrs=dataset.node_attrs, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
node_kernels=sub_kernels) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
@@ -187,12 +197,12 @@ def test_StructuralSP(ds_name, parallel): | |||
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||
try: | |||
graph_kernel = StructuralSP(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
node_attrs=dataset.node_attrs, | |||
edge_attrs=dataset.edge_attrs, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
node_kernels=sub_kernels, | |||
edge_kernels=sub_kernels) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
@@ -218,9 +228,9 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method): | |||
try: | |||
graph_kernel = PathUpToH(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
depth=2, k_func=k_func, compute_method=compute_method) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
@@ -245,9 +255,9 @@ def test_Treelet(ds_name, parallel): | |||
pkernel = functools.partial(polynomialkernel, d=2, c=1e5) | |||
try: | |||
graph_kernel = Treelet(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
sub_kernel=pkernel) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
@@ -271,9 +281,9 @@ def test_WLSubtree(ds_name, parallel): | |||
try: | |||
graph_kernel = WLSubtree(node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||
height=2) | |||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||
@@ -0,0 +1,53 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Fri Sep 11 18:10:06 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
import random | |||
class GraphSynthesizer(object): | |||
def __init__(self): | |||
pass | |||
def random_graph(self, num_nodes, num_edges, num_node_labels=0, num_edge_labels=0, seed=None, directed=False, max_num_edges=None, all_edges=None): | |||
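# note: the "seed" and "directed" arguments are accepted but currently unused below.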
g = nx.Graph() | |||
if num_node_labels > 0: | |||
node_labels = np.random.randint(0, high=num_node_labels, size=num_nodes) | |||
for i in range(0, num_nodes): | |||
g.add_node(str(i), atom=node_labels[i]) # @todo: update "atom". | |||
else: | |||
for i in range(0, num_nodes): | |||
g.add_node(str(i)) | |||
if num_edge_labels > 0: | |||
edge_labels = np.random.randint(0, high=num_edge_labels, size=num_edges) | |||
for idx, i in enumerate(random.sample(range(0, max_num_edges), num_edges)): | |||
node1, node2 = all_edges[i] | |||
g.add_edge(str(node1), str(node2), bond_type=edge_labels[idx]) # @todo: update "bond_type". | |||
else: | |||
for i in random.sample(range(0, max_num_edges), num_edges): | |||
node1, node2 = all_edges[i] | |||
g.add_edge(str(node1), str(node2)) | |||
return g | |||
def unified_graphs(self, num_graphs=1000, num_nodes=20, num_edges=40, num_node_labels=0, num_edge_labels=0, seed=None, directed=False): | |||
max_num_edges = int((num_nodes - 1) * num_nodes / 2) | |||
if num_edges > max_num_edges: | |||
raise Exception('Too many edges.') | |||
all_edges = [(i, j) for i in range(0, num_nodes) for j in range(i + 1, num_nodes)] # @todo: optimize. No directed graphs. | |||
graphs = [] | |||
for idx in range(0, num_graphs): | |||
graphs.append(self.random_graph(num_nodes, num_edges, num_node_labels=num_node_labels, num_edge_labels=num_edge_labels, seed=seed, directed=directed, max_num_edges=max_num_edges, all_edges=all_edges)) | |||
return graphs |
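# A short usage sketch of random_graph() (illustration, not from the original
# file): the caller must precompute the candidate edge list and its size, as
# unified_graphs() does above.
if __name__ == '__main__':
	num_nodes = 5
	all_edges = [(i, j) for i in range(num_nodes) for j in range(i + 1, num_nodes)]
	gsyzer = GraphSynthesizer()
	g = gsyzer.random_graph(num_nodes, 4, num_node_labels=2, num_edge_labels=2, max_num_edges=len(all_edges), all_edges=all_edges)
	print(g.nodes(data=True))
	print(g.edges(data=True))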
@@ -12,7 +12,7 @@ import sys | |||
def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker=None, | |||
glbv=None, method=None, n_jobs=None, chunksize=None, itr_desc='', | |||
verbose=True): | |||
'''Apply func to each item of itr in parallel and merge each result into var_to_assign via func_assign.
'''
if method == 'imap_unordered': | |||
@@ -30,7 +30,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker | |||
else: | |||
chunksize = 100 | |||
for result in (tqdm(pool.imap_unordered(func, itr, chunksize), | |||
desc=itr_desc, file=sys.stdout) if verbose else | |||
pool.imap_unordered(func, itr, chunksize)): | |||
func_assign(result, var_to_assign) | |||
pool.close() | |||
@@ -45,7 +45,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker | |||
else: | |||
chunksize = 100 | |||
for result in (tqdm(pool.imap_unordered(func, itr, chunksize), | |||
desc=itr_desc, file=sys.stdout) if verbose else | |||
pool.imap_unordered(func, itr, chunksize)): | |||
func_assign(result, var_to_assign) | |||
pool.close() | |||
@@ -54,7 +54,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker | |||
def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None, | |||
method='imap_unordered', n_jobs=None, chunksize=None, | |||
verbose=True): # @todo: Gn seems not necessary. | |||
from itertools import combinations_with_replacement | |||
def func_assign(result, var_to_assign): | |||
var_to_assign[result[0]][result[1]] = result[2] | |||
@@ -222,6 +222,70 @@ def direct_product(G1, G2, node_label, edge_label): | |||
return gt | |||
def direct_product_graph(G1, G2, node_labels, edge_labels): | |||
"""Return the direct/tensor product of directed graphs G1 and G2. | |||
Parameters | |||
---------- | |||
G1, G2 : NetworkX graph | |||
The original graphs. | |||
node_labels : list | |||
A list of node attributes used as labels. | |||
edge_labels : list | |||
A list of edge attributes used as labels. | |||
Return | |||
------ | |||
gt : NetworkX graph | |||
The direct product graph of G1 and G2. | |||
Notes | |||
----- | |||
This method differs from networkx.tensor_product in that it adds a node or edge to the product graph only when the corresponding pair in G1 and G2 carries identical labels.
References | |||
---------- | |||
.. [1] Thomas Gärtner, Peter Flach, and Stefan Wrobel. On graph kernels: Hardness results and efficient alternatives. Learning Theory and Kernel Machines, pages 129–143, 2003. | |||
""" | |||
# arrange all graphs in a list | |||
from itertools import product | |||
# G = G.to_directed() | |||
gt = nx.DiGraph() | |||
# add nodes | |||
for u, v in product(G1, G2): | |||
label1 = tuple(G1.nodes[u][nl] for nl in node_labels) | |||
label2 = tuple(G2.nodes[v][nl] for nl in node_labels) | |||
if label1 == label2: | |||
gt.add_node((u, v), node_label=label1) | |||
# add edges; faster for sparse graphs (not too many edges), which is the most common case for now.
for (u1, v1), (u2, v2) in product(G1.edges, G2.edges): | |||
if (u1, u2) in gt and (v1, v2) in gt: | |||
label1 = tuple(G1.edges[u1, v1][el] for el in edge_labels) | |||
label2 = tuple(G2.edges[u2, v2][el] for el in edge_labels) | |||
if label1 == label2: | |||
gt.add_edge((u1, u2), (v1, v2), edge_label=label1) | |||
# # add edges, faster for dense graphs (many edges; a complete graph would benefit the most).
# for u, v in product(gt, gt): | |||
# if (u[0], v[0]) in G1.edges and ( | |||
# u[1], v[1] | |||
# ) in G2.edges and G1.edges[u[0], | |||
# v[0]][edge_label] == G2.edges[u[1], | |||
# v[1]][edge_label]: | |||
# gt.add_edge((u[0], u[1]), (v[0], v[1])) | |||
# gt.edges[(u[0], u[1]), (v[0], v[1])].update({ | |||
# edge_label: | |||
# G1.edges[u[0], v[0]][edge_label] | |||
# }) | |||
# relabel nodes using consecutive integers for convenience of kernel calculation. | |||
# gt = nx.convert_node_labels_to_integers( | |||
# gt, first_label=0, label_attribute='label_orignal') | |||
return gt | |||
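def _example_direct_product_graph():
	"""A small illustration (not part of the original file): only node and
	edge pairs with matching labels survive in the product graph.
	"""
	G1 = nx.DiGraph()
	G1.add_node(0, atom='C')
	G1.add_node(1, atom='O')
	G1.add_edge(0, 1, bond_type='single')
	G2 = nx.DiGraph()
	G2.add_node('a', atom='C')
	G2.add_node('b', atom='O')
	G2.add_edge('a', 'b', bond_type='single')
	gt = direct_product_graph(G1, G2, ['atom'], ['bond_type'])
	# expected: nodes (0, 'a') and (1, 'b'), connected by a single edge.
	return gt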
def graph_deepcopy(G): | |||
"""Deep copy a graph, including deep copy of all nodes, edges and | |||
attributes of the graph, nodes and edges. | |||
@@ -1,6 +1,6 @@ | |||
numpy>=1.16.2 | |||
scipy>=1.1.0 | |||
matplotlib>=3.1.0 | |||
networkx>=2.2 | |||
scikit-learn>=0.20.0 | |||
tabulate>=0.8.2 | |||
@@ -1,6 +1,6 @@ | |||
numpy>=1.16.2 | |||
scipy>=1.1.0 | |||
matplotlib>=3.1.0 | |||
networkx>=2.2 | |||
scikit-learn>=0.20.0 | |||
tabulate>=0.8.2 | |||