@@ -5,31 +5,26 @@ Created on Tue Jun 16 16:04:46 2020
@author: ljia
@author: ljia
"""
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:27:22 2020
@author: ljia
"""
import numpy as np
import numpy as np
import time
import time
import random
import random
import multiprocessing
import multiprocessing
import networkx as nx
import networkx as nx
import cvxpy as cp
import cvxpy as cp
import itertools
from gklearn.preimage import PreimageGenerator
from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis
from gklearn.preimage.utils import compute_k_dis
from gklearn.ged.util import compute_geds_cml, ged_options_to_string
from gklearn.ged.util import compute_geds_cml
from gklearn.ged.env import GEDEnv
from gklearn.ged.env import GEDEnv
from gklearn.ged.median import MedianGraphEstimator
from gklearn.ged.median import constant_node_costs,mge_options_to_string
from gklearn.utils import Timer
from gklearn.ged.median import MedianGraphEstimatorPy
from gklearn.ged.median import constant_node_costs, mge_options_to_string
from gklearn.utils import Timer, SpecialLabel
from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.utils.utils import get_graph_kernel_by_name
class MedianPreimageGeneratorCML(PreimageGenerator):
class MedianPreimageGeneratorCML(PreimageGenerator):
"""Generator median preimages by cost matrices learning using the pure Python version of GEDEnv. Works only for symbolic labeled graphs.
"""
def __init__(self, dataset=None):
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
PreimageGenerator.__init__(self, dataset=dataset)
@@ -37,7 +32,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__mge = None
self.__mge = None
self.__ged_options = {}
self.__ged_options = {}
self.__mge_options = {}
self.__mge_options = {}
self.__fit_method = 'k-graphs'
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None
self.__init_ecc = None
self.__parallel = True
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__n_jobs = multiprocessing.cpu_count()
@@ -47,8 +43,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__max_itrs_without_update = 3
self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01
self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1
self.__epsilon_ec = 0.1
self.__allow_zeros = Fals e
self.__triangle_rule = True
self.__allow_zeros = Tru e
# self.__triangle_rule = True
# values to compute.
# values to compute.
self.__runtime_optimize_ec = None
self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None
self.__runtime_generate_preimage = None
@@ -64,6 +60,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__itrs = 0
self.__itrs = 0
self.__converged = False
self.__converged = False
self.__num_updates_ecc = 0
self.__num_updates_ecc = 0
self.__node_label_costs = None
self.__edge_label_costs = None
# values that can be set or to be computed.
# values that can be set or to be computed.
self.__edit_cost_constants = []
self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None
self.__gram_matrix_unnorm = None
@@ -76,7 +74,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self._verbose = kwargs.get('verbose', 2)
self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {})
self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {})
self.__mge_options = kwargs.get('mge_options', {})
self.__fit_method = kwargs.get('fit_method', 'k-graphs')
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None)
self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True)
self.__parallel = kwargs.get('parallel', True)
@@ -89,8 +88,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', Fals e)
self.__triangle_rule = kwargs.get('triangle_rule', True)
self.__allow_zeros = kwargs.get('allow_zeros', Tru e)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
def run(self):
def run(self):
@@ -122,10 +121,10 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
end_precompute_gm = time.time()
end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
start -= self.__runtime_precompute_gm
if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
start = time.time()
self.__runtime_precompute_gm = 0
end_precompute_gm = start
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# start = time.time()
# self.__runtime_precompute_gm = 0
# end_precompute_gm = start
# 2. optimize edit cost constants.
# 2. optimize edit cost constants.
self.__optimize_edit_cost_vector()
self.__optimize_edit_cost_vector()
@@ -197,7 +196,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __optimize_edit_cost_vector(self):
def __optimize_edit_cost_vector(self):
"""Learn edit cost vector.
"""Learn edit cost vector.
"""
"""
if self.__fit_method == 'random': # random
if self.__init_method == 'random': # random
# Get list of node labels.
nls = self._dataset.get_all_node_labels()
# Generate random costs.
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
self.__node_label_costs = np.zeros((len(nls) + 1, len(nls) + 1))
# Initialize node label cost matrix, each row/column corresponds to a label, the first label is the dummy label. These is the same setting as in GEDData.
i = 0
# Costs of insertions.
for row in range(1, len(nls) + 1):
self.__node_label_costs[row, 0] = rand_costs[i]
i += 1
# Costs of deletions.
for col in range(1, len(nls) + 1):
self.__node_label_costs[0, col] = rand_costs[i]
i += 1
# Costs of substitutions.
for row in range(1, len(nls) + 1):
for col in range(row + 1, len(nls) + 1):
self.__node_label_costs[row, col] = rand_costs[i]
self.__node_label_costs[col, row] = rand_costs[i]
i += 1
# self.__node_label_costs = {}
# for i, (nl1, nl2) in enumerate(itertools.combinations(nls, 2)):
# self.__node_label_costs[(nl1, nl2)] = rand_costs[i]
# # Add costs for deletion.
# for j, nl in enumerate(nls):
# self.__node_label_costs[(nl1, SpecialLabel.DUMMY)] = rand_costs[i + j]
# # Add costs for insertion.
# for k, nl in enumerate(nls):
# self.__node_label_costs[(SpecialLabel.DUMMY, nl1)] = rand_costs[i + j + k]
# # Add self costs.
# for nl in nls:
# self.__node_label_costs[(nl, nl)] = 0
# self.__node_label_costs[(SpecialLabel.DUMMY, SpecialLabel.DUMMY)] = 0
# Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER':
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
@@ -279,6 +319,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
options['edge_labels'] = self._dataset.edge_labels
options['edge_labels'] = self._dataset.edge_labels
options['node_attrs'] = self._dataset.node_attrs
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0]
time_list = [time.time() - time0]
@@ -881,8 +922,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_env.init(init_type=self.__ged_options['init_option'])
ged_env.init(init_type=self.__ged_options['init_option'])
# Set up the madian graph estimator.
# Set up the madian graph estimator.
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string( self.__ged_options))
self.__mge = MedianGraphEstimatorPy (ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy()
options = self.__mge_options.copy()
if not 'seed' in options:
if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
@@ -897,8 +938,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_options = self.__ged_options.copy()
ged_options = self.__ged_options.copy()
if self.__parallel:
if self.__parallel:
ged_options['threads'] = 1
ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options) )
self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options) )
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator.
# Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id)
self.__mge.run(graph_ids, set_median_id, gen_median_id)