@@ -5,31 +5,26 @@ Created on Tue Jun 16 16:04:46 2020
@author: ljia
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:27:22 2020
@author: ljia
"""
import numpy as np
import time
import random
import multiprocessing
import networkx as nx
import cvxpy as cp
import itertools
from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis
from gklearn.ged.util import compute_geds_cml, ged_options_to_string
from gklearn.ged.util import compute_geds_cml
from gklearn.ged.env import GEDEnv
from gklearn.ged.median import MedianGraphEstimator
from gklearn.ged.median import constant_node_costs,mge_options_to_string
from gklearn.utils import Timer
from gklearn.ged.median import MedianGraphEstimatorPy
from gklearn.ged.median import constant_node_costs, mge_options_to_string
from gklearn.utils import Timer, SpecialLabel
from gklearn.utils.utils import get_graph_kernel_by_name
class MedianPreimageGeneratorCML(PreimageGenerator):
"""Generator median preimages by cost matrices learning using the pure Python version of GEDEnv. Works only for symbolic labeled graphs.
"""
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
@@ -37,7 +32,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__mge = None
self.__ged_options = {}
self.__mge_options = {}
self.__fit_method = 'k-graphs'
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
@@ -47,8 +43,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1
self.__allow_zeros = Fals e
self.__triangle_rule = True
self.__allow_zeros = Tru e
# self.__triangle_rule = True
# values to compute.
self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None
@@ -64,6 +60,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__itrs = 0
self.__converged = False
self.__num_updates_ecc = 0
self.__node_label_costs = None
self.__edge_label_costs = None
# values that can be set or to be computed.
self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None
@@ -76,7 +74,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {})
self.__fit_method = kwargs.get('fit_method', 'k-graphs')
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True)
@@ -89,8 +88,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', Fals e)
self.__triangle_rule = kwargs.get('triangle_rule', True)
self.__allow_zeros = kwargs.get('allow_zeros', Tru e)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
def run(self):
@@ -122,10 +121,10 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
start = time.time()
self.__runtime_precompute_gm = 0
end_precompute_gm = start
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# start = time.time()
# self.__runtime_precompute_gm = 0
# end_precompute_gm = start
# 2. optimize edit cost constants.
self.__optimize_edit_cost_vector()
@@ -197,7 +196,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __optimize_edit_cost_vector(self):
"""Learn edit cost vector.
"""
if self.__fit_method == 'random': # random
if self.__init_method == 'random': # random
# Get list of node labels.
nls = self._dataset.get_all_node_labels()
# Generate random costs.
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
self.__node_label_costs = np.zeros((len(nls) + 1, len(nls) + 1))
# Initialize node label cost matrix, each row/column corresponds to a label, the first label is the dummy label. These is the same setting as in GEDData.
i = 0
# Costs of insertions.
for row in range(1, len(nls) + 1):
self.__node_label_costs[row, 0] = rand_costs[i]
i += 1
# Costs of deletions.
for col in range(1, len(nls) + 1):
self.__node_label_costs[0, col] = rand_costs[i]
i += 1
# Costs of substitutions.
for row in range(1, len(nls) + 1):
for col in range(row + 1, len(nls) + 1):
self.__node_label_costs[row, col] = rand_costs[i]
self.__node_label_costs[col, row] = rand_costs[i]
i += 1
# self.__node_label_costs = {}
# for i, (nl1, nl2) in enumerate(itertools.combinations(nls, 2)):
# self.__node_label_costs[(nl1, nl2)] = rand_costs[i]
# # Add costs for deletion.
# for j, nl in enumerate(nls):
# self.__node_label_costs[(nl1, SpecialLabel.DUMMY)] = rand_costs[i + j]
# # Add costs for insertion.
# for k, nl in enumerate(nls):
# self.__node_label_costs[(SpecialLabel.DUMMY, nl1)] = rand_costs[i + j + k]
# # Add self costs.
# for nl in nls:
# self.__node_label_costs[(nl, nl)] = 0
# self.__node_label_costs[(SpecialLabel.DUMMY, SpecialLabel.DUMMY)] = 0
# Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
@@ -279,6 +319,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
options['edge_labels'] = self._dataset.edge_labels
options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0]
@@ -881,8 +922,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_env.init(init_type=self.__ged_options['init_option'])
# Set up the madian graph estimator.
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string( self.__ged_options))
self.__mge = MedianGraphEstimatorPy (ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy()
if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
@@ -897,8 +938,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_options = self.__ged_options.copy()
if self.__parallel:
ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options) )
self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options) )
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id)