Browse Source

Add ability to GEDEnv to use pre-defined costs between node labels.

v0.2.x
jajupmochi 5 years ago
parent
commit
731ab1d45b
4 changed files with 103 additions and 36 deletions
  1. +28
    -9
      gklearn/ged/env/ged_data.py
  2. +6
    -0
      gklearn/ged/env/ged_env.py
  3. +1
    -0
      gklearn/ged/util/util.py
  4. +68
    -27
      gklearn/preimage/median_preimage_generator_cml.py

+ 28
- 9
gklearn/ged/env/ged_data.py View File

@@ -23,6 +23,7 @@ class GEDData(object):
self._edit_cost = None self._edit_cost = None
self._node_costs = None self._node_costs = None
self._edge_costs = None self._edge_costs = None
self._node_label_costs = None
self._node_labels = [] self._node_labels = []
self._edge_labels = [] self._edge_labels = []
self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
@@ -84,15 +85,21 @@ class GEDData(object):
* and 0 otherwise. * and 0 otherwise.
*/ */
""" """
if self._eager_init(): # @todo: check if correct
return self._node_costs[label1, label2]
if label1 == label2:
return 0
if label1 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check
if label2 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1])
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1])
if self._node_label_costs is None:
if self._eager_init(): # @todo: check if correct
return self._node_costs[label1, label2]
if label1 == label2:
return 0
if label1 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check
if label2 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1])
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1])
# use pre-computed node label costs.
else:
id1 = 0 if label1 == SpecialLabel.DUMMY else self._node_label_to_id(label1) # @todo: this is slow.
id2 = 0 if label2 == SpecialLabel.DUMMY else self._node_label_to_id(label2)
return self._node_label_costs[id1, id2]
def edge_cost(self, label1, label2): def edge_cost(self, label1, label2):
@@ -198,6 +205,12 @@ class GEDData(object):
self._delete_edit_cost = True self._delete_edit_cost = True
def id_to_node_label(self, label_id):
if label_id > len(self._node_labels) or label_id == 0:
raise Exception('Invalid node label ID', str(label_id), '.')
return self._node_labels[label_id - 1]
def _node_label_to_id(self, node_label): def _node_label_to_id(self, node_label):
n_id = 0 n_id = 0
for n_l in self._node_labels: for n_l in self._node_labels:
@@ -208,6 +221,12 @@ class GEDData(object):
return n_id + 1 return n_id + 1




def id_to_edge_label(self, label_id):
if label_id > len(self._edge_labels) or label_id == 0:
raise Exception('Invalid edge label ID', str(label_id), '.')
return self._edge_labels[label_id - 1]


def _edge_label_to_id(self, edge_label): def _edge_label_to_id(self, edge_label):
e_id = 0 e_id = 0
for e_l in self._edge_labels: for e_l in self._edge_labels:


+ 6
- 0
gklearn/ged/env/ged_env.py View File

@@ -226,6 +226,12 @@ class GEDEnv(object):
*/ */
""" """
return self.__ged_data._init_type return self.__ged_data._init_type
def set_label_costs(self, label_costs):
"""Set the costs between labels.
"""
self.__ged_data._node_label_costs = label_costs
def set_method(self, method, options=''): def set_method(self, method, options=''):


+ 1
- 0
gklearn/ged/util/util.py View File

@@ -55,6 +55,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
for g in graphs: for g in graphs:
ged_env.add_nx_graph(g, '') ged_env.add_nx_graph(g, '')
listID = ged_env.get_all_graph_ids() listID = ged_env.get_all_graph_ids()
ged_env.set_label_costs(options['node_label_costs'] if 'node_label_costs' in options else None)
ged_env.init(init_type=options['init_option']) ged_env.init(init_type=options['init_option'])
if parallel: if parallel:
options['threads'] = 1 options['threads'] = 1


+ 68
- 27
gklearn/preimage/median_preimage_generator_cml.py View File

@@ -5,31 +5,26 @@ Created on Tue Jun 16 16:04:46 2020


@author: ljia @author: ljia
""" """

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:27:22 2020

@author: ljia
"""
import numpy as np import numpy as np
import time import time
import random import random
import multiprocessing import multiprocessing
import networkx as nx import networkx as nx
import cvxpy as cp import cvxpy as cp
import itertools
from gklearn.preimage import PreimageGenerator from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis from gklearn.preimage.utils import compute_k_dis
from gklearn.ged.util import compute_geds_cml, ged_options_to_string
from gklearn.ged.util import compute_geds_cml
from gklearn.ged.env import GEDEnv from gklearn.ged.env import GEDEnv
from gklearn.ged.median import MedianGraphEstimator
from gklearn.ged.median import constant_node_costs,mge_options_to_string
from gklearn.utils import Timer
from gklearn.ged.median import MedianGraphEstimatorPy
from gklearn.ged.median import constant_node_costs, mge_options_to_string
from gklearn.utils import Timer, SpecialLabel
from gklearn.utils.utils import get_graph_kernel_by_name from gklearn.utils.utils import get_graph_kernel_by_name




class MedianPreimageGeneratorCML(PreimageGenerator): class MedianPreimageGeneratorCML(PreimageGenerator):
"""Generator median preimages by cost matrices learning using the pure Python version of GEDEnv. Works only for symbolic labeled graphs.
"""
def __init__(self, dataset=None): def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset) PreimageGenerator.__init__(self, dataset=dataset)
@@ -37,7 +32,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__mge = None self.__mge = None
self.__ged_options = {} self.__ged_options = {}
self.__mge_options = {} self.__mge_options = {}
self.__fit_method = 'k-graphs'
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None self.__init_ecc = None
self.__parallel = True self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count() self.__n_jobs = multiprocessing.cpu_count()
@@ -47,8 +43,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__max_itrs_without_update = 3 self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01 self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1 self.__epsilon_ec = 0.1
self.__allow_zeros = False
self.__triangle_rule = True
self.__allow_zeros = True
# self.__triangle_rule = True
# values to compute. # values to compute.
self.__runtime_optimize_ec = None self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None self.__runtime_generate_preimage = None
@@ -64,6 +60,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__itrs = 0 self.__itrs = 0
self.__converged = False self.__converged = False
self.__num_updates_ecc = 0 self.__num_updates_ecc = 0
self.__node_label_costs = None
self.__edge_label_costs = None
# values that can be set or to be computed. # values that can be set or to be computed.
self.__edit_cost_constants = [] self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None self.__gram_matrix_unnorm = None
@@ -76,7 +74,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self._verbose = kwargs.get('verbose', 2) self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {}) self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {}) self.__mge_options = kwargs.get('mge_options', {})
self.__fit_method = kwargs.get('fit_method', 'k-graphs')
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None) self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True) self.__parallel = kwargs.get('parallel', True)
@@ -89,8 +88,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', False)
self.__triangle_rule = kwargs.get('triangle_rule', True)
self.__allow_zeros = kwargs.get('allow_zeros', True)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
def run(self): def run(self):
@@ -122,10 +121,10 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
end_precompute_gm = time.time() end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm start -= self.__runtime_precompute_gm
if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
start = time.time()
self.__runtime_precompute_gm = 0
end_precompute_gm = start
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# start = time.time()
# self.__runtime_precompute_gm = 0
# end_precompute_gm = start
# 2. optimize edit cost constants. # 2. optimize edit cost constants.
self.__optimize_edit_cost_vector() self.__optimize_edit_cost_vector()
@@ -197,7 +196,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __optimize_edit_cost_vector(self): def __optimize_edit_cost_vector(self):
"""Learn edit cost vector. """Learn edit cost vector.
""" """
if self.__fit_method == 'random': # random
if self.__init_method == 'random': # random
# Get list of node labels.
nls = self._dataset.get_all_node_labels()
# Generate random costs.
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
self.__node_label_costs = np.zeros((len(nls) + 1, len(nls) + 1))
# Initialize node label cost matrix, each row/column corresponds to a label, the first label is the dummy label. These is the same setting as in GEDData.
i = 0
# Costs of insertions.
for row in range(1, len(nls) + 1):
self.__node_label_costs[row, 0] = rand_costs[i]
i += 1
# Costs of deletions.
for col in range(1, len(nls) + 1):
self.__node_label_costs[0, col] = rand_costs[i]
i += 1
# Costs of substitutions.
for row in range(1, len(nls) + 1):
for col in range(row + 1, len(nls) + 1):
self.__node_label_costs[row, col] = rand_costs[i]
self.__node_label_costs[col, row] = rand_costs[i]
i += 1
# self.__node_label_costs = {}
# for i, (nl1, nl2) in enumerate(itertools.combinations(nls, 2)):
# self.__node_label_costs[(nl1, nl2)] = rand_costs[i]
# # Add costs for deletion.
# for j, nl in enumerate(nls):
# self.__node_label_costs[(nl1, SpecialLabel.DUMMY)] = rand_costs[i + j]
# # Add costs for insertion.
# for k, nl in enumerate(nls):
# self.__node_label_costs[(SpecialLabel.DUMMY, nl1)] = rand_costs[i + j + k]
# # Add self costs.
# for nl in nls:
# self.__node_label_costs[(nl, nl)] = 0
# self.__node_label_costs[(SpecialLabel.DUMMY, SpecialLabel.DUMMY)] = 0
# Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER': if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3) self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
@@ -279,6 +319,7 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
options['edge_labels'] = self._dataset.edge_labels options['edge_labels'] = self._dataset.edge_labels
options['node_attrs'] = self._dataset.node_attrs options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0] time_list = [time.time() - time0]
@@ -881,8 +922,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_env.init(init_type=self.__ged_options['init_option']) ged_env.init(init_type=self.__ged_options['init_option'])
# Set up the madian graph estimator. # Set up the madian graph estimator.
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge = MedianGraphEstimatorPy(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy() options = self.__mge_options.copy()
if not 'seed' in options: if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
@@ -897,8 +938,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_options = self.__ged_options.copy() ged_options = self.__ged_options.copy()
if self.__parallel: if self.__parallel:
ged_options['threads'] = 1 ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator. # Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id) self.__mge.run(graph_ids, set_median_id, gen_median_id)


Loading…
Cancel
Save