Browse Source

Merge pull request #23 from jajupmochi/v0.2

V0.2
tags/v0.2.0
linlin GitHub 5 years ago
parent
commit
b649298c95
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 3700 additions and 71 deletions
  1. +126
    -0
      gklearn/experiments/ged/check_results_of_ged_env.py
  2. +86
    -18
      gklearn/ged/env/ged_data.py
  3. +358
    -2
      gklearn/ged/env/ged_env.py
  4. +1
    -0
      gklearn/ged/median/__init__.py
  5. +1711
    -0
      gklearn/ged/median/median_graph_estimator_py.py
  6. +4
    -3
      gklearn/ged/util/lsape_solver.py
  7. +171
    -20
      gklearn/ged/util/util.py
  8. +2
    -1
      gklearn/preimage/__init__.py
  9. +64
    -27
      gklearn/preimage/median_preimage_generator_cml.py
  10. +1035
    -0
      gklearn/preimage/median_preimage_generator_py.py
  11. +0
    -0
      gklearn/tests/ged/test_ged_env.py
  12. +122
    -0
      gklearn/tests/ged/test_get_nb_edit_operations_symbolic_cml.py
  13. +20
    -0
      gklearn/utils/dataset.py

+ 126
- 0
gklearn/experiments/ged/check_results_of_ged_env.py View File

@@ -0,0 +1,126 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 25 11:31:46 2020

@author: ljia
"""

def xp_check_results_of_GEDEnv():
    """Compare results of GEDEnv to GEDLIB.

    Loads the predefined dataset "MUTAG", computes graph edit distances with
    `compute_geds_by_GEDEnv` and `compute_geds_by_GEDLIB`, and pretty-prints
    both result dictionaries.

    Returns
    -------
    tuple
        ``(results1, results2)``: the results obtained with GEDEnv and with
        GEDLIB, in that order.
    """
    import pprint

    from gklearn.utils import Dataset

    # 1. Get dataset: load the predefined dataset "MUTAG".
    ds_name = 'MUTAG'
    dataset = Dataset()
    dataset.load_predefined_dataset(ds_name)

    # 2. Compute the GEDs with both implementations.
    results1 = compute_geds_by_GEDEnv(dataset)
    results2 = compute_geds_by_GEDLIB(dataset)

    # 3. Show results.
    pp = pprint.PrettyPrinter(indent=4)  # pretty print
    print('Results using GEDEnv:')
    pp.pprint(results1)
    print()
    print('Results using GEDLIB:')
    pp.pprint(results2)

    return results1, results2
def compute_geds_by_GEDEnv(dataset):
    """Compute GEDs between the first graphs of a dataset with the Python GEDEnv.

    At most the first 10 graphs of ``dataset.graphs`` are added to a
    ``gklearn.ged.env.GEDEnv`` configured with 'CONSTANT' edit costs, and the
    'BIPARTITE' method is run on every pair of added graphs.

    Parameters
    ----------
    dataset : object
        Object whose ``graphs`` attribute is a sliceable sequence of graphs
        accepted by ``GEDEnv.add_nx_graph``. At least two graphs are needed
        because the detailed results are reported for the first two.

    Returns
    -------
    dict
        Keys 'pi_forward', 'pi_backward', 'upper_bound' and 'runtime' refer to
        the first two graphs; 'init_time' is the initialization time of the
        environment; 'ged_mat' is the symmetric matrix of upper bounds between
        all added graphs.
    """
    from gklearn.ged.env import GEDEnv
    import numpy as np

    ged_env = GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                          edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
                          )
    for g in dataset.graphs[0:10]:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {'threads': 1  # parallel threads.
               }
    ged_env.set_method('BIPARTITE',  # GED method.
                       options  # options for GED method.
                       )
    ged_env.init_method()  # initialize GED method.

    # Size the matrix by the graphs that were really added, so datasets with
    # fewer than 10 graphs do not run the method on non-existent IDs.
    nb_graphs = len(listID)
    ged_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            ged_env.run_method(listID[i], listID[j])  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(listID[i], listID[j])
            ged_mat[j, i] = ged_mat[i, j]

    results = {}
    results['pi_forward'] = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
    results['pi_backward'] = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
    results['upper_bound'] = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.
    results['runtime'] = ged_env.get_runtime(listID[0], listID[1])
    results['init_time'] = ged_env.get_init_time()
    results['ged_mat'] = ged_mat
    return results


def compute_geds_by_GEDLIB(dataset):
    """Compute GEDs between the first graphs of a dataset with gedlibpy.

    At most the first 10 graphs of ``dataset.graphs`` are added to a
    ``gedlibpy.GEDEnv`` configured with 'CONSTANT' edit costs, and the
    'BIPARTITE' method is run on every pair of added graphs.

    Parameters
    ----------
    dataset : object
        Object whose ``graphs`` attribute is a sliceable sequence of graphs
        accepted by ``add_nx_graph``. At least two graphs are needed because
        the detailed results are reported for the first two.

    Returns
    -------
    dict
        Keys 'pi_forward', 'pi_backward', 'upper_bound' and 'runtime' refer to
        the first two graphs; 'init_time' is the initialization time of the
        environment; 'ged_mat' is the symmetric matrix of upper bounds between
        all added graphs.
    """
    # NOTE(review): `librariesImport` is not referenced below; it is presumably
    # imported for its side effects (loading shared libraries) - confirm
    # before removing.
    from gklearn.gedlib import librariesImport, gedlibpy
    from gklearn.ged.util import ged_options_to_string
    import numpy as np

    ged_env = gedlibpy.GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                          edit_cost_constant=[3, 3, 1, 3, 3, 1]  # edit costs.
                          )
    for g in dataset.graphs[0:10]:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_option='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {'initialization-method': 'RANDOM',  # or 'NODE', etc.
               'threads': 1  # parallel threads.
               }
    ged_env.set_method('BIPARTITE',  # GED method.
                       ged_options_to_string(options)  # options for GED method.
                       )
    ged_env.init_method()  # initialize GED method.

    # Size the matrix by the graphs that were really added, so datasets with
    # fewer than 10 graphs do not run the method on non-existent IDs.
    nb_graphs = len(listID)
    ged_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            ged_env.run_method(listID[i], listID[j])  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(listID[i], listID[j])
            ged_mat[j, i] = ged_mat[i, j]

    results = {}
    results['pi_forward'] = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
    results['pi_backward'] = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
    results['upper_bound'] = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.
    results['runtime'] = ged_env.get_runtime(listID[0], listID[1])
    results['init_time'] = ged_env.get_init_time()
    results['ged_mat'] = ged_mat
    return results
# Run the comparison only when this file is executed as a script.
if __name__ == "__main__":
    results1, results2 = xp_check_results_of_GEDEnv()

+ 86
- 18
gklearn/ged/env/ged_data.py View File

@@ -23,6 +23,8 @@ class GEDData(object):
self._edit_cost = None self._edit_cost = None
self._node_costs = None self._node_costs = None
self._edge_costs = None self._edge_costs = None
self._node_label_costs = None
self._edge_label_costs = None
self._node_labels = [] self._node_labels = []
self._edge_labels = [] self._edge_labels = []
self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
@@ -41,6 +43,17 @@ class GEDData(object):
return len(self._graphs) return len(self._graphs)
def graph(self, graph_id):
    """Provide access to a graph.

    Parameters
    ----------
    graph_id : int
        The ID of the graph.

    Returns
    -------
    object
        The entry of ``self._graphs`` stored under `graph_id`.
    """
    all_graphs = self._graphs
    return all_graphs[graph_id]
def shuffled_graph_copies_available(self): def shuffled_graph_copies_available(self):
""" """
/*! /*!
@@ -51,6 +64,16 @@ class GEDData(object):
return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES) return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)
def num_graphs_without_shuffled_copies(self):
    """Return the number of graphs in the instance, shuffled copies excluded.

    Returns
    -------
    int
        The value of ``self._num_graphs_without_shuffled_copies``.
    """
    count = self._num_graphs_without_shuffled_copies
    return count
def node_cost(self, label1, label2): def node_cost(self, label1, label2):
""" """
/*! /*!
@@ -63,15 +86,21 @@ class GEDData(object):
* and 0 otherwise. * and 0 otherwise.
*/ */
""" """
if self._eager_init(): # @todo: check if correct
return self._node_costs[label1, label2]
if label1 == label2:
return 0
if label1 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check
if label2 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1])
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1])
if self._node_label_costs is None:
if self._eager_init(): # @todo: check if correct
return self._node_costs[label1, label2]
if label1 == label2:
return 0
if label1 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check
if label2 == SpecialLabel.DUMMY: # @todo: check dummy
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1])
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1])
# use pre-computed node label costs.
else:
id1 = 0 if label1 == SpecialLabel.DUMMY else self._node_label_to_id(label1) # @todo: this is slow.
id2 = 0 if label2 == SpecialLabel.DUMMY else self._node_label_to_id(label2)
return self._node_label_costs[id1, id2]
def edge_cost(self, label1, label2): def edge_cost(self, label1, label2):
@@ -86,15 +115,22 @@ class GEDData(object):
* and 0 otherwise. * and 0 otherwise.
*/ */
""" """
if self._eager_init(): # @todo: check if correct
return self._node_costs[label1, label2]
if label1 == label2:
return 0
if label1 == SpecialLabel.DUMMY:
return self._edit_cost.edge_ins_cost_fun(label2) # self._edge_labels[label2 - 1])
if label2 == SpecialLabel.DUMMY:
return self._edit_cost.edge_del_cost_fun(label1) # self._edge_labels[label1 - 1])
return self._edit_cost.edge_rel_cost_fun(label1, label2) # self._edge_labels[label1 - 1], self._edge_labels[label2 - 1])
if self._edge_label_costs is None:
if self._eager_init(): # @todo: check if correct
return self._node_costs[label1, label2]
if label1 == label2:
return 0
if label1 == SpecialLabel.DUMMY:
return self._edit_cost.edge_ins_cost_fun(label2) # self._edge_labels[label2 - 1])
if label2 == SpecialLabel.DUMMY:
return self._edit_cost.edge_del_cost_fun(label1) # self._edge_labels[label1 - 1])
return self._edit_cost.edge_rel_cost_fun(label1, label2) # self._edge_labels[label1 - 1], self._edge_labels[label2 - 1])
# use pre-computed edge label costs.
else:
id1 = 0 if label1 == SpecialLabel.DUMMY else self._edge_label_to_id(label1) # @todo: this is slow.
id2 = 0 if label2 == SpecialLabel.DUMMY else self._edge_label_to_id(label2)
return self._edge_label_costs[id1, id2]
def compute_induced_cost(self, g, h, node_map): def compute_induced_cost(self, g, h, node_map):
@@ -177,5 +213,37 @@ class GEDData(object):
self._delete_edit_cost = True self._delete_edit_cost = True
def id_to_node_label(self, label_id):
    """Return the node label registered under a 1-based label ID.

    Parameters
    ----------
    label_id : int
        ID of the node label; valid IDs range from 1 to the number of
        entries in ``self._node_labels``.

    Returns
    -------
    object
        The node label stored at position ``label_id - 1``.

    Raises
    ------
    Exception
        If `label_id` is smaller than 1 or larger than the number of known
        node labels.
    """
    # Rejecting everything below 1 (not only 0) keeps negative IDs from
    # silently indexing the label list from its end.
    if label_id < 1 or label_id > len(self._node_labels):
        raise Exception('Invalid node label ID', str(label_id), '.')
    return self._node_labels[label_id - 1]
def _node_label_to_id(self, node_label):
    """Return the 1-based ID of a node label, registering the label if unknown.

    Parameters
    ----------
    node_label : object
        The node label to look up; compared with ``==`` against the entries
        of ``self._node_labels``.

    Returns
    -------
    int
        Position of the label in ``self._node_labels`` plus one. An unknown
        label is appended to the list first and receives the next free ID.
    """
    known_labels = self._node_labels
    # Linear scan; IDs start at 1.
    for label_id, known in enumerate(known_labels, start=1):
        if known == node_label:
            return label_id
    known_labels.append(node_label)
    return len(known_labels)


def id_to_edge_label(self, label_id):
    """Return the edge label registered under a 1-based label ID.

    Parameters
    ----------
    label_id : int
        ID of the edge label; valid IDs range from 1 to the number of
        entries in ``self._edge_labels``.

    Returns
    -------
    object
        The edge label stored at position ``label_id - 1``.

    Raises
    ------
    Exception
        If `label_id` is smaller than 1 or larger than the number of known
        edge labels.
    """
    # Rejecting everything below 1 (not only 0) keeps negative IDs from
    # silently indexing the label list from its end.
    if label_id < 1 or label_id > len(self._edge_labels):
        raise Exception('Invalid edge label ID', str(label_id), '.')
    return self._edge_labels[label_id - 1]


def _edge_label_to_id(self, edge_label):
    """Return the 1-based ID of an edge label, registering the label if unknown.

    Parameters
    ----------
    edge_label : object
        The edge label to look up; compared with ``==`` against the entries
        of ``self._edge_labels``.

    Returns
    -------
    int
        Position of the label in ``self._edge_labels`` plus one. An unknown
        label is appended to the list first and receives the next free ID.
    """
    known_labels = self._edge_labels
    # Linear scan; IDs start at 1.
    for label_id, known in enumerate(known_labels, start=1):
        if known == edge_label:
            return label_id
    known_labels.append(edge_label)
    return len(known_labels)
def _eager_init(self): def _eager_init(self):
return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES) return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)

+ 358
- 2
gklearn/ged/env/ged_env.py View File

@@ -63,6 +63,23 @@ class GEDEnv(object):
return graph_id return graph_id
def clear_graph(self, graph_id):
    """Clear and de-initialize a graph previously added to the environment.

    Call ``init()`` after calling this method.

    Parameters
    ----------
    graph_id : int
        ID of the graph that has to be cleared.

    Raises
    ------
    Exception
        If `graph_id` is negative or not smaller than the number of graphs
        (without shuffled copies) held by the environment.
    """
    nb_graphs = self.__ged_data.num_graphs_without_shuffled_copies()
    # Graph IDs are used as 0-based list positions below, so an ID equal to
    # the graph count is already out of range and a negative ID would address
    # a graph from the end of the list.
    if graph_id < 0 or graph_id >= nb_graphs:
        # Report the ID itself: looking up the name of an unknown graph can
        # fail before the message is even built.
        raise Exception('The graph', str(graph_id), 'has not been added to the environment.')
    self.__ged_data._graphs[graph_id].clear()
    self.__original_to_internal_node_ids[graph_id].clear()
    self.__internal_to_original_node_ids[graph_id].clear()
    self.__ged_data._strings_to_internal_node_ids[graph_id].clear()
    self.__ged_data._internal_node_ids_to_strings[graph_id].clear()
    # The environment has to be re-initialized after a graph was cleared.
    self.__initialized = False
def add_node(self, graph_id, node_id, node_label): def add_node(self, graph_id, node_id, node_label):
""" """
/*! /*!
@@ -80,7 +97,9 @@ class GEDEnv(object):
self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id
self.__ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id self.__ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id
self.__ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id) self.__ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id)
# @todo: node_label_to_id_
self.__ged_data._node_label_to_id(node_label)
label_id = self.__ged_data._node_label_to_id(node_label)
# @todo: ged_data_.graphs_[graph_id].set_label
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True): def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True):
@@ -98,7 +117,8 @@ class GEDEnv(object):
self.__initialized = False self.__initialized = False
# @todo: check ignore_duplicates. # @todo: check ignore_duplicates.
self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label) self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label)
# @todo: edge_id and label_id, edge_label_to_id_.
label_id = self.__ged_data._edge_label_to_id(edge_label)
# @todo: ged_data_.graphs_[graph_id].set_label
def add_nx_graph(self, g, classe, ignore_duplicates=True) : def add_nx_graph(self, g, classe, ignore_duplicates=True) :
@@ -123,6 +143,40 @@ class GEDEnv(object):
return graph_id return graph_id
def load_nx_graph(self, nx_graph, graph_id, graph_name='', graph_class=''):
    """Load a NetworkX graph into the GED environment.

    Parameters
    ----------
    nx_graph : NetworkX Graph object
        The graph that should be loaded.
    graph_id : int or None
        ID of a graph already contained in the environment, which is cleared
        and overwritten. If `None`, a new graph is added instead.
    graph_name : string, optional
        Name of the newly added graph. The default is ''. Only used when
        `graph_id` is `None`.
    graph_class : string, optional
        Class of the newly added graph. The default is ''. Only used when
        `graph_id` is `None`.

    Returns
    -------
    int
        The ID of the loaded graph.
    """
    def _to_label(attributes):
        # An attribute dict becomes a tuple of (key, value) pairs sorted by key.
        return tuple(sorted(attributes.items(), key=lambda kv: kv[0]))

    if graph_id is not None:
        self.clear_graph(graph_id)
    else:  # @todo: undefined.
        graph_id = self.add_graph(graph_name, graph_class)

    for node in nx_graph.nodes:
        self.add_node(graph_id, node, _to_label(nx_graph.nodes[node]))
    for edge in nx_graph.edges:
        nd_from, nd_to = edge[0], edge[1]
        self.add_edge(graph_id, nd_from, nd_to, _to_label(nx_graph.edges[(nd_from, nd_to)]))
    return graph_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False): def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False):
if isinstance(init_type, str): if isinstance(init_type, str):
init_type = OptionsStringMap.InitType[init_type] init_type = OptionsStringMap.InitType[init_type]
@@ -154,6 +208,35 @@ class GEDEnv(object):
self.__new_graph_ids.clear() self.__new_graph_ids.clear()
def is_initialized(self):
    """Check if the environment is initialized.

    Returns
    -------
    bool
        The current value of the environment's initialization flag.
    """
    initialized = self.__initialized
    return initialized
def get_init_type(self):
    """Return the initialization type stored in the GED data.

    Returns
    -------
    object
        The ``_init_type`` attribute of the environment's GED data.
    """
    ged_data = self.__ged_data
    return ged_data._init_type
def set_label_costs(self, node_label_costs=None, edge_label_costs=None):
    """Set the costs between labels.

    Parameters
    ----------
    node_label_costs : object, optional
        Costs between node labels. Left untouched when `None`.
    edge_label_costs : object, optional
        Costs between edge labels. Left untouched when `None`.
    """
    ged_data = self.__ged_data
    # Only the cost tables that are explicitly given are overwritten.
    if node_label_costs is not None:
        ged_data._node_label_costs = node_label_costs
    if edge_label_costs is not None:
        ged_data._edge_label_costs = edge_label_costs
def set_method(self, method, options=''): def set_method(self, method, options=''):
""" """
/*! /*!
@@ -263,6 +346,80 @@ class GEDEnv(object):
self.__ged_method.init() self.__ged_method.init()
def get_num_node_labels(self):
    """Return the number of node labels.

    Returns
    -------
    int
        Number of pairwise different node labels contained in the
        environment.

    Notes
    -----
    If 1 is returned, the nodes are unlabeled.
    """
    node_labels = self.__ged_data._node_labels
    return len(node_labels)
def get_all_node_labels(self):
    """Return the list of all node labels.

    Returns
    -------
    list
        The pairwise different node labels contained in the environment.
        This is the list object held by the GED data itself, not a copy.
    """
    ged_data = self.__ged_data
    return ged_data._node_labels
def get_node_label(self, label_id, to_dict=True):
    """Return a node label.

    Parameters
    ----------
    label_id : int
        ID of the node label that should be returned. Must be between 1 and
        ``get_num_node_labels()``.
    to_dict : bool, optional
        If True (default), the stored label is converted with ``dict``
        before it is returned.

    Returns
    -------
    object
        Node label for the selected label ID.

    Raises
    ------
    Exception
        If the environment contains no node label with ID `label_id`.
    """
    nb_labels = self.get_num_node_labels()
    if label_id < 1 or label_id > nb_labels:
        raise Exception('The environment does not contain a node label with ID', str(label_id), '.')
    # Label IDs are 1-based, list positions 0-based.
    label = self.__ged_data._node_labels[label_id - 1]
    return dict(label) if to_dict else label
def get_num_edge_labels(self):
    """Return the number of edge labels.

    Returns
    -------
    int
        Number of pairwise different edge labels contained in the
        environment.

    Notes
    -----
    If 1 is returned, the edges are unlabeled.
    """
    edge_labels = self.__ged_data._edge_labels
    return len(edge_labels)
def get_all_edge_labels(self):
    """Return the list of all edge labels.

    Returns
    -------
    list
        The pairwise different edge labels contained in the environment.
        This is the list object held by the GED data itself, not a copy.
    """
    ged_data = self.__ged_data
    return ged_data._edge_labels
def get_edge_label(self, label_id, to_dict=True):
    """Return an edge label.

    Parameters
    ----------
    label_id : int
        ID of the edge label that should be returned. Must be between 1 and
        ``get_num_edge_labels()``.
    to_dict : bool, optional
        If True (default), the stored label is converted with ``dict``
        before it is returned.

    Returns
    -------
    object
        Edge label for the selected label ID.

    Raises
    ------
    Exception
        If the environment contains no edge label with ID `label_id`.
    """
    nb_labels = self.get_num_edge_labels()
    if label_id < 1 or label_id > nb_labels:
        raise Exception('The environment does not contain an edge label with ID', str(label_id), '.')
    # Label IDs are 1-based, list positions 0-based.
    label = self.__ged_data._edge_labels[label_id - 1]
    return dict(label) if to_dict else label
def get_upper_bound(self, g_id, h_id): def get_upper_bound(self, g_id, h_id):
""" """
/*! /*!
@@ -363,6 +520,205 @@ class GEDEnv(object):
.. note:: I don't know how to connect the two map to reconstruct the adjacence matrix. Please come back when I know how it's work ! .. note:: I don't know how to connect the two map to reconstruct the adjacence matrix. Please come back when I know how it's work !
""" """
return self.get_node_map(g_id, h_id).backward_map return self.get_node_map(g_id, h_id).backward_map
def compute_induced_cost(self, g_id, h_id, node_map):
    """Compute the edit cost between two graphs induced by a node map.

    The work is delegated to ``compute_induced_cost`` of the GED data, which
    receives the two graphs looked up by their IDs. Nothing is returned here.

    Parameters
    ----------
    g_id : int
        ID of input graph.
    h_id : int
        ID of input graph.
    node_map : object
        Node map whose induced edit cost is to be computed (in/out argument).
    """
    ged_data = self.__ged_data
    graph_g = ged_data._graphs[g_id]
    graph_h = ged_data._graphs[h_id]
    ged_data.compute_induced_cost(graph_g, graph_h, node_map)
def get_nx_graph(self, graph_id):
    """Return a ``networkx.Graph`` representation of a graph.

    The graph attribute 'id' holds `graph_id` and 'original_node_ids' holds
    the result of ``get_original_node_ids``. Nodes are numbered
    ``0 .. nb_nodes - 1`` and carry the label dicts returned by
    ``get_graph_node_labels``; edges carry the label dicts returned by
    ``get_graph_edges``.

    Parameters
    ----------
    graph_id : int
        ID of the selected graph.

    Returns
    -------
    networkx.Graph
        The reconstructed graph.
    """
    nx_graph = nx.Graph()  # @todo: add graph attributes.
    nx_graph.graph['id'] = graph_id

    nb_nodes = self.get_graph_num_nodes(graph_id)
    original_node_ids = self.get_original_node_ids(graph_id)
    node_labels = self.get_graph_node_labels(graph_id, to_dict=True)
    nx_graph.graph['original_node_ids'] = original_node_ids

    # Nodes: the position in the label list is used as node ID.
    for node_id in range(nb_nodes):
        nx_graph.add_node(node_id, **node_labels[node_id])

    # Edges: keyed by their (head, tail) pair.
    for (head, tail), labels in self.get_graph_edges(graph_id, to_dict=True).items():
        nx_graph.add_edge(head, tail, **labels)

    return nx_graph
def get_graph_node_labels(self, graph_id, to_dict=True):
    """Collect the labels of all nodes of a graph selected by its ID.

    Parameters
    ----------
    graph_id : int
        The ID of the wanted graph.
    to_dict : bool, optional
        If True (default), every label is converted with ``dict``.

    Returns
    -------
    list
        The 'label' attribute of every node, in the iteration order of
        ``graph.nodes()``.

    See Also
    --------
    get_graph_num_nodes, get_original_node_ids, get_graph_edges
    """
    graph = self.__ged_data.graph(graph_id)
    labels = [graph.nodes[node]['label'] for node in graph.nodes()]
    if not to_dict:
        return labels
    return [dict(label) for label in labels]
def get_graph_edges(self, graph_id, to_dict=True):
    """Collect all edges of a graph selected by its ID.

    Parameters
    ----------
    graph_id : int
        The ID of the wanted graph.
    to_dict : bool, optional
        If True (default), every edge label is converted with ``dict``.

    Returns
    -------
    dict
        Maps each edge, given as a pair of node IDs, to the 'label'
        attribute of that edge.

    See Also
    --------
    get_graph_num_nodes, get_original_node_ids, get_graph_node_labels
    """
    graph = self.__ged_data.graph(graph_id)
    edges = {}
    for n1, n2, attr in graph.edges(data=True):
        label = attr['label']
        edges[(n1, n2)] = dict(label) if to_dict else label
    return edges

def get_graph_name(self, graph_id):
    """Return the graph name.

    Parameters
    ----------
    graph_id : int
        ID of an input graph that has been added to the environment.

    Returns
    -------
    object
        The entry of the GED data's ``_graph_names`` stored under `graph_id`.
    """
    names = self.__ged_data._graph_names
    return names[graph_id]
def get_graph_num_nodes(self, graph_id):
    """Return the number of nodes of a graph.

    Parameters
    ----------
    graph_id : int
        ID of an input graph that has been added to the environment.

    Returns
    -------
    int
        Number of nodes in the graph.
    """
    graph = self.__ged_data.graph(graph_id)
    # Same value as networkx.number_of_nodes(graph), asked from the graph itself.
    return graph.number_of_nodes()
def get_original_node_ids(self, graph_id):
    """Collect the original IDs of all nodes of a graph selected by its ID.

    Parameters
    ----------
    graph_id : int
        The ID of the wanted graph.

    Returns
    -------
    list
        The values of the internal-to-original node ID mapping of the graph,
        in the iteration order of that mapping.

    See Also
    --------
    get_graph_num_nodes, get_graph_node_labels, get_graph_edges
    """
    id_mapping = self.__internal_to_original_node_ids[graph_id]
    return list(id_mapping.values())
def get_node_rel_cost(self, node_label_1, node_label_2):
    """Return the node relabeling cost.

    Labels given as dicts are first converted into tuples of (key, value)
    pairs sorted by key; other labels are passed on unchanged.

    Parameters
    ----------
    node_label_1 : dict or object
        First node label.
    node_label_2 : dict or object
        Second node label.

    Returns
    -------
    object
        Result of the edit cost's ``node_rel_cost_fun`` for the two labels.
    """
    label_1, label_2 = (
        tuple(sorted(label.items(), key=lambda kv: kv[0])) if isinstance(label, dict) else label
        for label in (node_label_1, node_label_2)
    )
    return self.__ged_data._edit_cost.node_rel_cost_fun(label_1, label_2)
def get_node_del_cost(self, node_label):
    """Return the node deletion cost.

    A label given as a dict is first converted into a tuple of (key, value)
    pairs sorted by key; any other label is passed on unchanged.

    Parameters
    ----------
    node_label : dict or object
        Node label.

    Returns
    -------
    object
        Result of the edit cost's ``node_del_cost_fun`` for the label.
    """
    label = (
        tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
        if isinstance(node_label, dict)
        else node_label
    )
    return self.__ged_data._edit_cost.node_del_cost_fun(label)
def get_node_ins_cost(self, node_label):
    """Return the node insertion cost.

    A label given as a dict is first converted into a tuple of (key, value)
    pairs sorted by key; any other label is passed on unchanged.

    Parameters
    ----------
    node_label : dict or object
        Node label.

    Returns
    -------
    object
        Result of the edit cost's ``node_ins_cost_fun`` for the label.
    """
    label = (
        tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
        if isinstance(node_label, dict)
        else node_label
    )
    return self.__ged_data._edit_cost.node_ins_cost_fun(label)
def get_edge_rel_cost(self, edge_label_1, edge_label_2):
    """Return the edge relabeling cost.

    Labels given as dicts are first converted into tuples of (key, value)
    pairs sorted by key; other labels are passed on unchanged.

    Parameters
    ----------
    edge_label_1 : dict or object
        First edge label.
    edge_label_2 : dict or object
        Second edge label.

    Returns
    -------
    object
        Result of the edit cost's ``edge_rel_cost_fun`` for the two labels.
    """
    label_1, label_2 = (
        tuple(sorted(label.items(), key=lambda kv: kv[0])) if isinstance(label, dict) else label
        for label in (edge_label_1, edge_label_2)
    )
    return self.__ged_data._edit_cost.edge_rel_cost_fun(label_1, label_2)
def get_edge_del_cost(self, edge_label):
    """Return the edge deletion cost.

    A label given as a dict is first converted into a tuple of (key, value)
    pairs sorted by key; any other label is passed on unchanged.

    Parameters
    ----------
    edge_label : dict or object
        Edge label.

    Returns
    -------
    object
        Result of the edit cost's ``edge_del_cost_fun`` for the label.
    """
    label = (
        tuple(sorted(edge_label.items(), key=lambda kv: kv[0]))
        if isinstance(edge_label, dict)
        else edge_label
    )
    return self.__ged_data._edit_cost.edge_del_cost_fun(label)
def get_edge_ins_cost(self, edge_label):
    """Return the edge insertion cost.

    A label given as a dict is first converted into a tuple of (key, value)
    pairs sorted by key; any other label is passed on unchanged.

    Parameters
    ----------
    edge_label : dict or object
        Edge label.

    Returns
    -------
    object
        Result of the edit cost's ``edge_ins_cost_fun`` for the label.
    """
    label = (
        tuple(sorted(edge_label.items(), key=lambda kv: kv[0]))
        if isinstance(edge_label, dict)
        else edge_label
    )
    return self.__ged_data._edit_cost.edge_ins_cost_fun(label)
def get_all_graph_ids(self): def get_all_graph_ids(self):

+ 1
- 0
gklearn/ged/median/__init__.py View File

@@ -1,2 +1,3 @@
from gklearn.ged.median.median_graph_estimator import MedianGraphEstimator from gklearn.ged.median.median_graph_estimator import MedianGraphEstimator
from gklearn.ged.median.median_graph_estimator_py import MedianGraphEstimatorPy
from gklearn.ged.median.utils import constant_node_costs, mge_options_to_string from gklearn.ged.median.utils import constant_node_costs, mge_options_to_string

+ 1711
- 0
gklearn/ged/median/median_graph_estimator_py.py
File diff suppressed because it is too large
View File


+ 4
- 3
gklearn/ged/util/lsape_solver.py View File

@@ -8,6 +8,7 @@ Created on Mon Jun 22 15:37:36 2020
import numpy as np import numpy as np
from scipy.optimize import linear_sum_assignment from scipy.optimize import linear_sum_assignment



class LSAPESolver(object): class LSAPESolver(object):
@@ -61,9 +62,9 @@ class LSAPESolver(object):
""" """
self.clear_solution() self.clear_solution()
if self.__solve_optimally: if self.__solve_optimally:
row_id, col_id = linear_sum_assignment(self.__cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used.
self.__row_to_col_assignments[0] = col_id
self.__col_to_row_assignments[0] = np.argsort(col_id) # @todo: might be slow, can use row_id
row_ind, col_ind = linear_sum_assignment(self.__cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used.
self.__row_to_col_assignments[0] = col_ind
self.__col_to_row_assignments[0] = np.argsort(col_ind) # @todo: might be slow, can use row_ind
self.__compute_cost_from_assignments() self.__compute_cost_from_assignments()
if num_solutions > 1: if num_solutions > 1:
pass # @todo: pass # @todo:


+ 171
- 20
gklearn/ged/util/util.py View File

@@ -49,12 +49,19 @@ def compute_ged(g1, g2, options):




def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True): def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):

# initialize ged env. # initialize ged env.
ged_env = GEDEnv() ged_env = GEDEnv()
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants']) ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
for g in graphs: for g in graphs:
ged_env.add_nx_graph(g, '') ged_env.add_nx_graph(g, '')
listID = ged_env.get_all_graph_ids()
listID = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(options['node_label_costs'], len(node_labels)) if 'node_label_costs' in options else None
edge_label_costs = label_costs_to_matrix(options['edge_label_costs'], len(edge_labels)) if 'edge_label_costs' in options else None
ged_env.set_label_costs(node_label_costs, edge_label_costs)
ged_env.init(init_type=options['init_option']) ged_env.init(init_type=options['init_option'])
if parallel: if parallel:
options['threads'] = 1 options['threads'] = 1
@@ -62,9 +69,11 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
ged_env.init_method() ged_env.init_method()


# compute ged. # compute ged.
# options used to compute numbers of edit operations.
neo_options = {'edit_cost': options['edit_cost'], neo_options = {'edit_cost': options['edit_cost'],
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
'is_cml': True,
'node_labels': node_labels,
'edge_labels': edge_labels}
ged_mat = np.zeros((len(graphs), len(graphs))) ged_mat = np.zeros((len(graphs), len(graphs)))
if parallel: if parallel:
len_itr = int(len(graphs) * (len(graphs) - 1) / 2) len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -120,8 +129,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options) n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
n_edit_operations.append(n_eo_tmp) n_edit_operations.append(n_eo_tmp)


return ged_vec, ged_mat, n_edit_operations

return ged_vec, ged_mat, n_edit_operations




def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
@@ -235,21 +243,164 @@ def _compute_ged(env, gid1, gid2, g1, g2):
return dis, pi_forward, pi_backward return dis, pi_forward, pi_backward




def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
elif edit_cost == 'NON_SYMBOLIC':
node_attrs = kwargs.get('node_attrs', [])
edge_attrs = kwargs.get('edge_attrs', [])
return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
node_attrs=node_attrs, edge_attrs=edge_attrs)
elif edit_cost == 'CONSTANT':
node_labels = kwargs.get('node_labels', [])
edge_labels = kwargs.get('edge_labels', [])
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
node_labels=node_labels, edge_labels=edge_labels)
else:
return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def label_costs_to_matrix(costs, nb_labels):
    """Reform a label cost vector to a matrix.

    Parameters
    ----------
    costs : numpy.array
        The vector containing costs between labels, in the order of
        insertion costs (one per label), deletion costs (one per label) and
        substitution costs (one per unordered pair of different labels,
        row by row).
    nb_labels : integer
        Number of labels.

    Returns
    -------
    cost_matrix : numpy.array
        The reformed label cost matrix of size
        (nb_labels + 1, nb_labels + 1). Row/column 0 corresponds to the dummy
        label and row/column ``k`` to the k-th label: row 0 holds the
        insertion costs, column 0 the deletion costs, and the remaining
        symmetric part the substitution costs. The diagonal is zero.
    """
    size = nb_labels + 1
    cost_matrix = np.zeros((size, size))

    # Insertions: dummy label -> label.
    for k in range(nb_labels):
        cost_matrix[0, k + 1] = costs[k]

    # Deletions: label -> dummy label.
    for k in range(nb_labels):
        cost_matrix[k + 1, 0] = costs[nb_labels + k]

    # Substitutions: one cost per unordered pair, mirrored across the diagonal.
    pos = 2 * nb_labels
    for row in range(1, size):
        for col in range(row + 1, size):
            cost_matrix[row, col] = cost_matrix[col, row] = costs[pos]
            pos += 1

    return cost_matrix


def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs):
    """Dispatch to the edit-operation counter matching the edit cost.

    Parameters
    ----------
    g1, g2 : graphs
        The two graphs of the edit path.
    forward_map, backward_map : sequences
        Node maps from `g1` to `g2` and from `g2` to `g1`.
    edit_cost : string, optional
        Name of the edit cost. 'LETTER'/'LETTER2', 'NON_SYMBOLIC' and
        'CONSTANT' select dedicated counters; any other value falls back to
        the symbolic counter with its default arguments.
    is_cml : bool, optional
        If True, the counter for per-label costs is used, which is only
        available for the 'CONSTANT' edit cost.
    **kwargs
        'node_labels'/'edge_labels' (for 'CONSTANT') or
        'node_attrs'/'edge_attrs' (for 'NON_SYMBOLIC'); each defaults to an
        empty list.

    Returns
    -------
    object
        Whatever the selected counter returns.

    Raises
    ------
    Exception
        If `is_cml` is True and `edit_cost` is not 'CONSTANT'.
    """
    if is_cml:
        if edit_cost != 'CONSTANT':
            raise Exception('Edit cost "', edit_cost, '" is not supported.')
        return get_nb_edit_operations_symbolic_cml(
            g1, g2, forward_map, backward_map,
            node_labels=kwargs.get('node_labels', []),
            edge_labels=kwargs.get('edge_labels', []))

    if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
        return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)

    if edit_cost == 'NON_SYMBOLIC':
        return get_nb_edit_operations_nonsymbolic(
            g1, g2, forward_map, backward_map,
            node_attrs=kwargs.get('node_attrs', []),
            edge_attrs=kwargs.get('edge_attrs', []))

    if edit_cost == 'CONSTANT':
        return get_nb_edit_operations_symbolic(
            g1, g2, forward_map, backward_map,
            node_labels=kwargs.get('node_labels', []),
            edge_labels=kwargs.get('edge_labels', []))

    # Unknown edit cost: symbolic counter with its default label arguments.
    return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
                                        node_labels=None, edge_labels=None):
    """Compute times that edit operations are used in an edit path for symbolic-labeled graphs, where the costs are different for each pair of labels.

    Parameters
    ----------
    g1, g2 : graphs
        The two graphs of the edit path. Their `nodes` and `edges` are called to iterate and indexed to get an attribute dictionary.
    forward_map : list
        Entry i belongs to the i-th node of g1 (in the order of g1.nodes()). It is either the node of g2 it is mapped to, or np.inf if the node is removed.
    backward_map : list
        Entry i belongs to the i-th node of g2 (in the order of g2.nodes()). np.inf marks an inserted node; other values are not inspected.
    node_labels, edge_labels : list, optional
        All node / edge labels, each one being the tuple of the (key, value) items of an attribute dictionary. The position of a label in the list is its index. Default is an empty list.

    Returns
    -------
    list
        A vector of numbers of times that costs between labels are used in an edit path, formed in the order of node insertion costs, node deletion costs, node substitution costs, edge insertion costs, edge deletion costs, edge substitution costs. The dummy label is the first label, and the self label costs are not included. Each substitution entry stands for an unordered pair of labels and counts substitutions in both directions.

    Raises
    ------
    ValueError
        If the label of a visited node or edge is not in node_labels / edge_labels.
    """
    node_labels = [] if node_labels is None else node_labels
    edge_labels = [] if edge_labels is None else edge_labels

    # Row / column 0 stands for the dummy label; the label at position k of
    # the list is stored at index k + 1.
    nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
    nb_ops_edge = np.zeros((1 + len(edge_labels), 1 + len(edge_labels)))

    # For nodes.
    nodes1 = list(g1.nodes())
    for i, map_i in enumerate(forward_map):
        label1 = tuple(g1.nodes[nodes1[i]].items())  # @todo: order and faster
        idx_label1 = node_labels.index(label1)  # @todo: faster
        if map_i == np.inf:  # deletions.
            nb_ops_node[idx_label1 + 1, 0] += 1
        else:  # substitutions.
            label2 = tuple(g2.nodes[map_i].items())
            if label1 != label2:
                idx_label2 = node_labels.index(label2)  # @todo: faster
                nb_ops_node[idx_label1 + 1, idx_label2 + 1] += 1
    # insertions.
    nodes2 = list(g2.nodes())
    for i, map_i in enumerate(backward_map):
        if map_i == np.inf:
            label = tuple(g2.nodes[nodes2[i]].items())
            idx_label = node_labels.index(label)  # @todo: faster
            nb_ops_node[0, idx_label + 1] += 1

    # For edges.
    position1 = {node: pos for pos, node in enumerate(nodes1)}
    edges2 = g2.edges()
    edges2_marked = set()
    for nf1, nt1 in g1.edges():
        label1 = tuple(g1.edges[(nf1, nt1)].items())
        idx_label1 = edge_labels.index(label1)  # @todo: faster
        nf2 = forward_map[position1[nf1]]
        nt2 = forward_map[position1[nt1]]
        # At least one of the nodes is removed, thus the edge is removed.
        if nf2 == np.inf or nt2 == np.inf:
            nb_ops_edge[idx_label1 + 1, 0] += 1
            continue
        if (nf2, nt2) in edges2:
            matched = (nf2, nt2)
        elif (nt2, nf2) in edges2:
            # The corresponding edge is stored with switched end nodes.
            matched = (nt2, nf2)
        else:
            # Corresponding nodes are in g2, however the edge is removed.
            nb_ops_edge[idx_label1 + 1, 0] += 1
            continue
        edges2_marked.add(matched)
        # Count a substitution only if the edge labels are different.
        label2 = tuple(g2.edges[matched].items())
        if label1 != label2:
            idx_label2 = edge_labels.index(label2)  # @todo: faster
            nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
    # insertions: edges of g2 that no edge of g1 is mapped to.
    for nf, nt in edges2:
        if (nf, nt) not in edges2_marked and (nt, nf) not in edges2_marked:  # @todo: for directed.
            label = tuple(g2.edges[(nf, nt)].items())
            idx_label = edge_labels.index(label)  # @todo: faster
            nb_ops_edge[0, idx_label + 1] += 1

    # Reform the numbers of edit operations into a vector.
    return _label_op_counts_to_vector(nb_ops_node) + _label_op_counts_to_vector(nb_ops_edge)


def _label_op_counts_to_vector(nb_ops):
    """Flatten a matrix of operation counts into insertions, deletions, then substitutions."""
    size = len(nb_ops)
    # Insertions are stored in row 0, deletions in column 0.
    vector = [nb_ops[0, i] for i in range(1, size)]
    vector.extend(nb_ops[i, 0] for i in range(1, size))
    for i in range(1, size):
        for j in range(i + 1, size):
            # The vector has one entry per unordered pair of labels, and
            # label_costs_to_matrix() assigns the same cost to both
            # directions, so the two directions are summed. Reading only
            # nb_ops[i, j] would drop every substitution from label j to i.
            vector.append(nb_ops[i, j] + nb_ops[j, i])
    return vector


def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map, def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,


+ 2
- 1
gklearn/preimage/__init__.py View File

@@ -11,8 +11,9 @@ __author__ = "Linlin Jia"
__date__ = "March 2020" __date__ = "March 2020"


from gklearn.preimage.preimage_generator import PreimageGenerator from gklearn.preimage.preimage_generator import PreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
from gklearn.preimage.median_preimage_generator_py import MedianPreimageGeneratorPy
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class

+ 64
- 27
gklearn/preimage/median_preimage_generator_cml.py View File

@@ -5,31 +5,26 @@ Created on Tue Jun 16 16:04:46 2020


@author: ljia @author: ljia
""" """

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:27:22 2020

@author: ljia
"""
import numpy as np import numpy as np
import time import time
import random import random
import multiprocessing import multiprocessing
import networkx as nx import networkx as nx
import cvxpy as cp import cvxpy as cp
import itertools
from gklearn.preimage import PreimageGenerator from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis from gklearn.preimage.utils import compute_k_dis
from gklearn.ged.util import compute_geds_cml, ged_options_to_string
from gklearn.ged.util import compute_geds_cml
from gklearn.ged.env import GEDEnv from gklearn.ged.env import GEDEnv
from gklearn.ged.median import MedianGraphEstimator
from gklearn.ged.median import constant_node_costs,mge_options_to_string
from gklearn.utils import Timer
from gklearn.ged.median import MedianGraphEstimatorPy
from gklearn.ged.median import constant_node_costs, mge_options_to_string
from gklearn.utils import Timer, SpecialLabel
from gklearn.utils.utils import get_graph_kernel_by_name from gklearn.utils.utils import get_graph_kernel_by_name




class MedianPreimageGeneratorCML(PreimageGenerator): class MedianPreimageGeneratorCML(PreimageGenerator):
"""Generator median preimages by cost matrices learning using the pure Python version of GEDEnv. Works only for symbolic labeled graphs.
"""
def __init__(self, dataset=None): def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset) PreimageGenerator.__init__(self, dataset=dataset)
@@ -37,7 +32,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__mge = None self.__mge = None
self.__ged_options = {} self.__ged_options = {}
self.__mge_options = {} self.__mge_options = {}
self.__fit_method = 'k-graphs'
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None self.__init_ecc = None
self.__parallel = True self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count() self.__n_jobs = multiprocessing.cpu_count()
@@ -47,8 +43,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__max_itrs_without_update = 3 self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01 self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1 self.__epsilon_ec = 0.1
self.__allow_zeros = False
self.__triangle_rule = True
self.__allow_zeros = True
# self.__triangle_rule = True
# values to compute. # values to compute.
self.__runtime_optimize_ec = None self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None self.__runtime_generate_preimage = None
@@ -64,6 +60,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__itrs = 0 self.__itrs = 0
self.__converged = False self.__converged = False
self.__num_updates_ecc = 0 self.__num_updates_ecc = 0
self.__node_label_costs = None
self.__edge_label_costs = None
# values that can be set or to be computed. # values that can be set or to be computed.
self.__edit_cost_constants = [] self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None self.__gram_matrix_unnorm = None
@@ -76,7 +74,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self._verbose = kwargs.get('verbose', 2) self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {}) self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {}) self.__mge_options = kwargs.get('mge_options', {})
self.__fit_method = kwargs.get('fit_method', 'k-graphs')
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None) self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True) self.__parallel = kwargs.get('parallel', True)
@@ -89,8 +88,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', False)
self.__triangle_rule = kwargs.get('triangle_rule', True)
self.__allow_zeros = kwargs.get('allow_zeros', True)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
def run(self): def run(self):
@@ -122,10 +121,10 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
end_precompute_gm = time.time() end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm start -= self.__runtime_precompute_gm
if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
start = time.time()
self.__runtime_precompute_gm = 0
end_precompute_gm = start
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# start = time.time()
# self.__runtime_precompute_gm = 0
# end_precompute_gm = start
# 2. optimize edit cost constants. # 2. optimize edit cost constants.
self.__optimize_edit_cost_vector() self.__optimize_edit_cost_vector()
@@ -197,7 +196,18 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __optimize_edit_cost_vector(self): def __optimize_edit_cost_vector(self):
"""Learn edit cost vector. """Learn edit cost vector.
""" """
if self.__fit_method == 'random': # random
# Initialize label costs randomly.
if self.__init_method == 'random':
# Initialize label costs.
self.__initialize_label_costs()
# Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
# Initialize all label costs with the same value.
elif self.__init_method == 'uniform': # random
pass
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER': if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3) self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
@@ -257,6 +267,31 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
pass pass
def __initialize_label_costs(self):
    """Initialize the node label costs, then the edge label costs."""
    # Keep this order: both initializers draw from the module-level `random`
    # generator, so swapping them changes the values obtained under a fixed seed.
    self.__initialize_node_label_costs()
    self.__initialize_edge_label_costs()
def __initialize_node_label_costs(self):
    """Draw random node label costs and store them in self.__node_label_costs.

    The distinct node labels are taken from the dataset. One cost is drawn for
    each label insertion, each label deletion and each unordered pair of
    distinct labels. The costs are distinct integers sampled without
    replacement, then divided by the largest one so that the maximum is 1.
    """
    nb_labels = len(self._dataset.get_all_node_labels())
    # nb_labels insertions + nb_labels deletions + one cost per pair of labels.
    nb_costs = int((nb_labels * (nb_labels - 1)) / 2 + 2 * nb_labels)
    drawn = np.array(random.sample(range(1, 10 * nb_costs + 1), nb_costs))
    self.__node_label_costs = drawn / np.max(drawn)  # @todo: maybe not needed.


def __initialize_edge_label_costs(self):
    """Draw random edge label costs and store them in self.__edge_label_costs.

    The distinct edge labels are taken from the dataset. One cost is drawn for
    each label insertion, each label deletion and each unordered pair of
    distinct labels. The costs are distinct integers sampled without
    replacement, then divided by the largest one so that the maximum is 1.
    """
    nb_labels = len(self._dataset.get_all_edge_labels())
    # nb_labels insertions + nb_labels deletions + one cost per pair of labels.
    nb_costs = int((nb_labels * (nb_labels - 1)) / 2 + 2 * nb_labels)
    drawn = np.array(random.sample(range(1, 10 * nb_costs + 1), nb_costs))
    self.__edge_label_costs = drawn / np.max(drawn)  # @todo: maybe not needed.
def __optimize_ecm_by_kernel_distances(self): def __optimize_ecm_by_kernel_distances(self):
# compute distances in feature space. # compute distances in feature space.
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
@@ -279,6 +314,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
options['edge_labels'] = self._dataset.edge_labels options['edge_labels'] = self._dataset.edge_labels
options['node_attrs'] = self._dataset.node_attrs options['node_attrs'] = self._dataset.node_attrs
options['edge_attrs'] = self._dataset.edge_attrs options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
options['edge_label_costs'] = self.__edge_label_costs
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1))
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
time_list = [time.time() - time0] time_list = [time.time() - time0]
@@ -881,8 +918,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_env.init(init_type=self.__ged_options['init_option']) ged_env.init(init_type=self.__ged_options['init_option'])
# Set up the madian graph estimator. # Set up the madian graph estimator.
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
self.__mge = MedianGraphEstimatorPy(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy() options = self.__mge_options.copy()
if not 'seed' in options: if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
@@ -897,8 +934,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
ged_options = self.__ged_options.copy() ged_options = self.__ged_options.copy()
if self.__parallel: if self.__parallel:
ged_options['threads'] = 1 ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options))
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator. # Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id) self.__mge.run(graph_ids, set_median_id, gen_median_id)


+ 1035
- 0
gklearn/preimage/median_preimage_generator_py.py
File diff suppressed because it is too large
View File


gklearn/tests/test_ged_env.py → gklearn/tests/ged/test_ged_env.py View File


+ 122
- 0
gklearn/tests/ged/test_get_nb_edit_operations_symbolic_cml.py View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jul 6 12:08:24 2020

@author: ljia
"""
import random
import numpy as np

def test_get_nb_edit_operations_symbolic_cml():
    """Check that the counted edit operations reproduce the computed edit distance.

    The distance between the first two graphs of "MUTAG" is computed with
    random label costs. The dot product of these costs with the numbers of
    edit operations must equal that distance up to a small relative error.
    """
    # 1. Get dataset: load the predefined dataset "MUTAG".
    from gklearn.utils import Dataset
    mutag = Dataset()
    mutag.load_predefined_dataset('MUTAG')
    first_graph = mutag.graphs[0]
    second_graph = mutag.graphs[1]

    # 2. Compute graph edit distance with randomly initialized label costs.
    node_costs, edge_costs = _initialize_label_costs(mutag)
    forward, backward, distance, node_labels, edge_labels = _compute_ged(mutag, node_costs, edge_costs)

    # 3. Compute numbers of edit operations and compare with the distance.
    from gklearn.ged.util.util import get_nb_edit_operations_symbolic_cml
    counts = get_nb_edit_operations_symbolic_cml(first_graph, second_graph, forward, backward, node_labels, edge_labels)
    all_costs = np.concatenate((node_costs, edge_costs))
    relative_error = (np.dot(all_costs, counts) - distance) / distance
    assert np.abs(relative_error) < 10e-6
def _initialize_label_costs(dataset):
    """Return random (node label costs, edge label costs) for the dataset.

    The node costs are drawn before the edge costs.
    """
    # Tuple items are evaluated left to right, so nodes are drawn first.
    return _initialize_node_label_costs(dataset), _initialize_edge_label_costs(dataset)
def _initialize_node_label_costs(dataset):
    """Draw random costs for all node label operations of the dataset.

    One cost is drawn for each label insertion, each label deletion and each
    unordered pair of distinct labels. The costs are distinct integers sampled
    without replacement, then divided by the largest one so that the maximum
    is 1.
    """
    nb_labels = len(dataset.get_all_node_labels())
    # nb_labels insertions + nb_labels deletions + one cost per pair of labels.
    nb_costs = int((nb_labels * (nb_labels - 1)) / 2 + 2 * nb_labels)
    drawn = np.array(random.sample(range(1, 10 * nb_costs + 1), nb_costs))
    return drawn / np.max(drawn)


def _initialize_edge_label_costs(dataset):
    """Draw random costs for all edge label operations of the dataset.

    One cost is drawn for each label insertion, each label deletion and each
    unordered pair of distinct labels. The costs are distinct integers sampled
    without replacement, then divided by the largest one so that the maximum
    is 1.
    """
    nb_labels = len(dataset.get_all_edge_labels())
    # nb_labels insertions + nb_labels deletions + one cost per pair of labels.
    nb_costs = int((nb_labels * (nb_labels - 1)) / 2 + 2 * nb_labels)
    drawn = np.array(random.sample(range(1, 10 * nb_costs + 1), nb_costs))
    return drawn / np.max(drawn)


def _compute_ged(dataset, node_label_costs, edge_label_costs):
    """Compute the edit distance between the first two graphs of the dataset.

    All graphs of the dataset are added to a GEDEnv, the given label cost
    vectors are turned into cost matrices, and the 'BIPARTITE' method is run
    on the first two graph ids.

    Returns
    -------
    tuple
        (forward map, backward map, distance, node labels, edge labels). In
        the maps, an index smaller than the number of nodes of the other
        graph is replaced by the corresponding node, any other index by
        np.inf. The labels are the ones reported by the environment.
    """
    from gklearn.ged.env import GEDEnv
    from gklearn.ged.util.util import label_costs_to_matrix
    import networkx as nx

    # Set up the GED environment with constant edit costs and all graphs.
    env = GEDEnv()
    env.set_edit_cost('CONSTANT', edit_cost_constants=[3, 3, 1, 3, 3, 1])
    for graph in dataset.graphs:
        env.add_nx_graph(graph, '')

    node_labels = env.get_all_node_labels()
    edge_labels = env.get_all_edge_labels()
    graph_ids = env.get_all_graph_ids()

    # Install the label cost matrices before initializing the environment.
    node_cost_matrix = label_costs_to_matrix(node_label_costs, len(node_labels))
    edge_cost_matrix = label_costs_to_matrix(edge_label_costs, len(edge_labels))
    env.set_label_costs(node_cost_matrix, edge_cost_matrix)
    env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')

    # Configure and run the GED method on the first two graphs.
    env.set_method('BIPARTITE', {'initialization_method': 'RANDOM', 'threads': 1})
    env.init_method()
    source_id, target_id = graph_ids[0], graph_ids[1]
    env.run_method(source_id, target_id)
    raw_forward = env.get_forward_map(source_id, target_id)
    raw_backward = env.get_backward_map(source_id, target_id)
    distance = env.get_upper_bound(source_id, target_id)

    # Turn node indices into nodes; out-of-range indices mean removal (np.inf).
    source_nodes = list(dataset.graphs[0].nodes())
    target_nodes = list(dataset.graphs[1].nodes())
    nb_source = nx.number_of_nodes(dataset.graphs[0])
    nb_target = nx.number_of_nodes(dataset.graphs[1])
    forward = []
    for index in raw_forward:
        forward.append(target_nodes[index] if index < nb_target else np.inf)
    backward = []
    for index in raw_backward:
        backward.append(source_nodes[index] if index < nb_source else np.inf)

    return forward, backward, distance, node_labels, edge_labels

if __name__ == "__main__":
test_get_nb_edit_operations_symbolic_cml()

+ 20
- 0
gklearn/utils/dataset.py View File

@@ -535,6 +535,26 @@ class Dataset(object):
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
# @todo: clean_labels and add other class members? # @todo: clean_labels and add other class members?
return dataset return dataset
def get_all_node_labels(self):
    """Return the distinct node labels of all graphs, in order of first appearance.

    A label is the tuple of the (key, value) items of a node's attribute
    dictionary.
    """
    found = []
    all_labels = (tuple(graph.nodes[node].items())
                  for graph in self.__graphs for node in graph.nodes())
    for label in all_labels:
        # A list is scanned instead of a set, so labels need not be hashable.
        if label not in found:
            found.append(label)
    return found
def get_all_edge_labels(self):
    """Return the distinct edge labels of all graphs, in order of first appearance.

    A label is the tuple of the (key, value) items of an edge's attribute
    dictionary.
    """
    found = []
    all_labels = (tuple(graph.edges[edge].items())
                  for graph in self.__graphs for edge in graph.edges())
    for label in all_labels:
        # A list is scanned instead of a set, so labels need not be hashable.
        if label not in found:
            found.append(label)
    return found
def __get_dataset_size(self): def __get_dataset_size(self):


Loading…
Cancel
Save