@@ -0,0 +1,126 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Jun 25 11:31:46 2020 | |||
@author: ljia | |||
""" | |||
def xp_check_results_of_GEDEnv():
    """Compute GEDs with GEDEnv and with GEDLIB and print both results.

    The predefined dataset "MUTAG" is loaded, handed to
    ``compute_geds_by_GEDEnv`` and ``compute_geds_by_GEDLIB``, and each
    result is pretty-printed.

    Returns
    -------
    tuple
        The result of the GEDEnv run followed by the result of the GEDLIB run.
    """
    import pprint

    from gklearn.utils import Dataset

    # **1. Get dataset.** Load the predefined dataset by its name.
    mutag = Dataset()
    mutag.load_predefined_dataset('MUTAG')

    # **2. Compute the distances with each back end.**
    results_env = compute_geds_by_GEDEnv(mutag)
    results_lib = compute_geds_by_GEDLIB(mutag)

    # **3. Show results.**
    printer = pprint.PrettyPrinter(indent=4)
    print('Restuls using GEDEnv:')
    printer.pprint(results_env)
    print()
    print('Restuls using GEDLIB:')
    printer.pprint(results_lib)

    return results_env, results_lib
def compute_geds_by_GEDEnv(dataset):
    """Compute pairwise GEDs of the leading graphs of a dataset with the Python ``GEDEnv``.

    At most the first 10 graphs of ``dataset.graphs`` are added to the
    environment; the distance matrix is sized by the number of graphs that
    were actually added, so smaller datasets work too.

    Parameters
    ----------
    dataset : object
        Must expose a sliceable ``graphs`` attribute whose items are accepted by ``GEDEnv.add_nx_graph``.

    Returns
    -------
    dict
        ``'pi_forward'``, ``'pi_backward'``, ``'upper_bound'`` and ``'runtime'`` for the
        first two graph IDs, the environment ``'init_time'``, and ``'ged_mat'``, the
        symmetric matrix of upper bounds between all added graphs.
    """
    from gklearn.ged.env import GEDEnv
    import numpy as np

    graphs = dataset.graphs[0:10]
    nb_graphs = len(graphs)

    ged_env = GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                          edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
                          )
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {'threads': 1  # parallel threads.
               }
    ged_env.set_method('BIPARTITE',  # GED method.
                       options  # options for GED method.
                       )
    ged_env.init_method()  # initialize GED method.

    # NOTE(review): the loop indices are used directly as graph IDs, which
    # assumes the environment numbers graphs 0..nb_graphs-1 -- confirm.
    ged_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            ged_env.run_method(i, j)  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(i, j)
            ged_mat[j, i] = ged_mat[i, j]  # the matrix is filled symmetrically.

    results = {}
    results['pi_forward'] = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
    results['pi_backward'] = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
    results['upper_bound'] = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.
    results['runtime'] = ged_env.get_runtime(listID[0], listID[1])
    results['init_time'] = ged_env.get_init_time()
    results['ged_mat'] = ged_mat

    return results
def compute_geds_by_GEDLIB(dataset):
    """Compute pairwise GEDs of the leading graphs of a dataset with ``gedlibpy.GEDEnv``.

    At most the first 10 graphs of ``dataset.graphs`` are added to the
    environment; the distance matrix is sized by the number of graphs that
    were actually added, so smaller datasets work too.

    Parameters
    ----------
    dataset : object
        Must expose a sliceable ``graphs`` attribute whose items are accepted by ``add_nx_graph``.

    Returns
    -------
    dict
        ``'pi_forward'``, ``'pi_backward'``, ``'upper_bound'`` and ``'runtime'`` for the
        first two graph IDs, the environment ``'init_time'``, and ``'ged_mat'``, the
        symmetric matrix of upper bounds between all added graphs.
    """
    from gklearn.gedlib import librariesImport, gedlibpy
    from gklearn.ged.util import ged_options_to_string
    import numpy as np

    graphs = dataset.graphs[0:10]
    nb_graphs = len(graphs)

    ged_env = gedlibpy.GEDEnv()  # initialize GED environment.
    ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                          edit_cost_constant=[3, 3, 1, 3, 3, 1]  # edit costs.
                          )
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
    ged_env.init(init_option='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
    options = {'initialization-method': 'RANDOM',  # or 'NODE', etc.
               'threads': 1  # parallel threads.
               }
    ged_env.set_method('BIPARTITE',  # GED method.
                       ged_options_to_string(options)  # options for GED method.
                       )
    ged_env.init_method()  # initialize GED method.

    # NOTE(review): the loop indices are used directly as graph IDs, which
    # assumes the environment numbers graphs 0..nb_graphs-1 -- confirm.
    ged_mat = np.empty((nb_graphs, nb_graphs))
    for i in range(nb_graphs):
        for j in range(i, nb_graphs):
            ged_env.run_method(i, j)  # run.
            ged_mat[i, j] = ged_env.get_upper_bound(i, j)
            ged_mat[j, i] = ged_mat[i, j]  # the matrix is filled symmetrically.

    results = {}
    results['pi_forward'] = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
    results['pi_backward'] = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
    results['upper_bound'] = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.
    results['runtime'] = ged_env.get_runtime(listID[0], listID[1])
    results['init_time'] = ged_env.get_init_time()
    results['ged_mat'] = ged_mat

    return results
# Run the GEDEnv / GEDLIB comparison only when this file is executed as a
# script; both result dictionaries stay available as module-level names.
if __name__ == '__main__':
    results1, results2 = xp_check_results_of_GEDEnv()
@@ -23,6 +23,8 @@ class GEDData(object): | |||
self._edit_cost = None | |||
self._node_costs = None | |||
self._edge_costs = None | |||
self._node_label_costs = None | |||
self._edge_label_costs = None | |||
self._node_labels = [] | |||
self._edge_labels = [] | |||
self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES | |||
@@ -41,6 +43,17 @@ class GEDData(object): | |||
return len(self._graphs) | |||
def graph(self, graph_id):
    """Provide access to a graph.

    Parameters
    ----------
    graph_id
        The ID of the graph; used as the index into ``self._graphs``.

    Returns
    -------
    The graph stored under ``graph_id``.
    """
    stored_graphs = self._graphs
    return stored_graphs[graph_id]
def shuffled_graph_copies_available(self): | |||
""" | |||
/*! | |||
@@ -51,6 +64,16 @@ class GEDData(object): | |||
return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES) | |||
def num_graphs_without_shuffled_copies(self):
    """Return the number of graphs in the instance, not counting shuffled copies.

    Returns
    -------
    The value of ``self._num_graphs_without_shuffled_copies``.
    """
    count = self._num_graphs_without_shuffled_copies
    return count
def node_cost(self, label1, label2): | |||
""" | |||
/*! | |||
@@ -63,15 +86,21 @@ class GEDData(object): | |||
* and 0 otherwise. | |||
*/ | |||
""" | |||
if self._eager_init(): # @todo: check if correct | |||
return self._node_costs[label1, label2] | |||
if label1 == label2: | |||
return 0 | |||
if label1 == SpecialLabel.DUMMY: # @todo: check dummy | |||
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check | |||
if label2 == SpecialLabel.DUMMY: # @todo: check dummy | |||
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1]) | |||
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1]) | |||
if self._node_label_costs is None: | |||
if self._eager_init(): # @todo: check if correct | |||
return self._node_costs[label1, label2] | |||
if label1 == label2: | |||
return 0 | |||
if label1 == SpecialLabel.DUMMY: # @todo: check dummy | |||
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check | |||
if label2 == SpecialLabel.DUMMY: # @todo: check dummy | |||
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1]) | |||
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1]) | |||
# use pre-computed node label costs. | |||
else: | |||
id1 = 0 if label1 == SpecialLabel.DUMMY else self._node_label_to_id(label1) # @todo: this is slow. | |||
id2 = 0 if label2 == SpecialLabel.DUMMY else self._node_label_to_id(label2) | |||
return self._node_label_costs[id1, id2] | |||
def edge_cost(self, label1, label2): | |||
@@ -86,15 +115,22 @@ class GEDData(object): | |||
* and 0 otherwise. | |||
*/ | |||
""" | |||
if self._eager_init(): # @todo: check if correct | |||
return self._node_costs[label1, label2] | |||
if label1 == label2: | |||
return 0 | |||
if label1 == SpecialLabel.DUMMY: | |||
return self._edit_cost.edge_ins_cost_fun(label2) # self._edge_labels[label2 - 1]) | |||
if label2 == SpecialLabel.DUMMY: | |||
return self._edit_cost.edge_del_cost_fun(label1) # self._edge_labels[label1 - 1]) | |||
return self._edit_cost.edge_rel_cost_fun(label1, label2) # self._edge_labels[label1 - 1], self._edge_labels[label2 - 1]) | |||
if self._edge_label_costs is None: | |||
if self._eager_init(): # @todo: check if correct | |||
return self._node_costs[label1, label2] | |||
if label1 == label2: | |||
return 0 | |||
if label1 == SpecialLabel.DUMMY: | |||
return self._edit_cost.edge_ins_cost_fun(label2) # self._edge_labels[label2 - 1]) | |||
if label2 == SpecialLabel.DUMMY: | |||
return self._edit_cost.edge_del_cost_fun(label1) # self._edge_labels[label1 - 1]) | |||
return self._edit_cost.edge_rel_cost_fun(label1, label2) # self._edge_labels[label1 - 1], self._edge_labels[label2 - 1]) | |||
# use pre-computed edge label costs. | |||
else: | |||
id1 = 0 if label1 == SpecialLabel.DUMMY else self._edge_label_to_id(label1) # @todo: this is slow. | |||
id2 = 0 if label2 == SpecialLabel.DUMMY else self._edge_label_to_id(label2) | |||
return self._edge_label_costs[id1, id2] | |||
def compute_induced_cost(self, g, h, node_map): | |||
@@ -177,5 +213,37 @@ class GEDData(object): | |||
self._delete_edit_cost = True | |||
def id_to_node_label(self, label_id):
    """Return the node label registered under a 1-based label ID.

    Parameters
    ----------
    label_id : int
        ID of the node label; valid IDs run from 1 to ``len(self._node_labels)``.

    Returns
    -------
    The entry of ``self._node_labels`` at position ``label_id - 1``.

    Raises
    ------
    Exception
        If ``label_id`` is smaller than 1 or larger than the number of stored node labels.
    """
    # Everything below 1 is rejected (not only 0): a negative ID would
    # otherwise silently index ``_node_labels`` from its end.
    if label_id < 1 or label_id > len(self._node_labels):
        raise Exception('Invalid node label ID', str(label_id), '.')
    return self._node_labels[label_id - 1]
def _node_label_to_id(self, node_label):
    """Return the 1-based ID of a node label, registering the label if it is new.

    ``self._node_labels`` is scanned front to back for an entry equal to
    ``node_label``. If none is found, the label is appended to the list and
    the ID of the new last position is returned.
    """
    for label_id, known_label in enumerate(self._node_labels, start=1):
        if known_label == node_label:
            return label_id
    self._node_labels.append(node_label)
    return len(self._node_labels)
def id_to_edge_label(self, label_id):
    """Return the edge label registered under a 1-based label ID.

    Parameters
    ----------
    label_id : int
        ID of the edge label; valid IDs run from 1 to ``len(self._edge_labels)``.

    Returns
    -------
    The entry of ``self._edge_labels`` at position ``label_id - 1``.

    Raises
    ------
    Exception
        If ``label_id`` is smaller than 1 or larger than the number of stored edge labels.
    """
    # Everything below 1 is rejected (not only 0): a negative ID would
    # otherwise silently index ``_edge_labels`` from its end.
    if label_id < 1 or label_id > len(self._edge_labels):
        raise Exception('Invalid edge label ID', str(label_id), '.')
    return self._edge_labels[label_id - 1]
def _edge_label_to_id(self, edge_label):
    """Return the 1-based ID of an edge label, registering the label if it is new.

    ``self._edge_labels`` is scanned front to back for an entry equal to
    ``edge_label``. If none is found, the label is appended to the list and
    the ID of the new last position is returned.
    """
    for label_id, known_label in enumerate(self._edge_labels, start=1):
        if known_label == edge_label:
            return label_id
    self._edge_labels.append(edge_label)
    return len(self._edge_labels)
def _eager_init(self):
    """Tell whether ``self._init_type`` is one of the two eager initialization types."""
    current_type = self._init_type
    if current_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES:
        return True
    return current_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES
@@ -63,6 +63,23 @@ class GEDEnv(object): | |||
return graph_id | |||
def clear_graph(self, graph_id):
    """Clear and de-initialize a graph that has previously been added to the environment.

    Call ``init()`` after calling this method.

    Parameters
    ----------
    graph_id : int
        ID of the graph that has to be cleared.

    Raises
    ------
    Exception
        If ``graph_id`` is negative or not smaller than the number of graphs without shuffled copies.
    """
    # Valid IDs are 0 .. num_graphs - 1, so ``graph_id == num_graphs`` must be
    # rejected as well. The message reports the ID itself: looking up the name
    # of an out-of-range graph would fail before the error could be raised.
    if graph_id < 0 or graph_id >= self.__ged_data.num_graphs_without_shuffled_copies():
        raise Exception('The graph with ID ' + str(graph_id) + ' has not been added to the environment.')
    self.__ged_data._graphs[graph_id].clear()
    self.__original_to_internal_node_ids[graph_id].clear()
    self.__internal_to_original_node_ids[graph_id].clear()
    self.__ged_data._strings_to_internal_node_ids[graph_id].clear()
    self.__ged_data._internal_node_ids_to_strings[graph_id].clear()
    # The environment has to be re-initialized after a graph was cleared.
    self.__initialized = False
def add_node(self, graph_id, node_id, node_label): | |||
""" | |||
/*! | |||
@@ -80,7 +97,9 @@ class GEDEnv(object): | |||
self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id | |||
self.__ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id | |||
self.__ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id) | |||
# @todo: node_label_to_id_ | |||
self.__ged_data._node_label_to_id(node_label) | |||
label_id = self.__ged_data._node_label_to_id(node_label) | |||
# @todo: ged_data_.graphs_[graph_id].set_label | |||
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True): | |||
@@ -98,7 +117,8 @@ class GEDEnv(object): | |||
self.__initialized = False | |||
# @todo: check ignore_duplicates. | |||
self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label) | |||
# @todo: edge_id and label_id, edge_label_to_id_. | |||
label_id = self.__ged_data._edge_label_to_id(edge_label) | |||
# @todo: ged_data_.graphs_[graph_id].set_label | |||
def add_nx_graph(self, g, classe, ignore_duplicates=True) : | |||
@@ -123,6 +143,40 @@ class GEDEnv(object): | |||
return graph_id | |||
def load_nx_graph(self, nx_graph, graph_id, graph_name='', graph_class=''):
    """Load a NetworkX graph into the GED environment.

    Every node and edge is added with a label built from its attribute
    dictionary: the ``(key, value)`` pairs sorted by key and packed into a
    tuple.

    Parameters
    ----------
    nx_graph : NetworkX Graph object
        The graph that should be loaded.
    graph_id : int or None
        The ID of a graph contained in the environment (that graph is cleared and overwritten), or `None` to add a new graph.
    graph_name : string, optional
        The name of the newly added graph. The default is ''. Has no effect unless `graph_id` equals `None`.
    graph_class : string, optional
        The class of the newly added graph. The default is ''. Has no effect unless `graph_id` equals `None`.

    Returns
    -------
    int
        The ID of the newly loaded graph.
    """
    def as_label(attributes):
        # Key-sorted tuple of the (key, value) attribute pairs.
        return tuple(sorted(attributes.items(), key=lambda item: item[0]))

    if graph_id is not None:
        self.clear_graph(graph_id)
    else:  # @todo: undefined.
        graph_id = self.add_graph(graph_name, graph_class)

    for node in nx_graph.nodes:
        self.add_node(graph_id, node, as_label(nx_graph.nodes[node]))
    for edge in nx_graph.edges:
        source, target = edge[0], edge[1]
        self.add_edge(graph_id, source, target, as_label(nx_graph.edges[(source, target)]))

    return graph_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False): | |||
if isinstance(init_type, str): | |||
init_type = OptionsStringMap.InitType[init_type] | |||
@@ -154,6 +208,35 @@ class GEDEnv(object): | |||
self.__new_graph_ids.clear() | |||
def is_initialized(self):
    """Check if the environment is initialized.

    Returns
    -------
    The current value of the environment's initialized flag.
    """
    initialized_flag = self.__initialized
    return initialized_flag
def get_init_type(self):
    """Return the initialization type stored in the GED data.

    Returns
    -------
    The ``_init_type`` attribute of the environment's GED data.
    """
    ged_data = self.__ged_data
    return ged_data._init_type
def set_label_costs(self, node_label_costs=None, edge_label_costs=None):
    """Set the costs between labels.

    Parameters
    ----------
    node_label_costs : optional
        Stored as ``_node_label_costs`` of the GED data; left untouched when `None`.
    edge_label_costs : optional
        Stored as ``_edge_label_costs`` of the GED data; left untouched when `None`.
    """
    updates = (('_node_label_costs', node_label_costs),
               ('_edge_label_costs', edge_label_costs))
    for attribute, costs in updates:
        if costs is not None:
            setattr(self.__ged_data, attribute, costs)
def set_method(self, method, options=''): | |||
""" | |||
/*! | |||
@@ -263,6 +346,80 @@ class GEDEnv(object): | |||
self.__ged_method.init() | |||
def get_num_node_labels(self):
    """Return the number of node labels.

    Returns
    -------
    int
        Length of the node label list held by the GED data.
    """
    node_labels = self.__ged_data._node_labels
    return len(node_labels)
def get_all_node_labels(self):
    """Return the list of all node labels.

    Returns
    -------
    list
        The node label list held by the GED data (the list object itself, not a copy).
    """
    ged_data = self.__ged_data
    return ged_data._node_labels
def get_node_label(self, label_id, to_dict=True):
    """Return a node label.

    Parameters
    ----------
    label_id : int
        ID of the node label that should be returned. Must be between 1 and ``get_num_node_labels()``.
    to_dict : bool, optional
        If true (the default), the stored label is converted with ``dict()`` before it is returned.

    Returns
    -------
    Node label for the selected label ID.

    Raises
    ------
    Exception
        If ``label_id`` is outside the valid range.
    """
    if label_id < 1 or label_id > self.get_num_node_labels():
        raise Exception('The environment does not contain a node label with ID', str(label_id), '.')
    stored_label = self.__ged_data._node_labels[label_id - 1]
    return dict(stored_label) if to_dict else stored_label
def get_num_edge_labels(self):
    """Return the number of edge labels.

    Returns
    -------
    int
        Length of the edge label list held by the GED data.
    """
    edge_labels = self.__ged_data._edge_labels
    return len(edge_labels)
def get_all_edge_labels(self):
    """Return the list of all edge labels.

    Returns
    -------
    list
        The edge label list held by the GED data (the list object itself, not a copy).
    """
    ged_data = self.__ged_data
    return ged_data._edge_labels
def get_edge_label(self, label_id, to_dict=True):
    """Return an edge label.

    Parameters
    ----------
    label_id : int
        ID of the edge label that should be returned. Must be between 1 and ``get_num_edge_labels()``.
    to_dict : bool, optional
        If true (the default), the stored label is converted with ``dict()`` before it is returned.

    Returns
    -------
    Edge label for the selected label ID.

    Raises
    ------
    Exception
        If ``label_id`` is outside the valid range.
    """
    if label_id < 1 or label_id > self.get_num_edge_labels():
        raise Exception('The environment does not contain an edge label with ID', str(label_id), '.')
    stored_label = self.__ged_data._edge_labels[label_id - 1]
    return dict(stored_label) if to_dict else stored_label
def get_upper_bound(self, g_id, h_id): | |||
""" | |||
/*! | |||
@@ -363,6 +520,205 @@ class GEDEnv(object): | |||
.. note:: I don't know how to connect the two map to reconstruct the adjacence matrix. Please come back when I know how it's work ! | |||
""" | |||
return self.get_node_map(g_id, h_id).backward_map | |||
def compute_induced_cost(self, g_id, h_id, node_map):
    """Compute the edit cost between two graphs induced by a node map.

    The two graphs are looked up in the GED data by ID and passed, together
    with ``node_map``, to the GED data's ``compute_induced_cost``.

    Parameters
    ----------
    g_id
        ID of input graph.
    h_id
        ID of input graph.
    node_map
        Node map whose induced edit cost is to be computed.
    """
    graph_g = self.__ged_data._graphs[g_id]
    graph_h = self.__ged_data._graphs[h_id]
    self.__ged_data.compute_induced_cost(graph_g, graph_h, node_map)
def get_nx_graph(self, graph_id):
    """Return a ``networkx.Graph`` representation of a graph of the environment.

    Nodes are numbered ``0 .. n - 1`` and carry their label as attributes;
    edges carry their label as attributes. The graph attributes ``'id'``
    and ``'original_node_ids'`` are set as well.

    Parameters
    ----------
    graph_id
        ID of the selected graph.

    Returns
    -------
    networkx.Graph
        The rebuilt graph.
    """
    rebuilt = nx.Graph()  # @todo: add graph attributes.
    rebuilt.graph['id'] = graph_id

    node_count = self.get_graph_num_nodes(graph_id)
    rebuilt.graph['original_node_ids'] = self.get_original_node_ids(graph_id)
    labels_per_node = self.get_graph_node_labels(graph_id, to_dict=True)
    for node_id in range(node_count):
        rebuilt.add_node(node_id, **labels_per_node[node_id])

    labelled_edges = self.get_graph_edges(graph_id, to_dict=True)
    for (source, target), attributes in labelled_edges.items():
        rebuilt.add_edge(source, target, **attributes)

    return rebuilt
def get_graph_node_labels(self, graph_id, to_dict=True):
    """Collect the labels of all nodes of a graph, selected by its ID.

    Parameters
    ----------
    graph_id
        The ID of the wanted graph.
    to_dict : bool, optional
        If true (the default), each label is converted with ``dict()``.

    Returns
    -------
    list
        The ``'label'`` attribute of every node, in the iteration order of the graph's nodes.
    """
    selected = self.__ged_data.graph(graph_id)
    raw_labels = [selected.nodes[node]['label'] for node in selected.nodes()]
    if not to_dict:
        return raw_labels
    return [dict(label) for label in raw_labels]
def get_graph_edges(self, graph_id, to_dict=True):
    """Collect all edges of a graph, selected by its ID, together with their labels.

    Parameters
    ----------
    graph_id
        The ID of the wanted graph.
    to_dict : bool, optional
        If true (the default), each label is converted with ``dict()``.

    Returns
    -------
    dict
        Maps each edge ``(n1, n2)`` to the ``'label'`` attribute of that edge.
    """
    selected = self.__ged_data.graph(graph_id)
    if not to_dict:
        return {(source, target): data['label']
                for source, target, data in selected.edges(data=True)}
    return {(source, target): dict(data['label'])
            for source, target, data in selected.edges(data=True)}
def get_graph_name(self, graph_id):
    """Return the graph name.

    Parameters
    ----------
    graph_id
        ID of an input graph that has been added to the environment.

    Returns
    -------
    The entry of the GED data's ``_graph_names`` at ``graph_id``.
    """
    names = self.__ged_data._graph_names
    return names[graph_id]
def get_graph_num_nodes(self, graph_id):
    """Return the number of nodes of a graph.

    Parameters
    ----------
    graph_id
        ID of an input graph that has been added to the environment.

    Returns
    -------
    Number of nodes in the graph, as reported by ``nx.number_of_nodes``.
    """
    selected = self.__ged_data.graph(graph_id)
    return nx.number_of_nodes(selected)
def get_original_node_ids(self, graph_id):
    """Collect the original IDs of all nodes of a graph, selected by its ID.

    Parameters
    ----------
    graph_id
        The ID of the wanted graph.

    Returns
    -------
    list
        The values of the graph's internal-to-original node ID mapping, as a new list.
    """
    internal_to_original = self.__internal_to_original_node_ids[graph_id]
    return list(internal_to_original.values())
def get_node_rel_cost(self, node_label_1, node_label_2):
    """Return the node relabeling cost.

    Labels given as ``dict`` are first converted to a tuple of their
    ``(key, value)`` pairs sorted by key.

    Parameters
    ----------
    node_label_1
        First node label.
    node_label_2
        Second node label.

    Returns
    -------
    Node relabeling cost for the given node labels, as computed by the edit cost's ``node_rel_cost_fun``.
    """
    def normalize(label):
        if isinstance(label, dict):
            return tuple(sorted(label.items(), key=lambda item: item[0]))
        return label

    first = normalize(node_label_1)
    second = normalize(node_label_2)
    return self.__ged_data._edit_cost.node_rel_cost_fun(first, second)
def get_node_del_cost(self, node_label):
    """Return the node deletion cost.

    A label given as ``dict`` is first converted to a tuple of its
    ``(key, value)`` pairs sorted by key.

    Parameters
    ----------
    node_label
        Node label.

    Returns
    -------
    Cost of deleting a node with the given label, as computed by the edit cost's ``node_del_cost_fun``.
    """
    label = node_label
    if isinstance(label, dict):
        label = tuple(sorted(label.items(), key=lambda item: item[0]))
    return self.__ged_data._edit_cost.node_del_cost_fun(label)
def get_node_ins_cost(self, node_label):
    """Return the node insertion cost.

    A label given as ``dict`` is first converted to a tuple of its
    ``(key, value)`` pairs sorted by key.

    Parameters
    ----------
    node_label
        Node label.

    Returns
    -------
    Cost of inserting a node with the given label, as computed by the edit cost's ``node_ins_cost_fun``.
    """
    label = node_label
    if isinstance(label, dict):
        label = tuple(sorted(label.items(), key=lambda item: item[0]))
    return self.__ged_data._edit_cost.node_ins_cost_fun(label)
def get_edge_rel_cost(self, edge_label_1, edge_label_2):
    """Return the edge relabeling cost.

    Labels given as ``dict`` are first converted to a tuple of their
    ``(key, value)`` pairs sorted by key.

    Parameters
    ----------
    edge_label_1
        First edge label.
    edge_label_2
        Second edge label.

    Returns
    -------
    Edge relabeling cost for the given edge labels, as computed by the edit cost's ``edge_rel_cost_fun``.
    """
    def normalize(label):
        if isinstance(label, dict):
            return tuple(sorted(label.items(), key=lambda item: item[0]))
        return label

    first = normalize(edge_label_1)
    second = normalize(edge_label_2)
    return self.__ged_data._edit_cost.edge_rel_cost_fun(first, second)
def get_edge_del_cost(self, edge_label):
    """Return the edge deletion cost.

    A label given as ``dict`` is first converted to a tuple of its
    ``(key, value)`` pairs sorted by key.

    Parameters
    ----------
    edge_label
        Edge label.

    Returns
    -------
    Cost of deleting an edge with the given label, as computed by the edit cost's ``edge_del_cost_fun``.
    """
    label = edge_label
    if isinstance(label, dict):
        label = tuple(sorted(label.items(), key=lambda item: item[0]))
    return self.__ged_data._edit_cost.edge_del_cost_fun(label)
def get_edge_ins_cost(self, edge_label):
    """Return the edge insertion cost.

    A label given as ``dict`` is first converted to a tuple of its
    ``(key, value)`` pairs sorted by key.

    Parameters
    ----------
    edge_label
        Edge label.

    Returns
    -------
    Cost of inserting an edge with the given label, as computed by the edit cost's ``edge_ins_cost_fun``.
    """
    label = edge_label
    if isinstance(label, dict):
        label = tuple(sorted(label.items(), key=lambda item: item[0]))
    return self.__ged_data._edit_cost.edge_ins_cost_fun(label)
def get_all_graph_ids(self): |
@@ -1,2 +1,3 @@ | |||
from gklearn.ged.median.median_graph_estimator import MedianGraphEstimator | |||
from gklearn.ged.median.median_graph_estimator_py import MedianGraphEstimatorPy | |||
from gklearn.ged.median.utils import constant_node_costs, mge_options_to_string |
@@ -8,6 +8,7 @@ Created on Mon Jun 22 15:37:36 2020 | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
class LSAPESolver(object): | |||
@@ -61,9 +62,9 @@ class LSAPESolver(object): | |||
""" | |||
self.clear_solution() | |||
if self.__solve_optimally: | |||
row_id, col_id = linear_sum_assignment(self.__cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used. | |||
self.__row_to_col_assignments[0] = col_id | |||
self.__col_to_row_assignments[0] = np.argsort(col_id) # @todo: might be slow, can use row_id | |||
row_ind, col_ind = linear_sum_assignment(self.__cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used. | |||
self.__row_to_col_assignments[0] = col_ind | |||
self.__col_to_row_assignments[0] = np.argsort(col_ind) # @todo: might be slow, can use row_ind | |||
self.__compute_cost_from_assignments() | |||
if num_solutions > 1: | |||
pass # @todo: | |||
@@ -49,12 +49,19 @@ def compute_ged(g1, g2, options): | |||
def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True): | |||
# initialize ged env. | |||
ged_env = GEDEnv() | |||
ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants']) | |||
for g in graphs: | |||
ged_env.add_nx_graph(g, '') | |||
listID = ged_env.get_all_graph_ids() | |||
listID = ged_env.get_all_graph_ids() | |||
node_labels = ged_env.get_all_node_labels() | |||
edge_labels = ged_env.get_all_edge_labels() | |||
node_label_costs = label_costs_to_matrix(options['node_label_costs'], len(node_labels)) if 'node_label_costs' in options else None | |||
edge_label_costs = label_costs_to_matrix(options['edge_label_costs'], len(edge_labels)) if 'edge_label_costs' in options else None | |||
ged_env.set_label_costs(node_label_costs, edge_label_costs) | |||
ged_env.init(init_type=options['init_option']) | |||
if parallel: | |||
options['threads'] = 1 | |||
@@ -62,9 +69,11 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True | |||
ged_env.init_method() | |||
# compute ged. | |||
# options used to compute numbers of edit operations. | |||
neo_options = {'edit_cost': options['edit_cost'], | |||
'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'], | |||
'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']} | |||
'is_cml': True, | |||
'node_labels': node_labels, | |||
'edge_labels': edge_labels} | |||
ged_mat = np.zeros((len(graphs), len(graphs))) | |||
if parallel: | |||
len_itr = int(len(graphs) * (len(graphs) - 1) / 2) | |||
@@ -120,8 +129,7 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True | |||
n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options) | |||
n_edit_operations.append(n_eo_tmp) | |||
return ged_vec, ged_mat, n_edit_operations | |||
return ged_vec, ged_mat, n_edit_operations | |||
def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): | |||
@@ -235,21 +243,164 @@ def _compute_ged(env, gid1, gid2, g1, g2): | |||
return dis, pi_forward, pi_backward | |||
def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
    """Dispatch to the edit-operation counter that matches ``edit_cost``.

    ``'LETTER'`` / ``'LETTER2'`` use the letter counter, ``'NON_SYMBOLIC'``
    the non-symbolic counter (with ``node_attrs`` / ``edge_attrs`` taken
    from ``kwargs``, defaulting to empty lists) and ``'CONSTANT'`` the
    symbolic counter (with ``node_labels`` / ``edge_labels`` taken from
    ``kwargs``, defaulting to empty lists). Any other value falls back to
    the symbolic counter without label arguments.

    Returns
    -------
    Whatever the selected counter returns.
    """
    if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
        return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)

    if edit_cost == 'NON_SYMBOLIC':
        attr_options = {'node_attrs': kwargs.get('node_attrs', []),
                        'edge_attrs': kwargs.get('edge_attrs', [])}
        return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
                                                  **attr_options)

    if edit_cost == 'CONSTANT':
        label_options = {'node_labels': kwargs.get('node_labels', []),
                         'edge_labels': kwargs.get('edge_labels', [])}
        return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
                                               **label_options)

    return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def label_costs_to_matrix(costs, nb_labels):
    """Reform a label cost vector to a matrix.

    Parameters
    ----------
    costs : numpy.array
        The vector containing costs between labels, in the order of insertion costs (one per label), deletion costs (one per label) and substitution costs (one per unordered pair of distinct labels, row by row).
    nb_labels : integer
        Number of labels.

    Returns
    -------
    cost_matrix : numpy.array
        The reformed label cost matrix of size (nb_labels + 1, nb_labels + 1). Each row/column of cost_matrix corresponds to a label, and the first label (index 0) is the dummy label. Row 0 holds the insertion costs, column 0 the deletion costs; substitution costs are mirrored across the diagonal, which stays zero.
    """
    size = nb_labels + 1
    cost_matrix = np.zeros((size, size))

    # Costs of insertions: dummy label -> label.
    for label in range(1, size):
        cost_matrix[0, label] = costs[label - 1]

    # Costs of deletions: label -> dummy label.
    for label in range(1, size):
        cost_matrix[label, 0] = costs[nb_labels + label - 1]

    # Costs of substitutions, stored symmetrically.
    position = 2 * nb_labels
    for row in range(1, size):
        for col in range(row + 1, size):
            cost_matrix[row, col] = cost_matrix[col, row] = costs[position]
            position += 1

    return cost_matrix
def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs):
    """Dispatch to the edit-operation counter that matches ``edit_cost`` and ``is_cml``.

    With ``is_cml`` set, only ``edit_cost == 'CONSTANT'`` is supported and
    the ``_cml`` symbolic counter is used. Otherwise ``'LETTER'`` /
    ``'LETTER2'`` use the letter counter, ``'NON_SYMBOLIC'`` the
    non-symbolic counter, ``'CONSTANT'`` the symbolic counter with labels,
    and any other value the symbolic counter without label arguments.
    ``node_labels`` / ``edge_labels`` / ``node_attrs`` / ``edge_attrs`` are
    read from ``kwargs`` and default to empty lists.

    Returns
    -------
    Whatever the selected counter returns.

    Raises
    ------
    Exception
        If ``is_cml`` is set and ``edit_cost`` is not ``'CONSTANT'``.
    """
    if is_cml:
        if edit_cost == 'CONSTANT':
            label_options = {'node_labels': kwargs.get('node_labels', []),
                             'edge_labels': kwargs.get('edge_labels', [])}
            return get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
                                                       **label_options)
        raise Exception('Edit cost "', edit_cost, '" is not supported.')

    if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
        return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)

    if edit_cost == 'NON_SYMBOLIC':
        attr_options = {'node_attrs': kwargs.get('node_attrs', []),
                        'edge_attrs': kwargs.get('edge_attrs', [])}
        return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
                                                  **attr_options)

    if edit_cost == 'CONSTANT':
        label_options = {'node_labels': kwargs.get('node_labels', []),
                         'edge_labels': kwargs.get('edge_labels', [])}
        return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
                                               **label_options)

    return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
def get_nb_edit_operations_symbolic_cml(g1, g2, forward_map, backward_map,
                                        node_labels=None, edge_labels=None):
    """Count how often each label-pair cost is used in an edit path for symbolic-labeled graphs, where the costs are different for each pair of labels.

    Parameters
    ----------
    g1, g2 : graph
        Graphs exposing ``nodes()``, ``nodes[n]``, ``edges()`` and
        ``edges[(u, v)]``; the label of a node/edge is the tuple of its
        attribute items.
    forward_map : list
        ``forward_map[i]`` is the node of g2 that the i-th node of g1 is
        mapped to, or ``np.inf`` if that node is deleted.
    backward_map : list
        ``backward_map[i]`` is ``np.inf`` if the i-th node of g2 is inserted.
    node_labels, edge_labels : list, optional
        All node / edge labels; the position of a label in the list fixes
        its position in the returned vector. Default to empty lists.

    Returns
    -------
    list
        A vector of numbers of times that costs between labels are used in an edit path, formed in the order of node insertion costs, node deletion costs, node substitution costs, edge insertion costs, edge deletion costs, edge substitution costs. The dummy label is the first label, and the self label costs are not included. Each substitution entry counts an unordered pair of labels, i.e. substitutions in both directions.

    Raises
    ------
    ValueError
        If a label met in the graphs is missing from ``node_labels`` /
        ``edge_labels``.
    """
    node_labels = [] if node_labels is None else node_labels
    edge_labels = [] if edge_labels is None else edge_labels

    # Row/column 0 stands for the dummy label; label k uses row/column k + 1.
    # Entry [r, c] counts operations turning label r into label c.
    nb_ops_node = np.zeros((1 + len(node_labels), 1 + len(node_labels)))
    nb_ops_edge = np.zeros((1 + len(edge_labels), 1 + len(edge_labels)))

    # For nodes.
    nodes1 = list(g1.nodes())
    for i, map_i in enumerate(forward_map):
        label1 = tuple(g1.nodes[nodes1[i]].items())  # @todo: order and faster
        idx_label1 = node_labels.index(label1)  # @todo: faster
        if map_i == np.inf:  # deletions.
            nb_ops_node[idx_label1 + 1, 0] += 1
        else:  # substitutions.
            label2 = tuple(g2.nodes[map_i].items())
            if label1 != label2:
                idx_label2 = node_labels.index(label2)  # @todo: faster
                nb_ops_node[idx_label1 + 1, idx_label2 + 1] += 1
    # insertions.
    nodes2 = list(g2.nodes())
    for i, map_i in enumerate(backward_map):
        if map_i == np.inf:
            label = tuple(g2.nodes[nodes2[i]].items())
            idx_label = node_labels.index(label)  # @todo: faster
            nb_ops_node[0, idx_label + 1] += 1

    # For edges.
    position1 = {node: idx for idx, node in enumerate(nodes1)}
    edges2_marked = set()
    for nf1, nt1 in g1.edges():
        label1 = tuple(g1.edges[(nf1, nt1)].items())
        idx_label1 = edge_labels.index(label1)  # @todo: faster
        nf2, nt2 = forward_map[position1[nf1]], forward_map[position1[nt1]]
        # At least one of the nodes is removed, thus the edge is removed.
        if nf2 == np.inf or nt2 == np.inf:
            nb_ops_edge[idx_label1 + 1, 0] += 1
            continue
        if (nf2, nt2) in g2.edges():
            matched = (nf2, nt2)
        # Switch nf2 and nt2, for directed graphs.
        elif (nt2, nf2) in g2.edges():
            matched = (nt2, nf2)
        # Corresponding nodes are in g2, however the edge is removed.
        else:
            nb_ops_edge[idx_label1 + 1, 0] += 1
            continue
        edges2_marked.add(matched)
        # Only a change of label counts as a substitution.
        label2 = tuple(g2.edges[matched].items())
        if label1 != label2:
            idx_label2 = edge_labels.index(label2)  # @todo: faster
            nb_ops_edge[idx_label1 + 1, idx_label2 + 1] += 1
    # insertions.
    for nf, nt in g2.edges():
        if (nf, nt) not in edges2_marked and (nt, nf) not in edges2_marked:  # @todo: for directed.
            label = tuple(g2.edges[(nf, nt)].items())
            idx_label = edge_labels.index(label)  # @todo: faster
            nb_ops_edge[0, idx_label + 1] += 1

    # Reform the numbers of edit operations into a vector.
    return _flatten_label_op_counts(nb_ops_node) + _flatten_label_op_counts(nb_ops_edge)


def _flatten_label_op_counts(nb_ops):
    """Flatten a square operation-count matrix into insertions, deletions, then substitutions per unordered label pair."""
    size = len(nb_ops)
    counts = [nb_ops[0, col] for col in range(1, size)]
    counts += [nb_ops[row, 0] for row in range(1, size)]
    # One cost exists per unordered pair of labels, so a -> b and b -> a
    # substitutions both contribute to the same entry.
    counts += [nb_ops[row, col] + nb_ops[col, row]
               for row in range(1, size) for col in range(row + 1, size)]
    return counts
def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map, | |||
@@ -11,8 +11,9 @@ __author__ = "Linlin Jia" | |||
__date__ = "March 2020" | |||
from gklearn.preimage.preimage_generator import PreimageGenerator | |||
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | |||
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator | |||
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | |||
from gklearn.preimage.median_preimage_generator_py import MedianPreimageGeneratorPy | |||
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML | |||
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | |||
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class |
@@ -5,31 +5,26 @@ Created on Tue Jun 16 16:04:46 2020 | |||
@author: ljia | |||
""" | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Mar 26 18:27:22 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import time | |||
import random | |||
import multiprocessing | |||
import networkx as nx | |||
import cvxpy as cp | |||
import itertools | |||
from gklearn.preimage import PreimageGenerator | |||
from gklearn.preimage.utils import compute_k_dis | |||
from gklearn.ged.util import compute_geds_cml, ged_options_to_string | |||
from gklearn.ged.util import compute_geds_cml | |||
from gklearn.ged.env import GEDEnv | |||
from gklearn.ged.median import MedianGraphEstimator | |||
from gklearn.ged.median import constant_node_costs,mge_options_to_string | |||
from gklearn.utils import Timer | |||
from gklearn.ged.median import MedianGraphEstimatorPy | |||
from gklearn.ged.median import constant_node_costs, mge_options_to_string | |||
from gklearn.utils import Timer, SpecialLabel | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
class MedianPreimageGeneratorCML(PreimageGenerator): | |||
"""Generator median preimages by cost matrices learning using the pure Python version of GEDEnv. Works only for symbolic labeled graphs. | |||
""" | |||
def __init__(self, dataset=None): | |||
PreimageGenerator.__init__(self, dataset=dataset) | |||
@@ -37,7 +32,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__mge = None | |||
self.__ged_options = {} | |||
self.__mge_options = {} | |||
self.__fit_method = 'k-graphs' | |||
# self.__fit_method = 'k-graphs' | |||
self.__init_method = 'random' | |||
self.__init_ecc = None | |||
self.__parallel = True | |||
self.__n_jobs = multiprocessing.cpu_count() | |||
@@ -47,8 +43,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__max_itrs_without_update = 3 | |||
self.__epsilon_residual = 0.01 | |||
self.__epsilon_ec = 0.1 | |||
self.__allow_zeros = False | |||
self.__triangle_rule = True | |||
self.__allow_zeros = True | |||
# self.__triangle_rule = True | |||
# values to compute. | |||
self.__runtime_optimize_ec = None | |||
self.__runtime_generate_preimage = None | |||
@@ -64,6 +60,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__itrs = 0 | |||
self.__converged = False | |||
self.__num_updates_ecc = 0 | |||
self.__node_label_costs = None | |||
self.__edge_label_costs = None | |||
# values that can be set or to be computed. | |||
self.__edit_cost_constants = [] | |||
self.__gram_matrix_unnorm = None | |||
@@ -76,7 +74,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self._verbose = kwargs.get('verbose', 2) | |||
self.__ged_options = kwargs.get('ged_options', {}) | |||
self.__mge_options = kwargs.get('mge_options', {}) | |||
self.__fit_method = kwargs.get('fit_method', 'k-graphs') | |||
# self.__fit_method = kwargs.get('fit_method', 'k-graphs') | |||
self.__init_method = kwargs.get('init_method', 'random') | |||
self.__init_ecc = kwargs.get('init_ecc', None) | |||
self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) | |||
self.__parallel = kwargs.get('parallel', True) | |||
@@ -89,8 +88,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) | |||
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||
self.__allow_zeros = kwargs.get('allow_zeros', False) | |||
self.__triangle_rule = kwargs.get('triangle_rule', True) | |||
self.__allow_zeros = kwargs.get('allow_zeros', True) | |||
# self.__triangle_rule = kwargs.get('triangle_rule', True) | |||
def run(self): | |||
@@ -122,10 +121,10 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
end_precompute_gm = time.time() | |||
start -= self.__runtime_precompute_gm | |||
if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset': | |||
start = time.time() | |||
self.__runtime_precompute_gm = 0 | |||
end_precompute_gm = start | |||
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset': | |||
# start = time.time() | |||
# self.__runtime_precompute_gm = 0 | |||
# end_precompute_gm = start | |||
# 2. optimize edit cost constants. | |||
self.__optimize_edit_cost_vector() | |||
@@ -197,7 +196,18 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
def __optimize_edit_cost_vector(self): | |||
"""Learn edit cost vector. | |||
""" | |||
if self.__fit_method == 'random': # random | |||
# Initialize label costs randomly. | |||
if self.__init_method == 'random': | |||
# Initialize label costs. | |||
self.__initialize_label_costs() | |||
# Optimize edit cost matrices. | |||
self.__optimize_ecm_by_kernel_distances() | |||
# Initialize all label costs with the same value. | |||
elif self.__init_method == 'uniform': # random | |||
pass | |||
elif self.__fit_method == 'random': # random | |||
if self.__ged_options['edit_cost'] == 'LETTER': | |||
self.__edit_cost_constants = random.sample(range(1, 1000), 3) | |||
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] | |||
@@ -257,6 +267,31 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
pass | |||
def __initialize_label_costs(self):
    """Initialize the node label costs first, then the edge label costs."""
    for initialize in (self.__initialize_node_label_costs,
                       self.__initialize_edge_label_costs):
        initialize()
def __initialize_node_label_costs(self):
    """Draw random initial node label costs and store them on the instance.

    One cost is drawn per insertion, per deletion and per unordered pair of
    distinct labels. The costs are distinct and scaled so that the largest
    equals 1. An empty array is stored when the dataset has no node labels.
    """
    # Get list of node labels.
    nls = self._dataset.get_all_node_labels()
    # n * (n - 1) / 2 substitutions plus n insertions and n deletions.
    nb_nl = len(nls) * (len(nls) - 1) // 2 + 2 * len(nls)
    if nb_nl == 0:
        # np.max would raise on an empty sample.
        self.__node_label_costs = np.zeros(0)
        return
    # Generate random costs as an explicit float array.
    rand_costs = np.array(random.sample(range(1, 10 * nb_nl + 1), nb_nl), dtype=float)
    self.__node_label_costs = rand_costs / rand_costs.max()  # @todo: maybe not needed.
def __initialize_edge_label_costs(self):
    """Draw random initial edge label costs and store them on the instance.

    One cost is drawn per insertion, per deletion and per unordered pair of
    distinct labels. The costs are distinct and scaled so that the largest
    equals 1. An empty array is stored when the dataset has no edge labels.
    """
    # Get list of edge labels.
    els = self._dataset.get_all_edge_labels()
    # n * (n - 1) / 2 substitutions plus n insertions and n deletions.
    nb_el = len(els) * (len(els) - 1) // 2 + 2 * len(els)
    if nb_el == 0:
        # np.max would raise on an empty sample.
        self.__edge_label_costs = np.zeros(0)
        return
    # Generate random costs as an explicit float array.
    rand_costs = np.array(random.sample(range(1, 10 * nb_el + 1), nb_el), dtype=float)
    self.__edge_label_costs = rand_costs / rand_costs.max()  # @todo: maybe not needed.
def __optimize_ecm_by_kernel_distances(self): | |||
# compute distances in feature space. | |||
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() | |||
@@ -279,6 +314,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
options['edge_labels'] = self._dataset.edge_labels | |||
options['node_attrs'] = self._dataset.node_attrs | |||
options['edge_attrs'] = self._dataset.edge_attrs | |||
options['node_label_costs'] = self.__node_label_costs | |||
options['edge_label_costs'] = self.__edge_label_costs | |||
ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) | |||
residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] | |||
time_list = [time.time() - time0] | |||
@@ -881,8 +918,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
ged_env.init(init_type=self.__ged_options['init_option']) | |||
# Set up the median graph estimator. | |||
self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) | |||
self.__mge = MedianGraphEstimatorPy(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options) | |||
options = self.__mge_options.copy() | |||
if not 'seed' in options: | |||
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | |||
@@ -897,8 +934,8 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
ged_options = self.__ged_options.copy() | |||
if self.__parallel: | |||
ged_options['threads'] = 1 | |||
self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options)) | |||
self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options)) | |||
self.__mge.set_init_method(ged_options['method'], ged_options) | |||
self.__mge.set_descent_method(ged_options['method'], ged_options) | |||
# Run the estimator. | |||
self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||
@@ -0,0 +1,122 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Jul 6 12:08:24 2020 | |||
@author: ljia | |||
""" | |||
import random | |||
import numpy as np | |||
def test_get_nb_edit_operations_symbolic_cml():
    """Test get_nb_edit_operations_symbolic_cml().

    The label costs weighted by the returned operation counts must
    reproduce the distance computed by GEDEnv for the first two graphs of
    the predefined dataset "MUTAG".
    """
    # 1. Get dataset.
    from gklearn.utils import Dataset

    dataset = Dataset()
    dataset.load_predefined_dataset('MUTAG')
    first_graph = dataset.graphs[0]
    second_graph = dataset.graphs[1]

    # 2. Compute graph edit distance, starting from random label costs.
    node_label_costs, edge_label_costs = _initialize_label_costs(dataset)
    pi_forward, pi_backward, dis, node_labels, edge_labels = _compute_ged(
        dataset, node_label_costs, edge_label_costs)

    # Compute numbers of edit operations.
    from gklearn.ged.util.util import get_nb_edit_operations_symbolic_cml
    n_edit_operations = get_nb_edit_operations_symbolic_cml(
        first_graph, second_graph, pi_forward, pi_backward, node_labels, edge_labels)

    # Relative gap between the distance rebuilt from the counts and the GED.
    all_costs = np.concatenate((node_label_costs, edge_label_costs))
    rebuilt = np.dot(all_costs, n_edit_operations)
    relative_gap = np.abs((rebuilt - dis) / dis)
    assert relative_gap < 10e-6
def _initialize_label_costs(dataset):
    """Return the pair (node label costs, edge label costs) drawn for ``dataset``."""
    # Node costs are drawn before edge costs (left-to-right evaluation).
    return _initialize_node_label_costs(dataset), _initialize_edge_label_costs(dataset)
def _initialize_node_label_costs(dataset):
    """Draw random node label costs for ``dataset``.

    One cost is drawn per insertion, per deletion and per unordered pair of
    distinct labels. The costs are distinct and scaled so that the largest
    equals 1.

    Returns
    -------
    numpy.ndarray
        The costs; empty when the dataset has no node labels.
    """
    # Get list of node labels.
    nls = dataset.get_all_node_labels()
    # n * (n - 1) / 2 substitutions plus n insertions and n deletions.
    nb_nl = len(nls) * (len(nls) - 1) // 2 + 2 * len(nls)
    if nb_nl == 0:
        # np.max would raise on an empty sample.
        return np.zeros(0)
    # Generate random costs as an explicit float array.
    rand_costs = np.array(random.sample(range(1, 10 * nb_nl + 1), nb_nl), dtype=float)
    return rand_costs / rand_costs.max()
def _initialize_edge_label_costs(dataset):
    """Draw random edge label costs for ``dataset``.

    One cost is drawn per insertion, per deletion and per unordered pair of
    distinct labels. The costs are distinct and scaled so that the largest
    equals 1.

    Returns
    -------
    numpy.ndarray
        The costs; empty when the dataset has no edge labels.
    """
    # Get list of edge labels.
    els = dataset.get_all_edge_labels()
    # n * (n - 1) / 2 substitutions plus n insertions and n deletions.
    nb_el = len(els) * (len(els) - 1) // 2 + 2 * len(els)
    if nb_el == 0:
        # np.max would raise on an empty sample.
        return np.zeros(0)
    # Generate random costs as an explicit float array.
    rand_costs = np.array(random.sample(range(1, 10 * nb_el + 1), nb_el), dtype=float)
    return rand_costs / rand_costs.max()
def _compute_ged(dataset, node_label_costs, edge_label_costs):
    """Run GEDEnv with the given label costs on the first two graphs of ``dataset``.

    Returns
    -------
    tuple
        (forward map, backward map, upper bound of the GED, node labels,
        edge labels). The maps hold node names of the other graph, with
        ``np.inf`` marking a removed / inserted node.
    """
    from gklearn.ged.env import GEDEnv
    from gklearn.ged.util.util import label_costs_to_matrix
    import networkx as nx

    env = GEDEnv()  # initialize GED environment.
    env.set_edit_cost('CONSTANT', edit_cost_constants=[3, 3, 1, 3, 3, 1])
    for graph in dataset.graphs:
        env.add_nx_graph(graph, '')  # add graphs
    node_labels = env.get_all_node_labels()
    edge_labels = env.get_all_edge_labels()
    graph_ids = env.get_all_graph_ids()  # get list IDs of graphs
    node_cost_matrix = label_costs_to_matrix(node_label_costs, len(node_labels))
    edge_cost_matrix = label_costs_to_matrix(edge_label_costs, len(edge_labels))
    env.set_label_costs(node_cost_matrix, edge_cost_matrix)
    env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.

    method_options = {
        'initialization_method': 'RANDOM',  # or 'NODE', etc.
        'threads': 1,  # parallel threads.
    }
    env.set_method('BIPARTITE', method_options)
    env.init_method()  # initialize GED method.

    id_first, id_second = graph_ids[0], graph_ids[1]
    env.run_method(id_first, id_second)  # run.
    raw_forward = env.get_forward_map(id_first, id_second)
    raw_backward = env.get_backward_map(id_first, id_second)
    dis = env.get_upper_bound(id_first, id_second)  # GED between two graphs.

    # Translate node indices into node names; an index at or beyond the
    # size of the other graph means "no counterpart" and becomes np.inf.
    names_first = list(dataset.graphs[0].nodes())
    names_second = list(dataset.graphs[1].nodes())
    size_first = nx.number_of_nodes(dataset.graphs[0])
    size_second = nx.number_of_nodes(dataset.graphs[1])
    pi_forward = [names_second[k] if k < size_second else np.inf for k in raw_forward]
    pi_backward = [names_first[k] if k < size_first else np.inf for k in raw_backward]

    return pi_forward, pi_backward, dis, node_labels, edge_labels
if __name__ == "__main__": | |||
test_get_nb_edit_operations_symbolic_cml() |
@@ -535,6 +535,26 @@ class Dataset(object): | |||
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) | |||
# @todo: clean_labels and add other class members? | |||
return dataset | |||
def get_all_node_labels(self): | |||
node_labels = [] | |||
for g in self.__graphs: | |||
for n in g.nodes(): | |||
nl = tuple(g.nodes[n].items()) | |||
if nl not in node_labels: | |||
node_labels.append(nl) | |||
return node_labels | |||
def get_all_edge_labels(self): | |||
edge_labels = [] | |||
for g in self.__graphs: | |||
for e in g.edges(): | |||
el = tuple(g.edges[e].items()) | |||
if el not in edge_labels: | |||
edge_labels.append(el) | |||
return edge_labels | |||
def __get_dataset_size(self): | |||