|
|
@@ -0,0 +1,249 @@ |
|
|
|
#!/usr/bin/env python3 |
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
""" |
|
|
|
Created on Wed Jun 17 15:05:01 2020 |
|
|
|
|
|
|
|
@author: ljia |
|
|
|
""" |
|
|
|
from gklearn.ged.env import Options, OptionsStringMap |
|
|
|
from gklearn.ged.edit_costs import Constant |
|
|
|
from gklearn.utils import SpecialLabel, dummy_node |
|
|
|
|
|
|
|
|
|
|
|
class GEDData(object): |
|
|
|
|
|
|
|
|
|
|
|
def __init__(self): |
|
|
|
self._graphs = [] |
|
|
|
self._graph_names = [] |
|
|
|
self._graph_classes = [] |
|
|
|
self._num_graphs_without_shuffled_copies = 0 |
|
|
|
self._strings_to_internal_node_ids = [] |
|
|
|
self._internal_node_ids_to_strings = [] |
|
|
|
self._edit_cost = None |
|
|
|
self._node_costs = None |
|
|
|
self._edge_costs = None |
|
|
|
self._node_label_costs = None |
|
|
|
self._edge_label_costs = None |
|
|
|
self._node_labels = [] |
|
|
|
self._edge_labels = [] |
|
|
|
self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES |
|
|
|
self._delete_edit_cost = True |
|
|
|
self._max_num_nodes = 0 |
|
|
|
self._max_num_edges = 0 |
|
|
|
|
|
|
|
|
|
|
|
def num_graphs(self): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Returns the number of graphs. |
|
|
|
* @return Number of graphs in the instance. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
return len(self._graphs) |
|
|
|
|
|
|
|
|
|
|
|
def graph(self, graph_id): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Provides access to a graph. |
|
|
|
* @param[in] graph_id The ID of the graph. |
|
|
|
* @return Constant reference to the graph with ID @p graph_id. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
return self._graphs[graph_id] |
|
|
|
|
|
|
|
|
|
|
|
def shuffled_graph_copies_available(self): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Checks if shuffled graph copies are available. |
|
|
|
* @return Boolean @p true if shuffled graph copies are available. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES) |
|
|
|
|
|
|
|
|
|
|
|
def num_graphs_without_shuffled_copies(self): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Returns the number of graphs in the instance without the shuffled copies. |
|
|
|
* @return Number of graphs without shuffled copies contained in the instance. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
return self._num_graphs_without_shuffled_copies |
|
|
|
|
|
|
|
|
|
|
|
def node_cost(self, label1, label2): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Returns node relabeling, insertion, or deletion cost. |
|
|
|
* @param[in] label1 First node label. |
|
|
|
* @param[in] label2 Second node label. |
|
|
|
* @return Node relabeling cost if @p label1 and @p label2 are both different from ged::dummy_label(), |
|
|
|
* node insertion cost if @p label1 equals ged::dummy_label and @p label2 does not, |
|
|
|
* node deletion cost if @p label1 does not equal ged::dummy_label and @p label2 does, |
|
|
|
* and 0 otherwise. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
if self._node_label_costs is None: |
|
|
|
if self._eager_init(): # @todo: check if correct |
|
|
|
return self._node_costs[label1, label2] |
|
|
|
if label1 == label2: |
|
|
|
return 0 |
|
|
|
if label1 == SpecialLabel.DUMMY: # @todo: check dummy |
|
|
|
return self._edit_cost.node_ins_cost_fun(label2) # self._node_labels[label2 - 1]) # @todo: check |
|
|
|
if label2 == SpecialLabel.DUMMY: # @todo: check dummy |
|
|
|
return self._edit_cost.node_del_cost_fun(label1) # self._node_labels[label1 - 1]) |
|
|
|
return self._edit_cost.node_rel_cost_fun(label1, label2) # self._node_labels[label1 - 1], self._node_labels[label2 - 1]) |
|
|
|
# use pre-computed node label costs. |
|
|
|
else: |
|
|
|
id1 = 0 if label1 == SpecialLabel.DUMMY else self._node_label_to_id(label1) # @todo: this is slow. |
|
|
|
id2 = 0 if label2 == SpecialLabel.DUMMY else self._node_label_to_id(label2) |
|
|
|
return self._node_label_costs[id1, id2] |
|
|
|
|
|
|
|
|
|
|
|
def edge_cost(self, label1, label2): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Returns edge relabeling, insertion, or deletion cost. |
|
|
|
* @param[in] label1 First edge label. |
|
|
|
* @param[in] label2 Second edge label. |
|
|
|
* @return Edge relabeling cost if @p label1 and @p label2 are both different from ged::dummy_label(), |
|
|
|
* edge insertion cost if @p label1 equals ged::dummy_label and @p label2 does not, |
|
|
|
* edge deletion cost if @p label1 does not equal ged::dummy_label and @p label2 does, |
|
|
|
* and 0 otherwise. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
if self._edge_label_costs is None: |
|
|
|
if self._eager_init(): # @todo: check if correct |
|
|
|
return self._node_costs[label1, label2] |
|
|
|
if label1 == label2: |
|
|
|
return 0 |
|
|
|
if label1 == SpecialLabel.DUMMY: |
|
|
|
return self._edit_cost.edge_ins_cost_fun(label2) # self._edge_labels[label2 - 1]) |
|
|
|
if label2 == SpecialLabel.DUMMY: |
|
|
|
return self._edit_cost.edge_del_cost_fun(label1) # self._edge_labels[label1 - 1]) |
|
|
|
return self._edit_cost.edge_rel_cost_fun(label1, label2) # self._edge_labels[label1 - 1], self._edge_labels[label2 - 1]) |
|
|
|
|
|
|
|
# use pre-computed edge label costs. |
|
|
|
else: |
|
|
|
id1 = 0 if label1 == SpecialLabel.DUMMY else self._edge_label_to_id(label1) # @todo: this is slow. |
|
|
|
id2 = 0 if label2 == SpecialLabel.DUMMY else self._edge_label_to_id(label2) |
|
|
|
return self._edge_label_costs[id1, id2] |
|
|
|
|
|
|
|
|
|
|
|
def compute_induced_cost(self, g, h, node_map): |
|
|
|
""" |
|
|
|
/*! |
|
|
|
* @brief Computes the edit cost between two graphs induced by a node map. |
|
|
|
* @param[in] g Input graph. |
|
|
|
* @param[in] h Input graph. |
|
|
|
* @param[in,out] node_map Node map whose induced edit cost is to be computed. |
|
|
|
*/ |
|
|
|
""" |
|
|
|
cost = 0 |
|
|
|
|
|
|
|
# collect node costs |
|
|
|
for node in g.nodes(): |
|
|
|
image = node_map.image(node) |
|
|
|
label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label']) |
|
|
|
cost += self.node_cost(g.nodes[node]['label'], label2) |
|
|
|
for node in h.nodes(): |
|
|
|
pre_image = node_map.pre_image(node) |
|
|
|
if pre_image == dummy_node(): |
|
|
|
cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label']) |
|
|
|
|
|
|
|
# collect edge costs |
|
|
|
for (n1, n2) in g.edges(): |
|
|
|
image1 = node_map.image(n1) |
|
|
|
image2 = node_map.image(n2) |
|
|
|
label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY) |
|
|
|
cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2) |
|
|
|
for (n1, n2) in h.edges(): |
|
|
|
if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)): |
|
|
|
cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label']) |
|
|
|
|
|
|
|
node_map.set_induced_cost(cost) |
|
|
|
|
|
|
|
|
|
|
|
def _set_edit_cost(self, edit_cost, edit_cost_constants): |
|
|
|
if self._delete_edit_cost: |
|
|
|
self._edit_cost = None |
|
|
|
|
|
|
|
if isinstance(edit_cost, str): |
|
|
|
edit_cost = OptionsStringMap.EditCosts[edit_cost] |
|
|
|
|
|
|
|
if edit_cost == Options.EditCosts.CHEM_1: |
|
|
|
if len(edit_cost_constants) == 4: |
|
|
|
self._edit_cost = CHEM1(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3]) |
|
|
|
elif len(edit_cost_constants) == 0: |
|
|
|
self._edit_cost = CHEM1() |
|
|
|
else: |
|
|
|
raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::CHEM_1. Expected: 4 or 0; actual:', len(edit_cost_constants), '.') |
|
|
|
elif edit_cost == Options.EditCosts.LETTER: |
|
|
|
if len(edit_cost_constants) == 3: |
|
|
|
self._edit_cost = Letter(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2]) |
|
|
|
elif len(edit_cost_constants) == 0: |
|
|
|
self._edit_cost = Letter() |
|
|
|
else: |
|
|
|
raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::LETTER. Expected: 3 or 0; actual:', len(edit_cost_constants), '.') |
|
|
|
elif edit_cost == Options.EditCosts.LETTER2: |
|
|
|
if len(edit_cost_constants) == 5: |
|
|
|
self._edit_cost = Letter2(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3], edit_cost_constants[4]) |
|
|
|
elif len(edit_cost_constants) == 0: |
|
|
|
self._edit_cost = Letter2() |
|
|
|
else: |
|
|
|
raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::LETTER2. Expected: 5 or 0; actual:', len(edit_cost_constants), '.') |
|
|
|
elif edit_cost == Options.EditCosts.NON_SYMBOLIC: |
|
|
|
if len(edit_cost_constants) == 6: |
|
|
|
self._edit_cost = NonSymbolic(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3], edit_cost_constants[4], edit_cost_constants[5]) |
|
|
|
elif len(edit_cost_constants) == 0: |
|
|
|
self._edit_cost = NonSymbolic() |
|
|
|
else: |
|
|
|
raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::NON_SYMBOLIC. Expected: 6 or 0; actual:', len(edit_cost_constants), '.') |
|
|
|
elif edit_cost == Options.EditCosts.CONSTANT: |
|
|
|
if len(edit_cost_constants) == 6: |
|
|
|
self._edit_cost = Constant(edit_cost_constants[0], edit_cost_constants[1], edit_cost_constants[2], edit_cost_constants[3], edit_cost_constants[4], edit_cost_constants[5]) |
|
|
|
elif len(edit_cost_constants) == 0: |
|
|
|
self._edit_cost = Constant() |
|
|
|
else: |
|
|
|
raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::CONSTANT. Expected: 6 or 0; actual:', len(edit_cost_constants), '.') |
|
|
|
|
|
|
|
self._delete_edit_cost = True |
|
|
|
|
|
|
|
|
|
|
|
def id_to_node_label(self, label_id): |
|
|
|
if label_id > len(self._node_labels) or label_id == 0: |
|
|
|
raise Exception('Invalid node label ID', str(label_id), '.') |
|
|
|
return self._node_labels[label_id - 1] |
|
|
|
|
|
|
|
|
|
|
|
def _node_label_to_id(self, node_label): |
|
|
|
n_id = 0 |
|
|
|
for n_l in self._node_labels: |
|
|
|
if n_l == node_label: |
|
|
|
return n_id + 1 |
|
|
|
n_id += 1 |
|
|
|
self._node_labels.append(node_label) |
|
|
|
return n_id + 1 |
|
|
|
|
|
|
|
|
|
|
|
def id_to_edge_label(self, label_id): |
|
|
|
if label_id > len(self._edge_labels) or label_id == 0: |
|
|
|
raise Exception('Invalid edge label ID', str(label_id), '.') |
|
|
|
return self._edge_labels[label_id - 1] |
|
|
|
|
|
|
|
|
|
|
|
def _edge_label_to_id(self, edge_label): |
|
|
|
e_id = 0 |
|
|
|
for e_l in self._edge_labels: |
|
|
|
if e_l == edge_label: |
|
|
|
return e_id + 1 |
|
|
|
e_id += 1 |
|
|
|
self._edge_labels.append(edge_label) |
|
|
|
return e_id + 1 |
|
|
|
|
|
|
|
|
|
|
|
def _eager_init(self): |
|
|
|
return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES) |