@@ -1,2 +1,4 @@ | |||||
[run] | [run] | ||||
omit = gklearn/tests/* | |||||
omit = | |||||
gklearn/tests/* | |||||
gklearn/examples/* |
@@ -0,0 +1,58 @@ | |||||
# -*- coding: utf-8 -*-
"""compute_graph_edit_distance.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3
**This script demonstrates how to compute a graph edit distance.**
---
**0. Install `graphkit-learn`.**
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Name of the predefined dataset used in this example.
ds_name = 'MUTAG'

# Create a Dataset object and fill it with the predefined dataset.
dataset = Dataset()
dataset.load_predefined_dataset(ds_name)

# The distance is computed between the first two graphs of the dataset.
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)

"""**2. Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv

# Initialize the GED environment.
ged_env = GEDEnv()

# Select the edit cost type and its six cost constants.
ged_env.set_edit_cost('CONSTANT', edit_cost_constants=[3, 3, 1, 3, 3, 1])

# Register both graphs (with an empty class label) and fetch their IDs.
ged_env.add_nx_graph(graph1, '')
ged_env.add_nx_graph(graph2, '')
graph_ids = ged_env.get_all_graph_ids()

# Initialize the environment once all graphs have been added.
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')

# Options handed to the GED method.
method_options = {
    'initialization_method': 'RANDOM',  # or 'NODE', etc.
    'threads': 1,  # parallel threads.
}

# Select, initialize and run the GED method on the two graphs.
ged_env.set_method('BIPARTITE', method_options)
ged_env.init_method()
first_id, second_id = graph_ids[0], graph_ids[1]
ged_env.run_method(first_id, second_id)

# Collect the results: both node maps and the GED between the two graphs.
pi_forward = ged_env.get_forward_map(first_id, second_id)
pi_backward = ged_env.get_backward_map(first_id, second_id)
distance = ged_env.get_upper_bound(first_id, second_id)

print(pi_forward)
print(pi_backward)
print(distance)
@@ -0,0 +1,2 @@ | |||||
from gklearn.ged.edit_costs.edit_cost import EditCost | |||||
from gklearn.ged.edit_costs.constant import Constant |
@@ -0,0 +1,50 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Wed Jun 17 17:52:23 2020 | |||||
@author: ljia | |||||
""" | |||||
from gklearn.ged.edit_costs import EditCost | |||||
class Constant(EditCost):
    """Edit cost functions whose values are fixed constants.

    Insertion and deletion costs do not depend on the label. A relabeling
    costs the configured constant when the two labels differ and 0 when
    they compare equal.
    """

    def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1,
                 edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
        """Store the six constants; every one of them defaults to 1."""
        # Node operations.
        self.__node_ins_cost = node_ins_cost
        self.__node_del_cost = node_del_cost
        self.__node_rel_cost = node_rel_cost
        # Edge operations.
        self.__edge_ins_cost = edge_ins_cost
        self.__edge_del_cost = edge_del_cost
        self.__edge_rel_cost = edge_rel_cost

    def node_ins_cost_fun(self, node_label):
        """Return the node insertion constant; the label is ignored."""
        return self.__node_ins_cost

    def node_del_cost_fun(self, node_label):
        """Return the node deletion constant; the label is ignored."""
        return self.__node_del_cost

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the node relabeling constant for differing labels, else 0."""
        return self.__node_rel_cost if node_label_1 != node_label_2 else 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the edge insertion constant; the label is ignored."""
        return self.__edge_ins_cost

    def edge_del_cost_fun(self, edge_label):
        """Return the edge deletion constant; the label is ignored."""
        return self.__edge_del_cost

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the edge relabeling constant for differing labels, else 0."""
        return self.__edge_rel_cost if edge_label_1 != edge_label_2 else 0
@@ -0,0 +1,88 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Wed Jun 17 17:49:24 2020 | |||||
@author: ljia | |||||
""" | |||||
class EditCost(object):
    """Base class for edit cost models.

    Every cost function of this base class returns 0. Derived classes are
    expected to override them with their own cost definitions.
    """

    def __init__(self):
        """Nothing to set up in the base class."""

    def node_ins_cost_fun(self, node_label):
        """Cost of inserting a node with label ``node_label``.

        :param node_label: A node label.
        :return: Always 0 in this base class.
        """
        return 0

    def node_del_cost_fun(self, node_label):
        """Cost of deleting a node with label ``node_label``.

        :param node_label: A node label.
        :return: Always 0 in this base class.
        """
        return 0

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Cost of changing a node's label from ``node_label_1`` to ``node_label_2``.

        :param node_label_1: A node label.
        :param node_label_2: A node label.
        :return: Always 0 in this base class.
        """
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Cost of inserting an edge with label ``edge_label``.

        :param edge_label: An edge label.
        :return: Always 0 in this base class.
        """
        return 0

    def edge_del_cost_fun(self, edge_label):
        """Cost of deleting an edge with label ``edge_label``.

        :param edge_label: An edge label.
        :return: Always 0 in this base class.
        """
        return 0

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Cost of changing an edge's label from ``edge_label_1`` to ``edge_label_2``.

        :param edge_label_1: An edge label.
        :param edge_label_2: An edge label.
        :return: Always 0 in this base class.
        """
        return 0
@@ -1,2 +1,4 @@ | |||||
from gklearn.ged.env.common_types import AlgorithmState | |||||
from gklearn.ged.env.common_types import Options, OptionsStringMap, AlgorithmState | |||||
from gklearn.ged.env.ged_data import GEDData | |||||
from gklearn.ged.env.ged_env import GEDEnv | |||||
from gklearn.ged.env.node_map import NodeMap | from gklearn.ged.env.node_map import NodeMap |
@@ -8,11 +8,152 @@ Created on Thu Mar 19 18:17:38 2020 | |||||
from enum import Enum, unique | from enum import Enum, unique | ||||
class Options(object):
    """Namespace holding the enumerations used to configure ged::GEDEnv."""

    @unique
    class GEDMethod(Enum):
        """GED computation methods that can be selected."""

        # @todo: clarify the "#ifdef GUROBI" guard noted around the next four entries.
        F1 = 1  # Selects ged::F1.
        F2 = 2  # Selects ged::F2.
        COMPACT_MIP = 3  # Selects ged::CompactMIP.
        BLP_NO_EDGE_LABELS = 4  # Selects ged::BLPNoEdgeLabels.
        # End of the GUROBI-guarded entries.
        BRANCH = 5  # Selects ged::Branch.
        BRANCH_FAST = 6  # Selects ged::BranchFast.
        BRANCH_TIGHT = 7  # Selects ged::BranchTight.
        BRANCH_UNIFORM = 8  # Selects ged::BranchUniform.
        BRANCH_COMPACT = 9  # Selects ged::BranchCompact.
        PARTITION = 10  # Selects ged::Partition.
        HYBRID = 11  # Selects ged::Hybrid.
        RING = 12  # Selects ged::Ring.
        ANCHOR_AWARE_GED = 13  # Selects ged::AnchorAwareGED.
        WALKS = 14  # Selects ged::Walks.
        IPFP = 15  # Selects ged::IPFP.
        BIPARTITE = 16  # Selects ged::Bipartite.
        SUBGRAPH = 17  # Selects ged::Subgraph.
        NODE = 18  # Selects ged::Node.
        RING_ML = 19  # Selects ged::RingML.
        BIPARTITE_ML = 20  # Selects ged::BipartiteML.
        REFINE = 21  # Selects ged::Refine.
        BP_BEAM = 22  # Selects ged::BPBeam.
        SIMULATED_ANNEALING = 23  # Selects ged::SimulatedAnnealing.
        HED = 24  # Selects ged::HED.
        STAR = 25  # Selects ged::Star.

    @unique
    class EditCosts(Enum):
        """Edit cost models that can be selected."""

        CHEM_1 = 1  # Selects ged::CHEM1.
        CHEM_2 = 2  # Selects ged::CHEM2.
        CMU = 3  # Selects ged::CMU.
        GREC_1 = 4  # Selects ged::GREC1.
        GREC_2 = 5  # Selects ged::GREC2.
        PROTEIN = 6  # Selects ged::Protein.
        FINGERPRINT = 7  # Selects ged::Fingerprint.
        LETTER = 8  # Selects ged::Letter.
        LETTER2 = 9  # Selects ged::Letter2.
        NON_SYMBOLIC = 10  # Selects ged::NonSymbolic.
        CONSTANT = 11  # Selects ged::Constant.

    @unique
    class InitType(Enum):
        """Initialization type of the environment.

        With eager initialization, all edit costs are pre-computed when the
        environment is initialized; otherwise they are computed at runtime.

        With shuffled copies, a shuffled copy of every graph is created.
        These copies are used when ged::GEDEnv::run_method() is called with
        two identical graph IDs: one ID is internally replaced by the ID of
        the shuffled copy, so the graph is compared to an isomorphic but
        non-identical graph. Without shuffled copies, such a call compares
        the graph to itself.
        """

        # Lazy initialization, no shuffled graph copies are constructed.
        LAZY_WITHOUT_SHUFFLED_COPIES = 1
        # Eager initialization, no shuffled graph copies are constructed.
        EAGER_WITHOUT_SHUFFLED_COPIES = 2
        # Lazy initialization, shuffled graph copies are constructed.
        LAZY_WITH_SHUFFLED_COPIES = 3
        # Eager initialization, shuffled graph copies are constructed.
        EAGER_WITH_SHUFFLED_COPIES = 4

    @unique
    class AlgorithmState(Enum):
        """State an algorithm can be in."""

        CALLED = 1  # The algorithm has been called.
        INITIALIZED = 2  # The algorithm has been initialized.
        CONVERGED = 3  # The algorithm has converged.
        TERMINATED = 4  # The algorithm has terminated.
class OptionsStringMap(object):
    """Lookup tables from option names (strings) to the ``Options`` enum members."""

    # Map of available computation methods between enum type and string.
    GEDMethod = {
        "BRANCH": Options.GEDMethod.BRANCH,
        "BRANCH_FAST": Options.GEDMethod.BRANCH_FAST,
        "BRANCH_TIGHT": Options.GEDMethod.BRANCH_TIGHT,
        "BRANCH_UNIFORM": Options.GEDMethod.BRANCH_UNIFORM,
        "BRANCH_COMPACT": Options.GEDMethod.BRANCH_COMPACT,
        "PARTITION": Options.GEDMethod.PARTITION,
        "HYBRID": Options.GEDMethod.HYBRID,
        "RING": Options.GEDMethod.RING,
        "ANCHOR_AWARE_GED": Options.GEDMethod.ANCHOR_AWARE_GED,
        "WALKS": Options.GEDMethod.WALKS,
        "IPFP": Options.GEDMethod.IPFP,
        "BIPARTITE": Options.GEDMethod.BIPARTITE,
        "SUBGRAPH": Options.GEDMethod.SUBGRAPH,
        "NODE": Options.GEDMethod.NODE,
        "RING_ML": Options.GEDMethod.RING_ML,
        "BIPARTITE_ML": Options.GEDMethod.BIPARTITE_ML,
        "REFINE": Options.GEDMethod.REFINE,
        "BP_BEAM": Options.GEDMethod.BP_BEAM,
        "SIMULATED_ANNEALING": Options.GEDMethod.SIMULATED_ANNEALING,
        "HED": Options.GEDMethod.HED,
        "STAR": Options.GEDMethod.STAR,
        # ifdef GUROBI
        "F1": Options.GEDMethod.F1,
        "F2": Options.GEDMethod.F2,
        "COMPACT_MIP": Options.GEDMethod.COMPACT_MIP,
        "BLP_NO_EDGE_LABELS": Options.GEDMethod.BLP_NO_EDGE_LABELS,
    }

    # Map of available edit cost functions between enum type and string.
    EditCosts = {
        "CHEM_1": Options.EditCosts.CHEM_1,
        "CHEM_2": Options.EditCosts.CHEM_2,
        "CMU": Options.EditCosts.CMU,
        "GREC_1": Options.EditCosts.GREC_1,
        "GREC_2": Options.EditCosts.GREC_2,
        "LETTER": Options.EditCosts.LETTER,
        "LETTER2": Options.EditCosts.LETTER2,
        "NON_SYMBOLIC": Options.EditCosts.NON_SYMBOLIC,
        "FINGERPRINT": Options.EditCosts.FINGERPRINT,
        "PROTEIN": Options.EditCosts.PROTEIN,
        "CONSTANT": Options.EditCosts.CONSTANT,
    }

    # Map of available initialization types of the environment between enum type and string.
    # The fourth entry used to repeat "LAZY_WITH_SHUFFLED_COPIES", which left
    # "EAGER_WITH_SHUFFLED_COPIES" without a string key.
    InitType = {
        "LAZY_WITHOUT_SHUFFLED_COPIES": Options.InitType.LAZY_WITHOUT_SHUFFLED_COPIES,
        "EAGER_WITHOUT_SHUFFLED_COPIES": Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES,
        "LAZY_WITH_SHUFFLED_COPIES": Options.InitType.LAZY_WITH_SHUFFLED_COPIES,
        "EAGER_WITH_SHUFFLED_COPIES": Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
    }
@unique
class AlgorithmState(Enum):
    """State an algorithm can be in.

    Each member is defined exactly once: an ``Enum`` body that assigns the
    same member name twice cannot be created.
    """

    CALLED = 1  # The algorithm has been called.
    INITIALIZED = 2  # The algorithm has been initialized.
    CONVERGED = 3  # The algorithm has converged.
    TERMINATED = 4  # The algorithm has terminated.
@@ -0,0 +1,181 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Wed Jun 17 15:05:01 2020 | |||||
@author: ljia | |||||
""" | |||||
from gklearn.ged.env import Options, OptionsStringMap | |||||
from gklearn.ged.edit_costs import Constant | |||||
from gklearn.utils import SpecialLabel, dummy_node | |||||
class GEDData(object):
    """Holds the graphs, label bookkeeping and edit cost model of a GED environment."""

    def __init__(self):
        """Create an empty data container with no edit cost model selected."""
        self._graphs = []
        self._graph_names = []
        self._graph_classes = []
        self._num_graphs_without_shuffled_copies = 0
        self._strings_to_internal_node_ids = []
        self._internal_node_ids_to_strings = []
        self._edit_cost = None
        # Pre-computed cost matrices for eager initialization; they stay
        # None until some code builds them.
        self._node_costs = None
        self._edge_costs = None
        self._node_labels = []
        self._edge_labels = []
        self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
        self._delete_edit_cost = True
        self._max_num_nodes = 0
        self._max_num_edges = 0

    def num_graphs(self):
        """Return the number of graphs stored in this instance."""
        return len(self._graphs)

    def shuffled_graph_copies_available(self):
        """Return True if the initialization type is one of the two ``*_WITH_SHUFFLED_COPIES`` types."""
        return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES
                or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)

    def node_cost(self, label1, label2):
        """Return the node relabeling, insertion, or deletion cost.

        :param label1: First node label.
        :param label2: Second node label.
        :return: 0 if the labels are equal, the node insertion cost if
            ``label1`` is ``SpecialLabel.DUMMY``, the node deletion cost if
            ``label2`` is ``SpecialLabel.DUMMY``, and the node relabeling
            cost otherwise. Under eager initialization the value is read
            from the node cost matrix when that matrix exists.
        """
        # Only use the matrix when it has actually been built; otherwise
        # fall through to the on-the-fly computation instead of indexing None.
        if self._eager_init() and self._node_costs is not None:  # @todo: check if correct
            return self._node_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_ins_cost_fun(label2)  # @todo: check
        if label2 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_del_cost_fun(label1)
        return self._edit_cost.node_rel_cost_fun(label1, label2)

    def edge_cost(self, label1, label2):
        """Return the edge relabeling, insertion, or deletion cost.

        :param label1: First edge label.
        :param label2: Second edge label.
        :return: 0 if the labels are equal, the edge insertion cost if
            ``label1`` is ``SpecialLabel.DUMMY``, the edge deletion cost if
            ``label2`` is ``SpecialLabel.DUMMY``, and the edge relabeling
            cost otherwise. Under eager initialization the value is read
            from the edge cost matrix when that matrix exists.
        """
        # Read the *edge* cost matrix here (this lookup used to index the
        # node cost matrix), and only when it has been built.
        if self._eager_init() and self._edge_costs is not None:  # @todo: check if correct
            return self._edge_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_del_cost_fun(label1)
        return self._edit_cost.edge_rel_cost_fun(label1, label2)

    def compute_induced_cost(self, g, h, node_map):
        """Compute the edit cost between two graphs induced by a node map.

        The result is not returned; it is stored on ``node_map`` through
        ``node_map.set_induced_cost()``.

        :param g: Input graph.
        :param h: Input graph.
        :param node_map: Node map whose induced edit cost is to be computed.
        """
        cost = 0

        # Node substitutions and deletions: every node of g against its image.
        for node in g.nodes():
            image = node_map.image(node)
            label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label'])
            cost += self.node_cost(g.nodes[node]['label'], label2)
        # Node insertions: nodes of h whose pre-image is the dummy node.
        for node in h.nodes():
            pre_image = node_map.pre_image(node)
            if pre_image == dummy_node():
                cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])

        # Edge substitutions and deletions: every edge of g against the edge
        # between the images of its end nodes, if h has one.
        for (n1, n2) in g.edges():
            image1 = node_map.image(n1)
            image2 = node_map.image(n2)
            label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
            cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
        # Edge insertions: edges of h with no edge between the pre-images in g.
        for (n1, n2) in h.edges():
            if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
                cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])

        node_map.set_induced_cost(cost)

    def _set_edit_cost(self, edit_cost, edit_cost_constants):
        """Select the edit cost model and instantiate it.

        :param edit_cost: An ``Options.EditCosts`` member or its string name
            (a key of ``OptionsStringMap.EditCosts``).
        :param edit_cost_constants: Sequence of constants passed to the
            constructor of the selected model. It must either be empty (the
            model's defaults are used) or have exactly the number of entries
            the model expects.
        :raises KeyError: If ``edit_cost`` is a string that is not a known name.
        :raises Exception: If the selected edit costs are not handled here or
            the number of constants is wrong.
        """
        if self._delete_edit_cost:
            self._edit_cost = None

        if isinstance(edit_cost, str):
            edit_cost = OptionsStringMap.EditCosts[edit_cost]

        # Number of constructor constants each handled model expects.
        expected_counts = {
            Options.EditCosts.CHEM_1: 4,
            Options.EditCosts.LETTER: 3,
            Options.EditCosts.LETTER2: 5,
            Options.EditCosts.NON_SYMBOLIC: 6,
            Options.EditCosts.CONSTANT: 6,
        }
        # Fail loudly instead of silently leaving the edit cost unset.
        if edit_cost not in expected_counts:
            raise Exception('The selected edit costs {} are not supported.'.format(edit_cost))

        num_expected = expected_counts[edit_cost]
        num_given = len(edit_cost_constants)
        if num_given != 0 and num_given != num_expected:
            raise Exception(
                'Wrong number of constants for selected edit costs Options::EditCosts::{}. '
                'Expected: {} or 0; actual: {}.'.format(edit_cost.name, num_expected, num_given))

        # NOTE(review): only Constant is imported by this module's visible
        # imports; CHEM1, Letter, Letter2 and NonSymbolic raise NameError
        # when selected until they are implemented and imported.
        if edit_cost == Options.EditCosts.CHEM_1:
            cost_class = CHEM1
        elif edit_cost == Options.EditCosts.LETTER:
            cost_class = Letter
        elif edit_cost == Options.EditCosts.LETTER2:
            cost_class = Letter2
        elif edit_cost == Options.EditCosts.NON_SYMBOLIC:
            cost_class = NonSymbolic
        else:
            cost_class = Constant

        # An empty sequence unpacks to no arguments, i.e. the model's defaults.
        self._edit_cost = cost_class(*edit_cost_constants)
        self._delete_edit_cost = True

    def _eager_init(self):
        """Return True if the initialization type is one of the two ``EAGER_*`` types."""
        return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
                or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)
@@ -0,0 +1,369 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Wed Jun 17 12:02:36 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import networkx as nx | |||||
from gklearn.ged.env import Options, OptionsStringMap | |||||
from gklearn.ged.env import GEDData | |||||
class GEDEnv(object): | |||||
def __init__(self):
    """Create an empty, uninitialized environment with no GED method selected."""
    # Environment state.
    self.__initialized = False
    self.__new_graph_ids = []
    self.__ged_data = GEDData()

    # Variables needed for approximating ged_instance_.
    # The result dictionaries are filled by run_method().
    self.__lower_bounds = {}
    self.__upper_bounds = {}
    self.__runtimes = {}
    self.__node_maps = {}

    # Per-graph translation tables between user node IDs and internal node IDs.
    self.__original_to_internal_node_ids = []
    self.__internal_to_original_node_ids = []

    # GED method; stays None until set_method() is called.
    self.__ged_method = None
def set_edit_cost(self, edit_cost, edit_cost_constants=[]): | |||||
""" | |||||
/*! | |||||
* @brief Sets the edit costs to one of the predefined edit costs. | |||||
* @param[in] edit_costs Select one of the predefined edit costs. | |||||
* @param[in] edit_cost_constants Constants passed to the constructor of the edit cost class selected by @p edit_costs. | |||||
*/ | |||||
""" | |||||
self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants) | |||||
def add_graph(self, graph_name='', graph_class=''):
    """Add a new, empty graph to the environment. Call init() afterwards.

    :param graph_name: The name of the added graph. Empty if not specified.
    :param graph_class: The class of the added graph. Empty if not specified.
    :return: The ID of the newly added graph.
    """
    # @todo: graphs are not uninitialized.
    data = self.__ged_data
    self.__initialized = False

    # The new ID is the current number of graphs without shuffled copies.
    new_id = data._num_graphs_without_shuffled_copies
    data._num_graphs_without_shuffled_copies = new_id + 1
    self.__new_graph_ids.append(new_id)

    # One new entry per graph in every per-graph list.
    data._graphs.append(nx.Graph())
    data._graph_names.append(graph_name)
    data._graph_classes.append(graph_class)
    self.__original_to_internal_node_ids.append({})
    self.__internal_to_original_node_ids.append({})
    data._strings_to_internal_node_ids.append({})
    data._internal_node_ids_to_strings.append({})
    return new_id
def add_node(self, graph_id, node_id, node_label):
    """Add a labeled node to a graph of the environment.

    :param graph_id: ID of a graph that has been added to the environment.
    :param node_id: The user-specific ID of the node to add.
    :param node_label: The label of the node to add.
    """
    # @todo: check ids.
    self.__initialized = False
    data = self.__ged_data
    graph = data._graphs[graph_id]

    # The internal ID is the number of nodes the graph holds before the insertion.
    internal_id = graph.number_of_nodes()
    graph.add_node(internal_id, label=node_label)

    # Record the user ID <-> internal ID translation, by value and by string.
    node_name = str(node_id)
    self.__original_to_internal_node_ids[graph_id][node_id] = internal_id
    self.__internal_to_original_node_ids[graph_id][internal_id] = node_id
    data._strings_to_internal_node_ids[graph_id][node_name] = internal_id
    data._internal_node_ids_to_strings[graph_id][internal_id] = node_name
    # @todo: node_label_to_id_
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True): | |||||
""" | |||||
/*! | |||||
* @brief Adds a labeled edge. | |||||
* @param[in] graph_id ID of graph that has been added to the environment. | |||||
* @param[in] tail The user-specific ID of the tail of the edge that has to be added. | |||||
* @param[in] head The user-specific ID of the head of the edge that has to be added. | |||||
* @param[in] edge_label The label of the vertex that has to be added. Set to ged::NoLabel() if template parameter @p UserEdgeLabel equals ged::NoLabel. | |||||
* @param[in] ignore_duplicates If @p true, duplicate edges are ignores. Otherwise, an exception is thrown if an existing edge is added to the graph. | |||||
*/ | |||||
""" | |||||
# @todo: check everything. | |||||
self.__initialized = False | |||||
# @todo: check ignore_duplicates. | |||||
self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label) | |||||
# @todo: edge_id and label_id, edge_label_to_id_. | |||||
def add_nx_graph(self, g, classe, ignore_duplicates=True):
    """Add a networkx graph to the environment.

    Each node and each edge is added with a label built from its attribute
    dictionary: a tuple of the ``(key, value)`` pairs sorted by key.

    :param g: The graph to add (networkx graph).
    :param classe: The class of the graph.
    :param ignore_duplicates: Passed on to add_edge(). True by default.
    :type g: networkx.graph
    :type ignore_duplicates: bool
    :return: The ID of the newly added graph.

    .. note:: The NX graph must respect the GXL structure.
    """
    def as_label(attributes):
        # @todo: if the keys of labels include int and str at the same time.
        return tuple(sorted(attributes.items(), key=lambda kv: kv[0]))

    # @todo: check if the graph name already exists.
    graph_id = self.add_graph(g.name, classe)
    for node in g.nodes:
        self.add_node(graph_id, node, as_label(g.nodes[node]))
    for edge in g.edges:
        tail, head = edge[0], edge[1]
        self.add_edge(graph_id, tail, head, as_label(g.edges[(tail, head)]), ignore_duplicates)
    return graph_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False):
    """Initialize the environment after all graphs have been added.

    :param init_type: An ``Options.InitType`` member or its string name
        (a key of ``OptionsStringMap.InitType``).
    :param print_to_stdout: Currently unused.
    :raises Exception: If no edit costs have been selected.
    """
    if isinstance(init_type, str):
        init_type = OptionsStringMap.InitType[init_type]

    # Throw an exception if no edit costs have been selected.
    if self.__ged_data._edit_cost is None:
        raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')

    # Return if the environment is initialized.
    if self.__initialized:
        return

    # Set initialization type.
    self.__ged_data._init_type = init_type

    # @todo: Construct shuffled graph copies if necessary.

    # Re-initialize adjacency matrices (also previously initialized graphs must be re-initialized because of possible re-allocation).
    # @todo: setup_adjacency_matrix, don't know if necessary.
    # default=0 keeps an environment without graphs from failing here.
    graphs = self.__ged_data._graphs
    self.__ged_data._max_num_nodes = max((nx.number_of_nodes(g) for g in graphs), default=0)
    self.__ged_data._max_num_edges = max((nx.number_of_edges(g) for g in graphs), default=0)

    # Initialize cost matrices if necessary.
    if self.__ged_data._eager_init():
        pass  # @todo: init_cost_matrices_: 1. Update node cost matrix if new node labels have been added to the environment; 2. Update edge cost matrix if new edge labels have been added to the environment.

    # Mark environment as initialized.
    self.__initialized = True
    self.__new_graph_ids.clear()
def set_method(self, method, options=''):
    """Set the GED method to be used by run_method().

    :param method: An ``Options.GEDMethod`` member or its string name
        (a key of ``OptionsStringMap.GEDMethod``).
    :param options: Options for the selected method; passed unchanged to
        the method's ``set_options()``.
    :raises KeyError: If ``method`` is a string that is not a known name.
    :raises Exception: If the selected method is not available.
    """
    if isinstance(method, str):
        method = OptionsStringMap.GEDMethod[method]

    # Reset rather than delete the attribute, so that run_method() and
    # init_method() can still report "No method has been set" if the
    # selection below fails.
    self.__ged_method = None

    if method == Options.GEDMethod.BIPARTITE:
        from gklearn.ged.methods import Bipartite
        ged_method = Bipartite(self.__ged_data)
    else:
        # NOTE(review): BIPARTITE is the only method whose class this
        # function imports; every other branch referenced a name that is
        # not imported and therefore raised NameError.
        raise Exception('The GED method {} is not available.'.format(method))

    self.__ged_method = ged_method
    self.__ged_method.set_options(options)
def run_method(self, g_id, h_id):
    """Run the GED method selected by set_method() on a pair of graphs.

    The lower bound, upper bound, runtime and node map reported by the
    method are stored under the key ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :raises Exception: If an ID is not smaller than the number of graphs,
        if the environment is uninitialized, or if no method has been set.
    """
    for graph_id in (g_id, h_id):
        if graph_id >= self.__ged_data.num_graphs():
            # One formatted string, so str(exception) is a readable sentence
            # instead of the repr of a tuple of fragments.
            raise Exception('The graph with ID {} has not been added to the environment.'.format(graph_id))
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
    if self.__ged_method is None:
        raise Exception('No method has been set. Call set_method() before calling run_method().')

    # Call the selected GED method. When shuffled copies are available, a
    # graph is compared with its shuffled copy rather than with itself.
    if self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id):
        self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id))  # @todo: why shuffle?
    else:
        self.__ged_method.run(g_id, h_id)

    # Store the results of this run.
    key = (g_id, h_id)
    self.__lower_bounds[key] = self.__ged_method.get_lower_bound()
    self.__upper_bounds[key] = self.__ged_method.get_upper_bound()
    self.__runtimes[key] = self.__ged_method.get_runtime()
    self.__node_maps[key] = self.__ged_method.get_node_map()
def init_method(self):
    """Initialize the GED method chosen through set_method().

    :raises Exception: If the environment has not been initialized or if no
        method has been set.
    """
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() before calling init_method().')
    selected_method = self.__ged_method
    if selected_method is None:
        raise Exception('No method has been set. Call set_method() before calling init_method().')
    selected_method.init()
def get_upper_bound(self, g_id, h_id):
    """Return the upper bound for the edit distance between two input graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Upper bound stored by the last call to run_method() with
        arguments ``g_id`` and ``h_id``.
    :raises Exception: If run_method() has not been called for this pair.
    """
    key = (g_id, h_id)
    if key not in self.__upper_bounds:
        # The message names run_method(), the method that actually exists.
        raise Exception('Call run_method({0},{1}) before calling get_upper_bound({0},{1}).'.format(g_id, h_id))
    return self.__upper_bounds[key]
def get_lower_bound(self, g_id, h_id):
    """Return the lower bound for the edit distance between two input graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Lower bound stored by the last call to run_method() with
        arguments ``g_id`` and ``h_id``.
    :raises Exception: If run_method() has not been called for this pair.
    """
    key = (g_id, h_id)
    if key not in self.__lower_bounds:
        # The message names run_method(), the method that actually exists.
        raise Exception('Call run_method({0},{1}) before calling get_lower_bound({0},{1}).'.format(g_id, h_id))
    return self.__lower_bounds[key]
def get_runtime(self, g_id, h_id):
    """Return the runtime recorded for a pair of input graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Runtime stored by the last call to run_method() with arguments
        ``g_id`` and ``h_id``.
    :raises Exception: If run_method() has not been called for this pair.
    """
    key = (g_id, h_id)
    if key not in self.__runtimes:
        # The message names run_method(), the method that actually exists.
        raise Exception('Call run_method({0},{1}) before calling get_runtime({0},{1}).'.format(g_id, h_id))
    return self.__runtimes[key]
def get_init_time(self):
    """Return the initialization time reported by the current GED method.

    :return: Whatever the method's ``get_init_time()`` returns.
    """
    current_method = self.__ged_method
    return current_method.get_init_time()
def get_node_map(self, g_id, h_id):
    """Return the node map recorded for a pair of input graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Node map stored by the last call to run_method() with arguments
        ``g_id`` and ``h_id``.
    :raises Exception: If run_method() has not been called for this pair.
    """
    key = (g_id, h_id)
    if key not in self.__node_maps:
        # The message names run_method(), the method that actually exists.
        raise Exception('Call run_method({0},{1}) before calling get_node_map({0},{1}).'.format(g_id, h_id))
    return self.__node_maps[key]
def get_forward_map(self, g_id, h_id):
    """Return the forward map of the node map stored for two graphs.

    :param g_id: The ID of the first compared graph.
    :param h_id: The ID of the second compared graph.
    :return: The ``forward_map`` attribute of the node map returned by
        get_node_map() for this pair.

    .. seealso:: run_method(), get_upper_bound(), get_lower_bound(), get_backward_map(), get_runtime(), get_node_map()
    .. warning:: run_method() must have been called for the same two graphs before this function.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.forward_map
def get_backward_map(self, g_id, h_id):
    """Return the backward map of the node map stored for two graphs.

    :param g_id: The ID of the first compared graph.
    :param h_id: The ID of the second compared graph.
    :return: The ``backward_map`` attribute of the node map returned by
        get_node_map() for this pair.

    .. seealso:: run_method(), get_upper_bound(), get_lower_bound(), get_forward_map(), get_runtime(), get_node_map()
    .. warning:: run_method() must have been called for the same two graphs before this function.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.backward_map
def get_all_graph_ids(self):
    """Return the IDs of all graphs, not counting shuffled copies.

    :return: The list ``[0, 1, ..., n - 1]``, where ``n`` is the
        ``_num_graphs_without_shuffled_copies`` attribute of the GED data.
    """
    return list(range(self.__ged_data._num_graphs_without_shuffled_copies))
@@ -6,15 +6,27 @@ Created on Wed Apr 22 11:31:26 2020 | |||||
@author: ljia | @author: ljia | ||||
""" | """ | ||||
import numpy as np | import numpy as np | ||||
from gklearn.utils import dummy_node, undefined_node | |||||
class NodeMap(object): | class NodeMap(object): | ||||
def __init__(self, num_nodes_g, num_nodes_h):
    """Create a node map in which every node is still unassigned.

    :param num_nodes_g: Length of the forward map (number of source nodes).
    :param num_nodes_h: Length of the backward map (number of target nodes).
    """
    # Every entry starts as the "undefined node" sentinel from gklearn.utils.
    self.__forward_map = [undefined_node()] * num_nodes_g
    self.__backward_map = [undefined_node()] * num_nodes_h
    self.__induced_cost = np.inf
def clear(self):
    """Clear the node map.

    Both maps are replaced by new lists of the same length whose entries are
    all the value returned by ``undefined_node()``.
    """
    self.__forward_map = [undefined_node() for _ in self.__forward_map]
    self.__backward_map = [undefined_node() for _ in self.__backward_map]
def num_source_nodes(self):
    """Return the number of source nodes, i.e. the length of the forward map."""
    forward = self.__forward_map
    return len(forward)
@@ -28,7 +40,7 @@ class NodeMap(object): | |||||
return self.__forward_map[node] | return self.__forward_map[node] | ||||
else: | else: | ||||
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | ||||
return np.inf | |||||
return undefined_node() | |||||
def pre_image(self, node): | def pre_image(self, node): | ||||
@@ -36,28 +48,28 @@ class NodeMap(object): | |||||
return self.__backward_map[node] | return self.__backward_map[node] | ||||
else: | else: | ||||
raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.') | raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.') | ||||
return np.inf | |||||
return undefined_node() | |||||
def as_relation(self, relation):
    """Write the node map into ``relation`` as a list of ``(i, k)`` pairs.

    ``relation`` is cleared first. Every source node ``i`` whose image ``k``
    is not the undefined node contributes ``(i, k)``; every target node ``k``
    whose pre-image ``i`` is the dummy node contributes ``(i, k)``.

    :param relation: List that is emptied and then filled in place.
    """
    relation.clear()
    for i, k in enumerate(self.__forward_map):
        if k != undefined_node():
            relation.append((i, k))
    for k, i in enumerate(self.__backward_map):
        # Only target nodes mapped to the dummy node are added here.
        if i == dummy_node():
            relation.append((i, k))
def add_assignment(self, i, k): | def add_assignment(self, i, k): | ||||
if i != np.inf: | |||||
if i != dummy_node(): | |||||
if i < len(self.__forward_map): | if i < len(self.__forward_map): | ||||
self.__forward_map[i] = k | self.__forward_map[i] = k | ||||
else: | else: | ||||
raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') | raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') | ||||
if k != np.inf: | |||||
if k != dummy_node(): | |||||
if k < len(self.__backward_map): | if k < len(self.__backward_map): | ||||
self.__backward_map[k] = i | self.__backward_map[k] = i | ||||
else: | else: | ||||
@@ -0,0 +1,3 @@ | |||||
from gklearn.ged.methods.ged_method import GEDMethod | |||||
from gklearn.ged.methods.lsape_based_method import LSAPEBasedMethod | |||||
from gklearn.ged.methods.bipartite import Bipartite |
@@ -0,0 +1,117 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Jun 18 16:09:29 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import networkx as nx | |||||
from gklearn.ged.methods import LSAPEBasedMethod | |||||
from gklearn.ged.util import LSAPESolver | |||||
from gklearn.utils import SpecialLabel | |||||
class Bipartite(LSAPEBasedMethod):
    """LSAPE-based GED method whose costs combine node and incident-edge edits.

    Each substitution entry of the master problem is the node substitution
    cost plus the minimal cost of an LSAPE subproblem over the incident
    edges of the two nodes. Deletion and insertion entries add the costs of
    deleting or inserting every incident edge to the node cost.
    """

    def __init__(self, ged_data):
        super().__init__(ged_data)
        self._compute_lower_bound = False

    ###########################################################################
    # Inherited member functions from LSAPEBasedMethod.
    ###########################################################################

    def _lsape_populate_instance(self, g, h, master_problem):
        """Fill the substitution, deletion and insertion entries of the instance.

        Nodes are addressed by the integers ``0 .. n - 1``; row ``r`` is
        paired with deletion column ``num_h + r`` and column ``c`` with
        insertion row ``num_g + c``.
        """
        num_g = nx.number_of_nodes(g)
        num_h = nx.number_of_nodes(h)

        # Substitution block.
        for row in range(num_g):
            for col in range(num_h):
                master_problem[row, col] = self._compute_substitution_cost(g, h, row, col)

        # Deletion entries.
        for row in range(num_g):
            master_problem[row, num_h + row] = self._compute_deletion_cost(g, row)

        # Insertion entries.
        for col in range(num_h):
            master_problem[num_g + col, col] = self._compute_insertion_cost(h, col)

    ###########################################################################
    # Helper member functions.
    ###########################################################################

    def _compute_substitution_cost(self, g, h, u, v):
        """Return the cost of substituting node ``u`` of ``g`` by node ``v`` of ``h``."""
        # Node substitution cost.
        node_part = self._ged_data.node_cost(g.nodes[u]['label'], h.nodes[v]['label'])

        # Edge subproblem: everything is forbidden (inf) until filled in,
        # except the lower-right block, which is free.
        deg_u, deg_v = g.degree[u], h.degree[v]
        size = deg_u + deg_v
        subproblem = np.ones((size, size)) * np.inf
        subproblem[deg_u:, deg_v:] = 0

        # Edge deletion costs, one per edge incident to u.
        # @todo: should directed graphs be considered?
        for idx, attrs in enumerate(g[u].values()):
            subproblem[idx, deg_v + idx] = self._ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)

        # Edge insertion costs, one per edge incident to v.
        # @todo: should directed graphs be considered?
        for idx, attrs in enumerate(h[v].values()):
            subproblem[deg_u + idx, idx] = self._ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])

        # Edge relabelling costs for every pair of incident edges.
        for row, attrs_g in enumerate(g[u].values()):
            for col, attrs_h in enumerate(h[v].values()):
                subproblem[row, col] = self._ged_data.edge_cost(attrs_g['label'], attrs_h['label'])

        # Solve the subproblem and add its optimum to the node cost.
        solver = LSAPESolver(subproblem)
        solver.set_model(self._lsape_model)
        solver.solve()
        node_part += solver.minimal_cost()
        return node_part

    def _compute_deletion_cost(self, g, v):
        """Return the cost of deleting node ``v`` of ``g`` with its incident edges."""
        total = self._ged_data.node_cost(g.nodes[v]['label'], SpecialLabel.DUMMY)
        for attrs in g[v].values():
            total += self._ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)
        return total

    def _compute_insertion_cost(self, g, v):
        """Return the cost of inserting node ``v`` of ``g`` with its incident edges."""
        total = self._ged_data.node_cost(SpecialLabel.DUMMY, g.nodes[v]['label'])
        for attrs in g[v].values():
            total += self._ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])
        return total
@@ -0,0 +1,195 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Jun 18 15:52:35 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import time | |||||
import networkx as nx | |||||
class GEDMethod(object):
    """Base class of the graph edit distance methods.

    Derived classes customise behaviour by overriding the ``_ged_*`` hooks;
    the public methods time the calls and store the results.
    """

    def __init__(self, ged_data):
        """Store the GED data and reset all results.

        :param ged_data: Data object shared with the environment; ``run()``
            reads the graphs from its ``_graphs`` attribute.
        """
        self._initialized = False
        self._ged_data = ged_data
        self._options = None
        self._lower_bound = 0
        self._upper_bound = np.inf
        self._node_map = [0, 0]  # @todo: placeholder until a run yields a node map.
        self._runtime = None
        self._init_time = None

    def init(self):
        """Initialize the method with options specified by set_options().

        The wall-clock duration of ``_ged_init()`` is kept for
        get_init_time().
        """
        start = time.time()
        self._ged_init()
        end = time.time()
        self._init_time = end - start
        self._initialized = True

    def set_options(self, options):
        """Set the options of the method.

        All options are first reset to their defaults, then each entry of
        ``options`` is handed to ``_ged_parse_option()``. The method is
        marked as uninitialized afterwards.

        :param options: Mapping from option name to argument. A falsy value
            (``None``, ``{}``, ``''``) means "defaults only".
        :raises Exception: If an option is rejected by ``_ged_parse_option()``.
        """
        self._ged_set_default_options()
        for key, val in (options or {}).items():
            if not self._ged_parse_option(key, val):
                # One formatted string, so the message is readable instead
                # of being the repr of a tuple of fragments.
                raise Exception('Invalid option "{}". Usage: options = "{}".'.format(
                    key, self._ged_valid_options_string()))  # @todo: usage string not implemented.
        self._initialized = False

    def run(self, g_id, h_id):
        """Run the method with options specified by set_options().

        Stores the lower bound, the upper bound, the first node map (if any)
        and the wall-clock runtime of the run.

        :param g_id: ID of input graph.
        :param h_id: ID of input graph.
        """
        start = time.time()
        result = self.run_as_util(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id])
        end = time.time()
        self._lower_bound = result['lower_bound']
        self._upper_bound = result['upper_bound']
        if len(result['node_maps']) > 0:
            self._node_map = result['node_maps'][0]
        self._runtime = end - start

    def run_as_util(self, g, h):
        """Run the method on two graphs and return the result.

        :param g: Input graph.
        :param h: Input graph.
        :return: Whatever ``_ged_run()`` returns; ``run()`` expects a dict
            with the keys ``'lower_bound'``, ``'upper_bound'`` and
            ``'node_maps'``.
        """
        # @todo: compute the optimal solution directly if at least one of the
        # two graphs is empty. For now only a notice is printed.
        if g.number_of_nodes() == 0 or h.number_of_nodes() == 0:
            print('This is not implemented.')

        # Run the method.
        return self._ged_run(g, h)

    def get_upper_bound(self):
        """Return the upper bound stored by the last call to run().

        The value is ``np.inf`` until a run has been performed.
        """
        return self._upper_bound

    def get_lower_bound(self):
        """Return the lower bound stored by the last call to run().

        The value is ``0`` until a run has been performed.
        """
        return self._lower_bound

    def get_runtime(self):
        """Return the runtime of the last call to run() in seconds, or ``None`` before any run."""
        return self._runtime

    def get_init_time(self):
        """Return the runtime of the last call to init() in seconds, or ``None`` before any call."""
        return self._init_time

    def get_node_map(self):
        """Return the node map stored by the last call to run()."""
        return self._node_map

    def _ged_init(self):
        """Initialize the method.

        Must be overridden by derived classes that require initialization.
        """
        pass

    def _ged_parse_option(self, option, arg):
        """Parse one option.

        Must be overridden by derived classes that have options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the method
            and ``False`` otherwise. The base implementation accepts nothing.
        """
        return False

    def _ged_run(self, g, h):
        """Run the method with options specified by set_options().

        Must be overridden by derived classes.

        :param g: Input graph.
        :param h: Input graph.
        :return: Result variable; the base implementation returns an empty dict.
        """
        return {}

    def _ged_valid_options_string(self):
        """Return a string describing all valid options.

        Must be overridden by derived classes that have options.
        """
        return ''

    def _ged_set_default_options(self):
        """Set all options to default values.

        Must be overridden by derived classes that have options.
        """
        pass
@@ -0,0 +1,254 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Thu Jun 18 16:01:24 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
import networkx as nx | |||||
from gklearn.ged.methods import GEDMethod | |||||
from gklearn.ged.util import LSAPESolver, misc | |||||
from gklearn.ged.env import NodeMap | |||||
class LSAPEBasedMethod(GEDMethod):
    """Base class of GED methods that work by solving an LSAPE instance.

    Derived classes fill the instance in ``_lsape_populate_instance()`` and
    may override the other ``_lsape_*`` hooks.
    """

    def __init__(self, ged_data):
        super().__init__(ged_data)
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._compute_lower_bound = True
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._centralities = {}
        self._max_num_solutions = 1

    def populate_instance_and_run_as_util(self, g, h):
        """Build the LSAPE instance for two graphs, solve it and collect the result.

        :param g: Input graph.
        :param h: Input graph.
        :return: Dict with the keys ``'node_maps'`` (list of node maps, one
            per solver solution), ``'lower_bound'`` and ``'upper_bound'``.
        """
        result = {'node_maps': [], 'lower_bound': 0, 'upper_bound': np.inf}

        # Populate the LSAPE instance and set up the solver. The instance is
        # a square matrix of side n + m that starts out as all-inf.
        nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h)
        lsape_instance = np.ones((nb1 + nb2, nb1 + nb2)) * np.inf
        self.populate_instance(g, h, lsape_instance)
        lsape_solver = LSAPESolver(lsape_instance)

        # Solve the LSAPE instance.
        if self._solve_optimally:
            lsape_solver.set_model(self._lsape_model)
        else:
            lsape_solver.set_greedy_method(self._greedy_method)
        lsape_solver.solve(self._max_num_solutions)

        # Compute and store lower and upper bound.
        if self._compute_lower_bound and self._solve_optimally:
            result['lower_bound'] = lsape_solver.minimal_cost() * self._lsape_lower_bound_scaling_factor(g, h)  # @todo: test

        for solution_id in range(0, lsape_solver.num_solutions()):
            node_map = NodeMap(nx.number_of_nodes(g), nx.number_of_nodes(h))
            result['node_maps'].append(node_map)
            misc.construct_node_map_from_solver(lsape_solver, node_map, solution_id)
            self._ged_data.compute_induced_cost(g, h, node_map)

        # Add centralities and reoptimize.
        if self._centrality_weight > 0 and self._centrality_method != 'NODE':
            print('This is not implemented.')  # @todo

        # Sort the node maps and set the upper bound.
        if len(result['node_maps']) > 1 or len(result['node_maps']) > self._max_num_solutions:
            print('This is not implemented.')  # @todo:
        if len(result['node_maps']) == 0:
            result['upper_bound'] = np.inf
        else:
            result['upper_bound'] = result['node_maps'][0].induced_cost()

        return result

    def populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance.

        Calls ``_lsape_populate_instance()`` and then sets the block of
        entries with row index >= n and column index >= m to zero, where n
        and m are the numbers of nodes of ``g`` and ``h``.

        :param g: Input graph.
        :param h: Input graph.
        :param lsape_instance: Matrix that is filled in place.
        """
        # @todo: handle the case where the method is not initialized
        # (``if (not this->initialized_) { ... }`` in the C++ code).
        self._lsape_populate_instance(g, h, lsape_instance)
        lsape_instance[nx.number_of_nodes(g):, nx.number_of_nodes(h):] = 0

    ###########################################################################
    # Member functions inherited from GEDMethod.
    ###########################################################################

    def _ged_init(self):
        self._lsape_pre_graph_init(False)
        for graph in self._ged_data._graphs:
            self._init_graph(graph)
        self._lsape_init()

    def _ged_run(self, g, h):
        return self.populate_instance_and_run_as_util(g, h)

    def _ged_parse_option(self, option, arg):
        """Store one option shared by all LSAPE-based methods.

        Options that are not shared are passed on to
        ``_lsape_parse_option()``.

        :return: Currently always ``True`` (see the @todo below), so unknown
            options are accepted silently.
        """
        is_valid_option = False

        if option == 'threads':  # @todo: try.. catch...
            self._num_threads = arg
            is_valid_option = True
        elif option == 'lsape_model':
            self._lsape_model = arg  # @todo
            is_valid_option = True
        elif option == 'greedy_method':
            self._greedy_method = arg  # @todo
            is_valid_option = True
        elif option == 'optimal':
            self._solve_optimally = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_method':
            self._centrality_method = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_weight':
            self._centrality_weight = arg  # @todo
            is_valid_option = True
        elif option == 'max_num_solutions':
            if arg == 'ALL':
                self._max_num_solutions = -1
            else:
                self._max_num_solutions = arg  # @todo
            is_valid_option = True

        is_valid_option = is_valid_option or self._lsape_parse_option(option, arg)
        is_valid_option = True  # @todo: this is not in the C++ code.
        return is_valid_option

    def _ged_set_default_options(self):
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._max_num_solutions = 1
        # Let derived classes reset their own options as well; without this
        # call the hook below would never run.
        self._lsape_set_default_options()

    ###########################################################################
    # Private helper member functions.
    ###########################################################################

    def _init_graph(self, graph):
        if self._centrality_method != 'NODE':
            self._init_centralities(graph)  # @todo: not implemented in this class.
        self._lsape_init_graph(graph)

    ###########################################################################
    # Virtual member functions to be overridden by derived classes.
    ###########################################################################

    def _lsape_init(self):
        """Initialize the method after initializing the global variables for the graphs.

        Must be overridden by derived classes that require custom
        initialization.
        """
        pass

    def _lsape_parse_option(self, option, arg):
        """Parse one option that is not among the ones shared by all derived classes.

        Must be overridden by derived classes that have such options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the method
            and ``False`` otherwise.
        """
        return False

    def _lsape_set_default_options(self):
        """Set all options that are not shared by all derived classes to default values.

        Must be overridden by derived classes that have such options.
        """
        pass

    def _lsape_populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance.

        Must be overridden by derived classes.

        :param g: Input graph.
        :param h: Input graph.
        :param lsape_instance: Square matrix of side n + m, pre-filled with
            ``np.inf``, where n and m are the numbers of nodes of ``g`` and
            ``h``. It is filled in place.
        """
        pass

    def _lsape_init_graph(self, graph):
        """Initialize global variables for one graph.

        Must be overridden by derived classes that require custom global
        variables.

        :param graph: Graph for which the global variables have to be initialized.
        """
        pass

    def _lsape_pre_graph_init(self, called_at_runtime):
        """Initialize the method before the global variables for the graphs are initialized.

        Must be overridden by derived classes that require it.

        :param called_at_runtime: ``True`` if called at runtime and ``False``
            if called during initialization.
        """
        pass

    def _lsape_lower_bound_scaling_factor(self, g, h):
        """Return the factor by which the LSAPE optimum is scaled to obtain the lower bound.

        May be overridden by derived classes whose lower bound needs scaling.

        :param g: Input graph.
        :param h: Input graph.
        :return: ``1``, i.e. the LSAPE optimum is used unscaled.
        """
        return 1
@@ -1 +1,3 @@ | |||||
from gklearn.ged.util.lsape_solver import LSAPESolver | |||||
from gklearn.ged.util.util import compute_geds, ged_options_to_string | from gklearn.ged.util.util import compute_geds, ged_options_to_string | ||||
from gklearn.ged.util.util import compute_geds_cml |
@@ -0,0 +1,121 @@ | |||||
#!/usr/bin/env python3 | |||||
# -*- coding: utf-8 -*- | |||||
""" | |||||
Created on Mon Jun 22 15:37:36 2020 | |||||
@author: ljia | |||||
""" | |||||
import numpy as np | |||||
from scipy.optimize import linear_sum_assignment | |||||
class LSAPESolver(object):
    """Solver for an LSAPE problem instance given as a cost matrix.

    Only optimal solving is implemented; it delegates to
    ``scipy.optimize.linear_sum_assignment``. The model set through
    ``set_model`` is stored but not consulted by ``solve``.
    """

    def __init__(self, cost_matrix=None):
        """Construct a solver for an LSAPE problem instance.

        Parameters
        ----------
        cost_matrix : array_like, optional
            The 2-D cost matrix of the LSAPE problem instance to solve.
        """
        self.__cost_matrix = cost_matrix
        self.__model = 'ECBP'
        self.__greedy_method = 'BASIC'
        self.__solve_optimally = True
        self.__minimal_cost = 0
        self.__row_to_col_assignments = []
        self.__col_to_row_assignments = []
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def clear_solution(self):
        """Clear a previously computed solution.

        Afterwards exactly one (empty) solution slot exists, so
        ``num_solutions()`` returns 1.
        """
        self.__minimal_cost = 0
        self.__row_to_col_assignments.clear()
        self.__col_to_row_assignments.clear()
        self.__row_to_col_assignments.append([])  # @todo
        self.__col_to_row_assignments.append([])
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def set_model(self, model):
        """Make the solver use a specific model for optimal solving.

        Parameters
        ----------
        model : str
            The model that should be used.
        """
        self.__solve_optimally = True
        self.__model = model

    def solve(self, num_solutions=1):
        """Solve the LSAPE problem instance.

        Parameters
        ----------
        num_solutions : int, optional
            The maximal number of solutions that should be computed.
            Currently at most one solution is computed.

        Notes
        -----
        Rows or columns left unassigned (possible for a non-square cost
        matrix) are mapped to the largest value of the index dtype, which
        compares greater than any valid index.
        """
        self.clear_solution()
        if self.__solve_optimally:
            cost_matrix = np.asarray(self.__cost_matrix)
            # @todo: only hungarianLSAPE ('ECBP') can be used.
            row_id, col_id = linear_sum_assignment(cost_matrix)
            num_rows, num_cols = cost_matrix.shape

            # Build both maps directly from the (row, col) pairs. This is the
            # inverse permutation for square matrices and stays correct when
            # some rows or columns are left unassigned.
            row_to_col = np.full(num_rows, np.iinfo(col_id.dtype).max, dtype=col_id.dtype)
            row_to_col[row_id] = col_id
            col_to_row = np.full(num_cols, np.iinfo(row_id.dtype).max, dtype=row_id.dtype)
            col_to_row[col_id] = row_id

            self.__row_to_col_assignments[0] = row_to_col
            self.__col_to_row_assignments[0] = col_to_row
            self.__compute_cost_from_assignments()
            if num_solutions > 1:
                pass  # @todo: enumerate further optimal solutions.
        else:
            # @todo: greedy solving is not implemented; the solution stays empty.
            print('here is non op.')

    def minimal_cost(self):
        """Return the cost of the computed solutions.

        Returns
        -------
        Cost of the computed solutions (0 if nothing has been solved yet).
        """
        return self.__minimal_cost

    def get_assigned_col(self, row, solution_id=0):
        """Return the column assigned to a row.

        Parameters
        ----------
        row : int
            Row whose assigned column should be returned.
        solution_id : int, optional
            ID of the solution where the assignment should be looked up.

        Returns
        -------
        Column to which `row` is assigned in solution `solution_id`, or the
        largest value of the index dtype (playing the role of
        ged::undefined()) if `row` is not assigned to any column.
        """
        return self.__row_to_col_assignments[solution_id][row]

    def get_assigned_row(self, col, solution_id=0):
        """Return the row assigned to a column.

        Parameters
        ----------
        col : int
            Column whose assigned row should be returned.
        solution_id : int, optional
            ID of the solution where the assignment should be looked up.

        Returns
        -------
        Row to which `col` is assigned in solution `solution_id`, or the
        largest value of the index dtype (playing the role of
        ged::undefined()) if `col` is not assigned to any row.
        """
        return self.__col_to_row_assignments[solution_id][col]

    def num_solutions(self):
        """Return the number of stored solutions.

        Returns
        -------
        int
            Actual number of solutions computed by solve(). Might be smaller
            than the `num_solutions` requested there.
        """
        return len(self.__row_to_col_assignments)

    def __compute_cost_from_assignments(self):  # @todo
        """Set the minimal cost to the summed cost of the first solution."""
        cost_matrix = np.asarray(self.__cost_matrix)
        row_to_col = np.asarray(self.__row_to_col_assignments[0])
        # Skip rows carrying the "unassigned" sentinel (>= number of columns).
        assigned_rows = np.flatnonzero(row_to_col < cost_matrix.shape[1])
        self.__minimal_cost = np.sum(cost_matrix[assigned_rows, row_to_col[assigned_rows]])
@@ -5,6 +5,27 @@ Created on Thu Mar 19 18:13:56 2020 | |||||
@author: ljia | @author: ljia | ||||
""" | """ | ||||
from gklearn.utils import dummy_node | |||||
def construct_node_map_from_solver(solver, node_map, solution_id):
    """Fill a node map from one solution of an LSAPE solver.

    The node map is cleared first. Each source node is then mapped to its
    assigned column, or to the dummy node when that column is not a target
    node (deletion). Finally, every target node whose assigned row is not a
    source node is mapped from the dummy node (insertion).

    Parameters
    ----------
    solver
        Object providing ``get_assigned_col(row, solution_id)`` and
        ``get_assigned_row(col, solution_id)``.
    node_map
        Object providing ``clear()``, ``num_source_nodes()``,
        ``num_target_nodes()`` and ``add_assignment(k, l)``; modified in place.
    solution_id
        ID of the solver solution to read the assignments from.
    """
    node_map.clear()
    n_source = node_map.num_source_nodes()
    n_target = node_map.num_target_nodes()

    # Deletions and substitutions.
    for source in range(n_source):
        target = solver.get_assigned_col(source, solution_id)
        node_map.add_assignment(source, dummy_node() if target >= n_target else target)

    # Insertions.
    for target in range(n_target):
        if solver.get_assigned_row(target, solution_id) >= n_source:
            node_map.add_assignment(dummy_node(), target)
def options_string_to_options_map(options_string): | def options_string_to_options_map(options_string): | ||||
"""Transforms an options string into an options map. | """Transforms an options string into an options map. | ||||
@@ -13,6 +13,7 @@ from functools import partial | |||||
import sys | import sys | ||||
from tqdm import tqdm | from tqdm import tqdm | ||||
import networkx as nx | import networkx as nx | ||||
from gklearn.ged.env import GEDEnv | |||||
from gklearn.gedlib import librariesImport, gedlibpy | from gklearn.gedlib import librariesImport, gedlibpy | ||||
@@ -22,7 +23,7 @@ def compute_ged(g1, g2, options): | |||||
ged_env.add_nx_graph(g1, '') | ged_env.add_nx_graph(g1, '') | ||||
ged_env.add_nx_graph(g2, '') | ged_env.add_nx_graph(g2, '') | ||||
listID = ged_env.get_all_graph_ids() | listID = ged_env.get_all_graph_ids() | ||||
ged_env.init() | |||||
ged_env.init(init_type=options['init_option']) | |||||
ged_env.set_method(options['method'], ged_options_to_string(options)) | ged_env.set_method(options['method'], ged_options_to_string(options)) | ||||
ged_env.init_method() | ged_env.init_method() | ||||
@@ -46,6 +47,82 @@ def compute_ged(g1, g2, options): | |||||
return dis, pi_forward, pi_backward | return dis, pi_forward, pi_backward | ||||
def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
    """Compute the pairwise graph edit distances of graphs using GEDEnv.

    Parameters
    ----------
    graphs : list
        Graphs to compare; each is added to the environment via
        ``add_nx_graph``.
    options : dict
        GED options. The keys 'edit_cost', 'edit_cost_constants',
        'init_option', 'method', 'node_labels', 'edge_labels', 'node_attrs'
        and 'edge_attrs' are read. The dict is not modified.
    sort : bool, optional
        If True, each pair is computed from the graph with fewer nodes to
        the one with more nodes.
    parallel : bool, optional
        If True, pairs are distributed over a process pool and the
        'threads' option passed to the GED method is forced to 1.
    verbose : bool, optional
        If True, show a tqdm progress bar on stdout.

    Returns
    -------
    ged_vec : list
        Distances of all pairs (i, j) with i < j, in row-major pair order.
    ged_mat : numpy.ndarray
        Symmetric matrix of the distances, zero on the diagonal.
    n_edit_operations : list
        Numbers of edit operations of the pairs, ordered like `ged_vec`.

    Raises
    ------
    KeyError
        If a required key is missing from `options`.
    """
    # Work on a shallow copy: 'threads' is overwritten below, and that must
    # leak neither into the caller's dict nor into the shared default argument.
    options = dict(options)

    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_type=options['init_option'])
    if parallel:
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    neo_options = {'edit_cost': options['edit_cost'],
                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
    num_graphs = len(graphs)
    ged_mat = np.zeros((num_graphs, num_graphs))
    if parallel:
        len_itr = num_graphs * (num_graphs - 1) // 2
        ged_vec = [0] * len_itr
        n_edit_operations = [0] * len_itr
        itr = combinations(range(num_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100

        # NOTE(review): a nested function cannot be pickled, so this
        # initializer presumably relies on the 'fork' start method - confirm.
        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
        try:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
            if verbose:
                iterator = tqdm(iterator, desc='computing GEDs', file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Position of pair (i, j), i < j, in the condensed pair vector.
                idx_itr = num_graphs * i + j - (i + 1) * (i + 2) // 2
                ged_vec[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_edit_operations[idx_itr] = n_eo_tmp
        except BaseException:
            # Do not keep workers busy with results nobody will collect.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        iterator = range(num_graphs)
        if verbose:
            iterator = tqdm(iterator, desc='computing GEDs', file=sys.stdout)
        for i in iterator:
            for j in range(i + 1, num_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Computed in the reverse direction, so the two maps swap roles.
                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): | def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): | ||||
# initialize ged env. | # initialize ged env. | ||||
ged_env = gedlibpy.GEDEnv() | ged_env = gedlibpy.GEDEnv() | ||||
@@ -13,5 +13,6 @@ __date__ = "March 2020" | |||||
from gklearn.preimage.preimage_generator import PreimageGenerator | from gklearn.preimage.preimage_generator import PreimageGenerator | ||||
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | ||||
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator | from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator | ||||
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML | |||||
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | ||||
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class | from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class |
@@ -0,0 +1,57 @@ | |||||
"""Tests of GEDEnv. | |||||
""" | |||||
def test_GEDEnv():
    """Test GEDEnv by computing a GED between two graphs of dataset MUTAG.

    Any exception raised while running the GED environment is re-raised as
    an AssertionError, so it shows up as a test failure.
    """
    # 1. Get dataset.
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'

    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # 2. Compute graph edit distance.
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                              edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
                              )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
        options = {'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   'threads': 1  # parallel threads.
                   }
        ged_env.set_method('BIPARTITE',  # GED method.
                           options  # options for GED method.
                           )
        ged_env.init_method()  # initialize GED method.

        ged_env.run_method(listID[0], listID[1])  # run.

        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        # GED between the two graphs; called only to exercise the API.
        ged_env.get_upper_bound(listID[0], listID[1])

        import networkx as nx
    except Exception as exception:
        # Explicit raise: `assert False` would be stripped under `python -O`.
        raise AssertionError(exception) from exception

    # Two separate asserts: in `assert a, b` the second expression is only
    # the failure message and is never checked.
    assert len(pi_forward) == nx.number_of_nodes(graph1)
    assert len(pi_backward) == nx.number_of_nodes(graph2)


if __name__ == "__main__":
    test_GEDEnv()
@@ -68,4 +68,7 @@ def test_median_preimage_generator(): | |||||
print('\n-------------------------------------') | print('\n-------------------------------------') | ||||
print('fit method:', fit_method, '\n') | print('fit method:', fit_method, '\n') | ||||
mpg_options['fit_method'] = fit_method | mpg_options['fit_method'] = fit_method | ||||
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4)) | |||||
try: | |||||
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4)) | |||||
except Exception as exception: | |||||
assert False, exception |
@@ -20,7 +20,7 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||||
from gklearn.utils.timer import Timer | from gklearn.utils.timer import Timer | ||||
from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
from gklearn.utils.utils import compute_gram_matrices_by_class | from gklearn.utils.utils import compute_gram_matrices_by_class | ||||
from gklearn.utils.utils import SpecialLabel | |||||
from gklearn.utils.utils import SpecialLabel, dummy_node, undefined_node, dummy_edge | |||||
from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix | from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix | ||||
from gklearn.utils.trie import Trie | from gklearn.utils.trie import Trie | ||||
from gklearn.utils.knn import knn_cv, knn_classification | from gklearn.utils.knn import knn_cv, knn_classification |
@@ -472,14 +472,6 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||||
for ed, attrs in G.edges(data=True): | for ed, attrs in G.edges(data=True): | ||||
attributes.append(tuple(attrs[aname] for aname in attr_names)) | attributes.append(tuple(attrs[aname] for aname in attr_names)) | ||||
return attributes | return attributes | ||||
@unique
class SpecialLabel(Enum):
    """Enumeration of special labels."""

    # The value is written out explicitly because enum.auto does not exist
    # in Python 3.5.
    DUMMY = 1  # The dummy label.
def normalize_gram_matrix(gram_matrix): | def normalize_gram_matrix(gram_matrix): | ||||
@@ -506,4 +498,44 @@ def compute_distance_matrix(gram_matrix): | |||||
dis_max = np.max(np.max(dis_mat)) | dis_max = np.max(np.max(dis_mat)) | ||||
dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | ||||
dis_mean = np.mean(np.mean(dis_mat)) | dis_mean = np.mean(np.mean(dis_mat)) | ||||
return dis_mat, dis_max, dis_min, dis_mean | |||||
return dis_mat, dis_max, dis_min, dis_mean | |||||
def dummy_node():
    """Return the ID of the dummy node.

    Returns
    -------
    float
        ``np.inf``, used as the ID of the dummy node.
    """
    # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
    return np.inf
def undefined_node():
    """Return the ID of an undefined node.

    Returns
    -------
    float
        ``np.inf``, used as the ID of an undefined node.
    """
    return np.inf
def dummy_edge():
    """Return the ID of the dummy edge.

    Returns
    -------
    float
        ``np.inf``, used as the ID of the dummy edge.
    """
    return np.inf
@unique
class SpecialLabel(Enum):
    """Enumeration of special labels."""

    # The value is written out explicitly because enum.auto does not exist
    # in Python 3.5.
    DUMMY = 1  # The dummy label.