@@ -1,2 +1,4 @@ | |||
[run] | |||
omit = gklearn/tests/* | |||
omit = | |||
gklearn/tests/* | |||
gklearn/examples/* |
@@ -0,0 +1,58 @@ | |||
# -*- coding: utf-8 -*-
"""compute_graph_edit_distance.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3

**This script demonstrates how to compute a graph edit distance.**
---

**0. Install `graphkit-learn`.**
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'

# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
# Pick the first two graphs of the dataset as the pair to compare.
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)

"""**2. Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv

ged_env = GEDEnv() # initialize GED environment.
ged_env.set_edit_cost('CONSTANT', # GED cost type.
                      edit_cost_constants=[3, 3, 1, 3, 3, 1] # edit costs.
                      )
ged_env.add_nx_graph(graph1, '') # add graph1
ged_env.add_nx_graph(graph2, '') # add graph2
listID = ged_env.get_all_graph_ids() # get list IDs of graphs
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES') # initialize GED environment.
options = {'initialization_method': 'RANDOM', # or 'NODE', etc.
           'threads': 1 # parallel threads.
           }
ged_env.set_method('BIPARTITE', # GED method.
                   options # options for GED method.
                   )
ged_env.init_method() # initialize GED method.

ged_env.run_method(listID[0], listID[1]) # run.

pi_forward = ged_env.get_forward_map(listID[0], listID[1]) # forward map.
pi_backward = ged_env.get_backward_map(listID[0], listID[1]) # backward map.
dis = ged_env.get_upper_bound(listID[0], listID[1]) # GED between two graphs (upper bound).
print(pi_forward)
print(pi_backward)
print(dis)
@@ -0,0 +1,2 @@ | |||
from gklearn.ged.edit_costs.edit_cost import EditCost | |||
from gklearn.ged.edit_costs.constant import Constant |
@@ -0,0 +1,50 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Jun 17 17:52:23 2020 | |||
@author: ljia | |||
""" | |||
from gklearn.ged.edit_costs import EditCost | |||
class Constant(EditCost):
    """Edit cost model in which every kind of edit has a fixed price.

    Insertions and deletions always cost the configured constant,
    whatever the label is. A relabeling costs the configured constant
    when the two labels compare unequal and 0 otherwise.

    Parameters
    ----------
    node_ins_cost, node_del_cost, node_rel_cost :
        Cost of inserting, deleting and relabeling a node (default 1 each).
    edge_ins_cost, edge_del_cost, edge_rel_cost :
        Cost of inserting, deleting and relabeling an edge (default 1 each).
    """

    def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1,
                 edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
        # Node-related constants.
        self.__node_ins_cost = node_ins_cost
        self.__node_del_cost = node_del_cost
        self.__node_rel_cost = node_rel_cost
        # Edge-related constants.
        self.__edge_ins_cost = edge_ins_cost
        self.__edge_del_cost = edge_del_cost
        self.__edge_rel_cost = edge_rel_cost

    def node_ins_cost_fun(self, node_label):
        """Return the constant node insertion cost; the label is not inspected."""
        return self.__node_ins_cost

    def node_del_cost_fun(self, node_label):
        """Return the constant node deletion cost; the label is not inspected."""
        return self.__node_del_cost

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the node relabeling cost, or 0 when the labels are not different."""
        return self.__node_rel_cost if node_label_1 != node_label_2 else 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the constant edge insertion cost; the label is not inspected."""
        return self.__edge_ins_cost

    def edge_del_cost_fun(self, edge_label):
        """Return the constant edge deletion cost; the label is not inspected."""
        return self.__edge_del_cost

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the edge relabeling cost, or 0 when the labels are not different."""
        return self.__edge_rel_cost if edge_label_1 != edge_label_2 else 0
@@ -0,0 +1,88 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Jun 17 17:49:24 2020 | |||
@author: ljia | |||
""" | |||
class EditCost(object):
    """Base class for edit cost functions.

    Every cost function of this base class returns 0. Derived classes are
    meant to override the six ``*_cost_fun`` methods with their own costs.
    """

    def __init__(self):
        pass

    def node_ins_cost_fun(self, node_label):
        """Node insertion cost function.

        Parameters
        ----------
        node_label :
            A node label.

        Returns
        -------
        The cost of inserting a node with label ``node_label``
        (always 0 in this base class).

        Notes
        -----
        Must be implemented by derived classes.
        """
        return 0

    def node_del_cost_fun(self, node_label):
        """Node deletion cost function.

        Parameters
        ----------
        node_label :
            A node label.

        Returns
        -------
        The cost of deleting a node with label ``node_label``
        (always 0 in this base class).

        Notes
        -----
        Must be implemented by derived classes.
        """
        return 0

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Node relabeling cost function.

        Parameters
        ----------
        node_label_1 :
            A node label.
        node_label_2 :
            A node label.

        Returns
        -------
        The cost of changing a node's label from ``node_label_1`` to
        ``node_label_2`` (always 0 in this base class).

        Notes
        -----
        Must be implemented by derived classes.
        """
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Edge insertion cost function.

        Parameters
        ----------
        edge_label :
            An edge label.

        Returns
        -------
        The cost of inserting an edge with label ``edge_label``
        (always 0 in this base class).

        Notes
        -----
        Must be implemented by derived classes.
        """
        return 0

    def edge_del_cost_fun(self, edge_label):
        """Edge deletion cost function.

        Parameters
        ----------
        edge_label :
            An edge label.

        Returns
        -------
        The cost of deleting an edge with label ``edge_label``
        (always 0 in this base class).

        Notes
        -----
        Must be implemented by derived classes.
        """
        return 0

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Edge relabeling cost function.

        Parameters
        ----------
        edge_label_1 :
            An edge label.
        edge_label_2 :
            An edge label.

        Returns
        -------
        The cost of changing an edge's label from ``edge_label_1`` to
        ``edge_label_2`` (always 0 in this base class).

        Notes
        -----
        Must be implemented by derived classes.
        """
        return 0
@@ -1,2 +1,4 @@ | |||
from gklearn.ged.env.common_types import AlgorithmState | |||
from gklearn.ged.env.common_types import Options, OptionsStringMap, AlgorithmState | |||
from gklearn.ged.env.ged_data import GEDData | |||
from gklearn.ged.env.ged_env import GEDEnv | |||
from gklearn.ged.env.node_map import NodeMap |
@@ -8,11 +8,152 @@ Created on Thu Mar 19 18:17:38 2020 | |||
from enum import Enum, unique | |||
class Options(object):
    """Namespace grouping the enums used to configure a GED environment."""

    @unique
    class GEDMethod(Enum):
        """Available GED computation methods."""

        # The first four entries sit between "#ifdef GUROBI" / "#endif"
        # markers in the original comments, i.e. they are presumably the
        # solver-dependent methods -- TODO confirm.
        F1 = 1
        F2 = 2
        COMPACT_MIP = 3
        BLP_NO_EDGE_LABELS = 4
        # Methods outside the GUROBI markers.
        BRANCH = 5
        BRANCH_FAST = 6
        BRANCH_TIGHT = 7
        BRANCH_UNIFORM = 8
        BRANCH_COMPACT = 9
        PARTITION = 10
        HYBRID = 11
        RING = 12
        ANCHOR_AWARE_GED = 13
        WALKS = 14
        IPFP = 15
        BIPARTITE = 16
        SUBGRAPH = 17
        NODE = 18
        RING_ML = 19
        BIPARTITE_ML = 20
        REFINE = 21
        BP_BEAM = 22
        SIMULATED_ANNEALING = 23
        HED = 24
        STAR = 25

    @unique
    class EditCosts(Enum):
        """Available predefined edit cost models."""

        CHEM_1 = 1
        CHEM_2 = 2
        CMU = 3
        GREC_1 = 4
        GREC_2 = 5
        PROTEIN = 6
        FINGERPRINT = 7
        LETTER = 8
        LETTER2 = 9
        NON_SYMBOLIC = 10
        CONSTANT = 11

    @unique
    class InitType(Enum):
        """Initialization type of the environment.

        With eager initialization all edit costs are pre-computed when the
        environment is initialized; otherwise they are computed at runtime.

        With shuffled copies, a shuffled copy of every graph is created.
        Those copies are used when the GED method is run with two identical
        graph IDs: one ID is internally replaced by the ID of the shuffled
        copy, so the graph is compared to an isomorphic but non-identical
        graph. Without shuffled copies, running with two identical graph
        IDs compares a graph to itself.
        """

        # Lazy initialization, no shuffled graph copies are constructed.
        LAZY_WITHOUT_SHUFFLED_COPIES = 1
        # Eager initialization, no shuffled graph copies are constructed.
        EAGER_WITHOUT_SHUFFLED_COPIES = 2
        # Lazy initialization, shuffled graph copies are constructed.
        LAZY_WITH_SHUFFLED_COPIES = 3
        # Eager initialization, shuffled graph copies are constructed.
        EAGER_WITH_SHUFFLED_COPIES = 4

    @unique
    class AlgorithmState(Enum):
        """State an algorithm can be in."""

        # The algorithm has been called.
        CALLED = 1
        # The algorithm has been initialized.
        INITIALIZED = 2
        # The algorithm has converged.
        CONVERGED = 3
        # The algorithm has terminated.
        TERMINATED = 4
class OptionsStringMap(object):
    """String-to-enum lookup tables for the option enums in ``Options``.

    Each class attribute maps the textual name of an option to the
    corresponding enum member, so that callers may pass plain strings.
    """

    # Map of available computation methods between enum type and string.
    GEDMethod = {
        "BRANCH": Options.GEDMethod.BRANCH,
        "BRANCH_FAST": Options.GEDMethod.BRANCH_FAST,
        "BRANCH_TIGHT": Options.GEDMethod.BRANCH_TIGHT,
        "BRANCH_UNIFORM": Options.GEDMethod.BRANCH_UNIFORM,
        "BRANCH_COMPACT": Options.GEDMethod.BRANCH_COMPACT,
        "PARTITION": Options.GEDMethod.PARTITION,
        "HYBRID": Options.GEDMethod.HYBRID,
        "RING": Options.GEDMethod.RING,
        "ANCHOR_AWARE_GED": Options.GEDMethod.ANCHOR_AWARE_GED,
        "WALKS": Options.GEDMethod.WALKS,
        "IPFP": Options.GEDMethod.IPFP,
        "BIPARTITE": Options.GEDMethod.BIPARTITE,
        "SUBGRAPH": Options.GEDMethod.SUBGRAPH,
        "NODE": Options.GEDMethod.NODE,
        "RING_ML": Options.GEDMethod.RING_ML,
        "BIPARTITE_ML": Options.GEDMethod.BIPARTITE_ML,
        "REFINE": Options.GEDMethod.REFINE,
        "BP_BEAM": Options.GEDMethod.BP_BEAM,
        "SIMULATED_ANNEALING": Options.GEDMethod.SIMULATED_ANNEALING,
        "HED": Options.GEDMethod.HED,
        "STAR": Options.GEDMethod.STAR,
        # ifdef GUROBI
        "F1": Options.GEDMethod.F1,
        "F2": Options.GEDMethod.F2,
        "COMPACT_MIP": Options.GEDMethod.COMPACT_MIP,
        "BLP_NO_EDGE_LABELS": Options.GEDMethod.BLP_NO_EDGE_LABELS,
    }

    # Map of available edit cost functions between enum type and string.
    EditCosts = {
        "CHEM_1": Options.EditCosts.CHEM_1,
        "CHEM_2": Options.EditCosts.CHEM_2,
        "CMU": Options.EditCosts.CMU,
        "GREC_1": Options.EditCosts.GREC_1,
        "GREC_2": Options.EditCosts.GREC_2,
        "LETTER": Options.EditCosts.LETTER,
        "LETTER2": Options.EditCosts.LETTER2,
        "NON_SYMBOLIC": Options.EditCosts.NON_SYMBOLIC,
        "FINGERPRINT": Options.EditCosts.FINGERPRINT,
        "PROTEIN": Options.EditCosts.PROTEIN,
        "CONSTANT": Options.EditCosts.CONSTANT,
    }

    # Map of available initialization types of the environment between enum
    # type and string. All four members of Options.InitType are listed; the
    # last entry previously repeated the LAZY_WITH_SHUFFLED_COPIES key, which
    # left "EAGER_WITH_SHUFFLED_COPIES" unmapped.
    InitType = {
        "LAZY_WITHOUT_SHUFFLED_COPIES": Options.InitType.LAZY_WITHOUT_SHUFFLED_COPIES,
        "EAGER_WITHOUT_SHUFFLED_COPIES": Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES,
        "LAZY_WITH_SHUFFLED_COPIES": Options.InitType.LAZY_WITH_SHUFFLED_COPIES,
        "EAGER_WITH_SHUFFLED_COPIES": Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
    }
@unique
class AlgorithmState(Enum):
    """State an algorithm can be in.

    The members are declared exactly once: an Enum class body that assigns
    the same member name twice raises ``TypeError`` at class creation.
    """

    CALLED = 1  # The algorithm has been called.
    INITIALIZED = 2  # The algorithm has been initialized.
    CONVERGED = 3  # The algorithm has converged.
    TERMINATED = 4  # The algorithm has terminated.
@@ -0,0 +1,181 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Jun 17 15:05:01 2020 | |||
@author: ljia | |||
""" | |||
from gklearn.ged.env import Options, OptionsStringMap | |||
from gklearn.ged.edit_costs import Constant | |||
from gklearn.utils import SpecialLabel, dummy_node | |||
class GEDData(object):
    """Holds the graphs, labels and edit cost model used for GED computations.

    All attributes start out empty (or ``None``) and are filled in later;
    the default initialization type is ``EAGER_WITHOUT_SHUFFLED_COPIES``.
    """

    def __init__(self):
        # Per-graph storage; the three lists are index-aligned by graph ID.
        self._graphs = []
        self._graph_names = []
        self._graph_classes = []
        self._num_graphs_without_shuffled_copies = 0
        # Per-graph mappings between string node IDs and internal node IDs.
        self._strings_to_internal_node_ids = []
        self._internal_node_ids_to_strings = []
        # Edit cost model and optional pre-computed cost lookups.
        self._edit_cost = None
        self._node_costs = None
        self._edge_costs = None
        self._node_labels = []
        self._edge_labels = []
        self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
        self._delete_edit_cost = True
        self._max_num_nodes = 0
        self._max_num_edges = 0

    def num_graphs(self):
        """Return the number of graphs currently stored."""
        return len(self._graphs)

    def shuffled_graph_copies_available(self):
        """Return True if the initialization type is one of the
        ``*_WITH_SHUFFLED_COPIES`` variants.
        """
        return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES
                or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)

    def node_cost(self, label1, label2):
        """Return the node relabeling, insertion, or deletion cost.

        Parameters
        ----------
        label1 :
            First node label.
        label2 :
            Second node label.

        Returns
        -------
        With eager initialization and an available node cost lookup, the
        pre-computed entry for ``(label1, label2)``. Otherwise: 0 if the
        labels are equal, the insertion cost of ``label2`` if ``label1`` is
        the dummy label, the deletion cost of ``label1`` if ``label2`` is
        the dummy label, and the relabeling cost in all other cases.
        """
        # Only trust the pre-computed lookup when it has actually been built;
        # otherwise evaluate the cost on the fly instead of indexing None.
        if self._eager_init() and self._node_costs is not None:  # @todo: check if correct
            return self._node_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_ins_cost_fun(label2)  # @todo: check
        if label2 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_del_cost_fun(label1)
        return self._edit_cost.node_rel_cost_fun(label1, label2)

    def edge_cost(self, label1, label2):
        """Return the edge relabeling, insertion, or deletion cost.

        Parameters
        ----------
        label1 :
            First edge label.
        label2 :
            Second edge label.

        Returns
        -------
        With eager initialization and an available edge cost lookup, the
        pre-computed entry for ``(label1, label2)``. Otherwise: 0 if the
        labels are equal, the insertion cost of ``label2`` if ``label1`` is
        the dummy label, the deletion cost of ``label1`` if ``label2`` is
        the dummy label, and the relabeling cost in all other cases.
        """
        # Edge costs must come from the edge lookup (not the node lookup),
        # and only when that lookup has actually been built.
        if self._eager_init() and self._edge_costs is not None:  # @todo: check if correct
            return self._edge_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_del_cost_fun(label1)
        return self._edit_cost.edge_rel_cost_fun(label1, label2)

    def compute_induced_cost(self, g, h, node_map):
        """Compute the edit cost between two graphs induced by a node map.

        The result is not returned; it is stored on ``node_map`` through
        ``node_map.set_induced_cost``.

        Parameters
        ----------
        g :
            Input graph.
        h :
            Input graph.
        node_map :
            Node map whose induced edit cost is to be computed. It must
            provide ``image``, ``pre_image`` and ``set_induced_cost``.
        """
        cost = 0

        # Node costs: every node of g is either relabeled or deleted ...
        for node in g.nodes():
            image = node_map.image(node)
            label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label'])
            cost += self.node_cost(g.nodes[node]['label'], label2)
        # ... and every node of h without a pre-image is inserted.
        for node in h.nodes():
            pre_image = node_map.pre_image(node)
            if pre_image == dummy_node():
                cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])

        # Edge costs: every edge of g is either relabeled or deleted ...
        for (n1, n2) in g.edges():
            image1 = node_map.image(n1)
            image2 = node_map.image(n2)
            label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
            cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
        # ... and every edge of h whose pre-image is not an edge of g is inserted.
        for (n1, n2) in h.edges():
            if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
                cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])

        node_map.set_induced_cost(cost)

    @staticmethod
    def _check_num_constants(name, expected, edit_cost_constants):
        """Raise if the number of constants is neither ``expected`` nor 0."""
        actual = len(edit_cost_constants)
        if actual != expected and actual != 0:
            raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::'
                            + name + '. Expected: ' + str(expected) + ' or 0; actual: ' + str(actual) + '.')

    def _set_edit_cost(self, edit_cost, edit_cost_constants):
        """Select the edit cost model.

        Parameters
        ----------
        edit_cost :
            A member of ``Options.EditCosts`` or its string name.
        edit_cost_constants :
            Constants passed to the constructor of the selected edit cost
            class. Either empty (use the class defaults) or exactly as many
            values as the class expects.

        Raises
        ------
        Exception
            If the number of constants does not fit the selected edit costs.
        """
        if self._delete_edit_cost:
            self._edit_cost = None

        if isinstance(edit_cost, str):
            edit_cost = OptionsStringMap.EditCosts[edit_cost]

        # Unpacking an empty sequence is the same as calling the constructor
        # without arguments, so both accepted lengths share one call.
        # NOTE(review): CHEM1, Letter, Letter2 and NonSymbolic are not among
        # the names imported by this module as visible here -- confirm they
        # are in scope before selecting those edit costs.
        if edit_cost == Options.EditCosts.CHEM_1:
            self._check_num_constants('CHEM_1', 4, edit_cost_constants)
            self._edit_cost = CHEM1(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.LETTER:
            self._check_num_constants('LETTER', 3, edit_cost_constants)
            self._edit_cost = Letter(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.LETTER2:
            self._check_num_constants('LETTER2', 5, edit_cost_constants)
            self._edit_cost = Letter2(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.NON_SYMBOLIC:
            self._check_num_constants('NON_SYMBOLIC', 6, edit_cost_constants)
            self._edit_cost = NonSymbolic(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.CONSTANT:
            self._check_num_constants('CONSTANT', 6, edit_cost_constants)
            self._edit_cost = Constant(*edit_cost_constants)

        self._delete_edit_cost = True

    def _eager_init(self):
        """Return True if the initialization type is one of the ``EAGER_*`` variants."""
        return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
                or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)
@@ -0,0 +1,369 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Wed Jun 17 12:02:36 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.ged.env import Options, OptionsStringMap | |||
from gklearn.ged.env import GEDData | |||
class GEDEnv(object): | |||
def __init__(self):
    """Create an empty, uninitialized environment with no GED method set."""
    # Environment state.
    self.__initialized = False
    self.__new_graph_ids = []
    self.__ged_data = GEDData()
    self.__ged_method = None
    # Per-graph translation tables between user node IDs and internal node IDs.
    self.__original_to_internal_node_ids = []
    self.__internal_to_original_node_ids = []
    # Variables needed for approximating ged_instance_.
    self.__lower_bounds = {}
    self.__upper_bounds = {}
    self.__runtimes = {}
    self.__node_maps = {}
def set_edit_cost(self, edit_cost, edit_cost_constants=None):
    """Set the edit costs to one of the predefined edit costs.

    Parameters
    ----------
    edit_cost :
        Selects one of the predefined edit costs.
    edit_cost_constants : optional
        Constants passed to the constructor of the edit cost class selected
        by ``edit_cost``. Omitting it (or passing ``None``) is the same as
        passing an empty list.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between all calls of this method.
    if edit_cost_constants is None:
        edit_cost_constants = []
    self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants)
def add_graph(self, graph_name='', graph_class=''):
    """Add a new empty graph to the environment.

    The environment is marked as uninitialized, so ``init()`` has to be
    called after this method.

    Parameters
    ----------
    graph_name : optional
        The name of the added graph. Empty if not specified.
    graph_class : optional
        The class of the added graph. Empty if not specified.

    Returns
    -------
    The ID of the newly added graph.
    """
    # @todo: graphs are not uninitialized.
    data = self.__ged_data
    self.__initialized = False

    # The new ID is the number of graphs registered so far.
    graph_id = data._num_graphs_without_shuffled_copies
    data._num_graphs_without_shuffled_copies += 1
    self.__new_graph_ids.append(graph_id)

    # Register the graph together with its metadata.
    data._graphs.append(nx.Graph())
    data._graph_names.append(graph_name)
    data._graph_classes.append(graph_class)

    # Start with empty node ID translation tables for this graph.
    self.__original_to_internal_node_ids.append({})
    self.__internal_to_original_node_ids.append({})
    data._strings_to_internal_node_ids.append({})
    data._internal_node_ids_to_strings.append({})

    return graph_id
def add_node(self, graph_id, node_id, node_label):
    """Add a labeled node to a graph of the environment.

    The node is stored under an internal ID equal to the number of nodes
    the graph had before the call; the translation tables between the
    user-specific ID (and its string form) and the internal ID are updated.

    Parameters
    ----------
    graph_id :
        ID of a graph that has been added to the environment.
    node_id :
        The user-specific ID of the node that has to be added.
    node_label :
        The label of the node that has to be added.
    """
    # @todo: check ids.
    data = self.__ged_data
    graph = data._graphs[graph_id]
    self.__initialized = False

    internal_node_id = nx.number_of_nodes(graph)
    graph.add_node(internal_node_id, label=node_label)

    # Keep both directions of the ID translation in sync.
    self.__original_to_internal_node_ids[graph_id][node_id] = internal_node_id
    self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id
    node_id_string = str(node_id)
    data._strings_to_internal_node_ids[graph_id][node_id_string] = internal_node_id
    data._internal_node_ids_to_strings[graph_id][internal_node_id] = node_id_string
    # @todo: node_label_to_id_
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True):
    """Add a labeled edge to a graph of the environment.

    Parameters
    ----------
    graph_id :
        ID of a graph that has been added to the environment.
    nd_from :
        The user-specific ID of the tail of the edge that has to be added.
    nd_to :
        The user-specific ID of the head of the edge that has to be added.
    edge_label :
        The label of the edge that has to be added.
    ignore_duplicates : bool, optional
        If True, an edge that already exists is left untouched. Otherwise
        an exception is raised when an existing edge is added again.

    Raises
    ------
    KeyError
        If ``nd_from`` or ``nd_to`` has not been added to the graph.
    Exception
        If the edge already exists and ``ignore_duplicates`` is False.
    """
    # @todo: check everything.
    self.__initialized = False

    graph = self.__ged_data._graphs[graph_id]
    node_ids = self.__original_to_internal_node_ids[graph_id]
    tail = node_ids[nd_from]
    head = node_ids[nd_to]

    # Honour ignore_duplicates instead of silently overwriting the label
    # of an edge that is already present.
    if graph.has_edge(tail, head):
        if ignore_duplicates:
            return
        raise Exception('An edge between ' + str(nd_from) + ' and ' + str(nd_to)
                        + ' already exists in graph ' + str(graph_id) + '.')

    graph.add_edge(tail, head, label=edge_label)
    # @todo: edge_id and label_id, edge_label_to_id_.
def add_nx_graph(self, g, classe, ignore_duplicates=True):
    """Add a networkx graph to the environment.

    Node and edge attribute dictionaries are turned into labels: each label
    is the tuple of the ``(key, value)`` attribute pairs sorted by key.

    :param g: The graph to add (networkx graph)
    :param classe: The class of the graph, passed on to ``add_graph``
    :param ignore_duplicates: Passed on to ``add_edge`` for every edge. True by default
    :type g: networkx.graph
    :type ignore_duplicates: bool
    :return: The ID of the newly added graph

    .. note:: The NX graph must respect the GXL structure. Please see how a GXL graph is constructed.
    """
    def as_label(attributes):
        # Sorting by attribute name makes the label independent of insertion order.
        return tuple(sorted(attributes.items(), key=lambda kv: kv[0]))

    graph_id = self.add_graph(g.name, classe)  # check if the graph name already exists.

    for node in g.nodes:  # @todo: if the keys of labels include int and str at the same time.
        self.add_node(graph_id, node, as_label(g.nodes[node]))

    for edge in g.edges:
        tail, head = edge[0], edge[1]
        self.add_edge(graph_id, tail, head, as_label(g.edges[(tail, head)]), ignore_duplicates)

    return graph_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False):
    """Initialize the environment.

    Parameters
    ----------
    init_type : optional
        A member of ``Options.InitType`` or its string name.
    print_to_stdout : bool, optional
        Currently not used by this method.

    Raises
    ------
    Exception
        If no edit costs have been selected.
    """
    if isinstance(init_type, str):
        init_type = OptionsStringMap.InitType[init_type]

    # Throw an exception if no edit costs have been selected.
    if self.__ged_data._edit_cost is None:
        raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')

    # Return if the environment is initialized.
    if self.__initialized:
        return

    # Set initialization type.
    self.__ged_data._init_type = init_type

    # @todo: Construct shuffled graph copies if necessary.

    # Re-initialize adjacency matrices (also previously initialized graphs must be re-initialized because of possible re-allocation).
    # @todo: setup_adjacency_matrix, don't know if necessary.
    # default=0 keeps an environment without any graph initializable
    # instead of failing on the maximum of an empty sequence.
    graphs = self.__ged_data._graphs
    self.__ged_data._max_num_nodes = max((nx.number_of_nodes(g) for g in graphs), default=0)
    self.__ged_data._max_num_edges = max((nx.number_of_edges(g) for g in graphs), default=0)

    # Initialize cost matrices if necessary.
    if self.__ged_data._eager_init():
        pass  # @todo: init_cost_matrices_: 1. Update node cost matrix if new node labels have been added to the environment; 2. Update edge cost matrix if new edge labels have been added to the environment.

    # Mark environment as initialized.
    self.__initialized = True
    self.__new_graph_ids.clear()
def set_method(self, method, options=''):
    """Set the GED method to be used by ``run_method()``.

    Parameters
    ----------
    method :
        A member of ``Options.GEDMethod`` or its string name.
    options : optional
        Options passed to ``set_options`` of the selected method.
    """
    # Reset instead of deleting the attribute: if constructing the new
    # method fails, later "is None" checks must still work.
    self.__ged_method = None

    if isinstance(method, str):
        method = OptionsStringMap.GEDMethod[method]

    # NOTE(review): only Bipartite is imported below; the other method
    # classes are not among the imports visible for this module -- confirm
    # they are in scope before selecting them.
    if method == Options.GEDMethod.BRANCH:
        self.__ged_method = Branch(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_FAST:
        self.__ged_method = BranchFast(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_TIGHT:
        self.__ged_method = BranchTight(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_UNIFORM:
        self.__ged_method = BranchUniform(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_COMPACT:
        self.__ged_method = BranchCompact(self.__ged_data)
    elif method == Options.GEDMethod.PARTITION:
        self.__ged_method = Partition(self.__ged_data)
    elif method == Options.GEDMethod.HYBRID:
        self.__ged_method = Hybrid(self.__ged_data)
    elif method == Options.GEDMethod.RING:
        self.__ged_method = Ring(self.__ged_data)
    elif method == Options.GEDMethod.ANCHOR_AWARE_GED:
        self.__ged_method = AnchorAwareGED(self.__ged_data)
    elif method == Options.GEDMethod.WALKS:
        self.__ged_method = Walks(self.__ged_data)
    elif method == Options.GEDMethod.IPFP:
        self.__ged_method = IPFP(self.__ged_data)
    elif method == Options.GEDMethod.BIPARTITE:
        from gklearn.ged.methods import Bipartite
        self.__ged_method = Bipartite(self.__ged_data)
    elif method == Options.GEDMethod.SUBGRAPH:
        self.__ged_method = Subgraph(self.__ged_data)
    elif method == Options.GEDMethod.NODE:
        self.__ged_method = Node(self.__ged_data)
    elif method == Options.GEDMethod.RING_ML:
        self.__ged_method = RingML(self.__ged_data)
    elif method == Options.GEDMethod.BIPARTITE_ML:
        self.__ged_method = BipartiteML(self.__ged_data)
    elif method == Options.GEDMethod.REFINE:
        self.__ged_method = Refine(self.__ged_data)
    elif method == Options.GEDMethod.BP_BEAM:
        self.__ged_method = BPBeam(self.__ged_data)
    elif method == Options.GEDMethod.SIMULATED_ANNEALING:
        self.__ged_method = SimulatedAnnealing(self.__ged_data)
    elif method == Options.GEDMethod.HED:
        self.__ged_method = HED(self.__ged_data)
    elif method == Options.GEDMethod.STAR:
        self.__ged_method = STAR(self.__ged_data)
    # #ifdef GUROBI
    elif method == Options.GEDMethod.F1:
        self.__ged_method = F1(self.__ged_data)
    elif method == Options.GEDMethod.F2:
        self.__ged_method = F2(self.__ged_data)
    elif method == Options.GEDMethod.COMPACT_MIP:
        self.__ged_method = CompactMIP(self.__ged_data)
    elif method == Options.GEDMethod.BLP_NO_EDGE_LABELS:
        self.__ged_method = BLPNoEdgeLabels(self.__ged_data)

    self.__ged_method.set_options(options)
def run_method(self, g_id, h_id):
    """Run the GED method selected by ``set_method()`` on two graphs.

    The lower bound, upper bound, runtime and node map reported by the
    method are stored under the key ``(g_id, h_id)``.

    Parameters
    ----------
    g_id :
        ID of an input graph that has been added to the environment.
    h_id :
        ID of an input graph that has been added to the environment.

    Raises
    ------
    Exception
        If one of the IDs is not smaller than the number of graphs, if the
        environment is uninitialized, or if no method has been set.
    """
    # Guard clauses: both graphs known, environment ready, method chosen.
    for graph_id in (g_id, h_id):
        if graph_id >= self.__ged_data.num_graphs():
            raise Exception('The graph with ID', str(graph_id), 'has not been added to the environment.')
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
    if self.__ged_method is None:
        raise Exception('No method has been set. Call set_method() before calling run().')

    # Call selected GEDMethod; a graph compared with itself is replaced by
    # its shuffled copy when such copies are available.
    use_shuffled_copy = self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id)
    if use_shuffled_copy:
        self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id))  # @todo: why shuffle?
    else:
        self.__ged_method.run(g_id, h_id)

    # Store results.
    pair = (g_id, h_id)
    self.__lower_bounds[pair] = self.__ged_method.get_lower_bound()
    self.__upper_bounds[pair] = self.__ged_method.get_upper_bound()
    self.__runtimes[pair] = self.__ged_method.get_runtime()
    self.__node_maps[pair] = self.__ged_method.get_node_map()
def init_method(self):
    """Initialize the GED method that was selected with set_method().

    :raises Exception: If the environment is uninitialized or no method
        has been set.
    """
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() before calling init_method().')
    method = self.__ged_method
    if method is None:
        raise Exception('No method has been set. Call set_method() before calling init_method().')
    method.init()
def get_upper_bound(self, g_id, h_id):
    """Return the cached upper bound for the edit distance of a graph pair.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Upper bound stored by the last run_method() call for
        ``(g_id, h_id)``.
    :raises Exception: If no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__upper_bounds:
        return self.__upper_bounds[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_upper_bound(' + ids + ').')
def get_lower_bound(self, g_id, h_id):
    """Return the cached lower bound for the edit distance of a graph pair.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Lower bound stored by the last run_method() call for
        ``(g_id, h_id)``.
    :raises Exception: If no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__lower_bounds:
        return self.__lower_bounds[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_lower_bound(' + ids + ').')
def get_runtime(self, g_id, h_id):
    """Return the cached runtime of the last run on a graph pair.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Runtime stored by the last run_method() call for
        ``(g_id, h_id)``.
    :raises Exception: If no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__runtimes:
        return self.__runtimes[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_runtime(' + ids + ').')
def get_init_time(self):
    """Return the initialization time reported by the current GED method.

    :return: Whatever the method's own ``get_init_time()`` returns, i.e. the
        runtime of the last call to init_method().
    """
    method = self.__ged_method
    return method.get_init_time()
def get_node_map(self, g_id, h_id):
    """Return the cached node map between two input graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Node map stored by the last run_method() call for
        ``(g_id, h_id)``.
    :raises Exception: If no result is stored for ``(g_id, h_id)``.
    """
    pair = (g_id, h_id)
    if pair in self.__node_maps:
        return self.__node_maps[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_node_map(' + ids + ').')
def get_forward_map(self, g_id, h_id):
    """Return the forward map of the node map stored for two graphs.

    :param g_id: The ID of the first compared graph.
    :param h_id: The ID of the second compared graph.
    :return: The ``forward_map`` attribute of the node map returned by
        ``get_node_map(g_id, h_id)``.

    .. seealso:: run_method(), get_upper_bound(), get_lower_bound(), get_backward_map(), get_runtime(), get_node_map()
    .. warning:: run_method() between the same two graphs must be called before this function.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.forward_map
def get_backward_map(self, g_id, h_id):
    """Return the backward map of the node map stored for two graphs.

    :param g_id: The ID of the first compared graph.
    :param h_id: The ID of the second compared graph.
    :return: The ``backward_map`` attribute of the node map returned by
        ``get_node_map(g_id, h_id)``.

    .. seealso:: run_method(), get_upper_bound(), get_lower_bound(), get_forward_map(), get_runtime(), get_node_map()
    .. warning:: run_method() between the same two graphs must be called before this function.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.backward_map
def get_all_graph_ids(self):
    """Return the IDs ``0 .. n-1`` of all graphs, shuffled copies excluded.

    :return: List of consecutive integers whose length is the GED data's
        ``_num_graphs_without_shuffled_copies``.
    """
    num_graphs = self.__ged_data._num_graphs_without_shuffled_copies
    return list(range(num_graphs))
@@ -6,15 +6,27 @@ Created on Wed Apr 22 11:31:26 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
from gklearn.utils import dummy_node, undefined_node | |||
class NodeMap(object): | |||
def __init__(self, num_nodes_g, num_nodes_h):
    """Create a node map in which every node is still unassigned.

    :param num_nodes_g: Number of source nodes (length of the forward map).
    :param num_nodes_h: Number of target nodes (length of the backward map).
    """
    # Entries start as undefined_node(); the earlier np.inf initialisation
    # was overwritten immediately and has been dropped.
    self.__forward_map = [undefined_node()] * num_nodes_g
    self.__backward_map = [undefined_node()] * num_nodes_h
    self.__induced_cost = np.inf
def clear(self):
    """Reset every entry of both maps to ``undefined_node()``.

    The lengths of the forward and backward maps are kept.
    """
    self.__forward_map = [undefined_node() for _ in self.__forward_map]
    self.__backward_map = [undefined_node() for _ in self.__backward_map]
def num_source_nodes(self):
    """Return the number of source nodes, i.e. the forward map's length."""
    forward = self.__forward_map
    return len(forward)
@@ -28,7 +40,7 @@ class NodeMap(object): | |||
return self.__forward_map[node] | |||
else: | |||
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') | |||
return np.inf | |||
return undefined_node() | |||
def pre_image(self, node): | |||
@@ -36,28 +48,28 @@ class NodeMap(object): | |||
return self.__backward_map[node] | |||
else: | |||
raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.') | |||
return np.inf | |||
return undefined_node() | |||
def as_relation(self, relation):
    """Write the node map into ``relation`` as a list of ``(i, k)`` pairs.

    ``relation`` is emptied first. It then receives one pair per source
    node whose image is not ``undefined_node()``, followed by one pair
    ``(dummy_node(), k)`` per target node ``k`` whose pre-image is
    ``dummy_node()``.

    :param relation: Mutable list that is cleared and filled in place.
    """
    relation.clear()
    # Only the undefined_node()/dummy_node() sentinels are compared against;
    # the superseded np.inf checks have been removed.
    for i, k in enumerate(self.__forward_map):
        if k != undefined_node():
            relation.append((i, k))
    for k, i in enumerate(self.__backward_map):
        if i == dummy_node():
            relation.append((i, k))
def add_assignment(self, i, k): | |||
if i != np.inf: | |||
if i != dummy_node(): | |||
if i < len(self.__forward_map): | |||
self.__forward_map[i] = k | |||
else: | |||
raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') | |||
if k != np.inf: | |||
if k != dummy_node(): | |||
if k < len(self.__backward_map): | |||
self.__backward_map[k] = i | |||
else: | |||
@@ -0,0 +1,3 @@ | |||
from gklearn.ged.methods.ged_method import GEDMethod | |||
from gklearn.ged.methods.lsape_based_method import LSAPEBasedMethod | |||
from gklearn.ged.methods.bipartite import Bipartite |
@@ -0,0 +1,117 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Jun 18 16:09:29 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.ged.methods import LSAPEBasedMethod | |||
from gklearn.ged.util import LSAPESolver | |||
from gklearn.utils import SpecialLabel | |||
class Bipartite(LSAPEBasedMethod):
    """LSAPE-based GED method whose node costs include local edge costs.

    Every entry of the master LSAPE instance combines the node edit cost
    with the cost of editing the edges incident to the node(s) involved.
    """

    def __init__(self, ged_data):
        """Create the method on ``ged_data`` with lower bounds disabled."""
        super().__init__(ged_data)
        self._compute_lower_bound = False

    ###########################################################################
    # Inherited member functions from LSAPEBasedMethod.
    ###########################################################################

    def _lsape_populate_instance(self, g, h, master_problem):
        """Fill the substitution, deletion and insertion entries.

        Nodes are addressed by the integer indices ``0 .. n-1``.

        :param g: Source graph.
        :param h: Target graph.
        :param master_problem: Matrix written in place. Its top-left block
            receives substitution costs, the diagonal of the top-right block
            deletion costs and the diagonal of the bottom-left block
            insertion costs.
        """
        # NOTE: the reference implementation guards this with "#ifdef _OPENMP".
        num_g = nx.number_of_nodes(g)
        num_h = nx.number_of_nodes(h)
        for row in range(num_g):
            for col in range(num_h):
                master_problem[row, col] = self._compute_substitution_cost(g, h, row, col)
        for row in range(num_g):
            master_problem[row, num_h + row] = self._compute_deletion_cost(g, row)
        for col in range(num_h):
            master_problem[num_g + col, col] = self._compute_insertion_cost(h, col)

    ###########################################################################
    # Helper member functions.
    ###########################################################################

    def _compute_substitution_cost(self, g, h, u, v):
        """Return the cost of substituting node ``u`` of ``g`` by ``v`` of ``h``.

        The result is the node substitution cost plus the minimal cost of an
        LSAPE subproblem that matches the edges incident to ``u`` with the
        edges incident to ``v``.
        """
        ged_data = self._ged_data
        # Node substitution cost.
        node_part = ged_data.node_cost(g.nodes[u]['label'], h.nodes[v]['label'])

        # Edge subproblem: everything is infeasible except the entries
        # written below and the zero-cost bottom-right block.
        deg_u, deg_v = g.degree[u], h.degree[v]
        size = deg_u + deg_v
        edge_costs = np.full((size, size), np.inf)
        edge_costs[deg_u:, deg_v:] = 0

        # @todo: should directed graphs be considered?
        g_edges = list(g[u].values())  # attributes of the edges at u
        h_edges = list(h[v].values())  # attributes of the edges at v

        # Edge deletion costs.
        for i, attrs in enumerate(g_edges):
            edge_costs[i, deg_v + i] = ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)
        # Edge insertion costs.
        for j, attrs in enumerate(h_edges):
            edge_costs[deg_u + j, j] = ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])
        # Edge relabelling costs.
        for i, g_attrs in enumerate(g_edges):
            for j, h_attrs in enumerate(h_edges):
                edge_costs[i, j] = ged_data.edge_cost(g_attrs['label'], h_attrs['label'])

        # Solve the subproblem and add its optimum to the node cost.
        solver = LSAPESolver(edge_costs)
        solver.set_model(self._lsape_model)
        solver.solve()
        return node_part + solver.minimal_cost()

    def _compute_deletion_cost(self, g, v):
        """Return the cost of deleting node ``v`` of ``g`` with its edges."""
        ged_data = self._ged_data
        node_part = ged_data.node_cost(g.nodes[v]['label'], SpecialLabel.DUMMY)
        # Start the sum at the node cost so terms accumulate in the same order.
        return sum(
            (ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY) for attrs in g[v].values()),
            node_part,
        )

    def _compute_insertion_cost(self, g, v):
        """Return the cost of inserting node ``v`` of ``g`` with its edges."""
        ged_data = self._ged_data
        node_part = ged_data.node_cost(SpecialLabel.DUMMY, g.nodes[v]['label'])
        # Start the sum at the node cost so terms accumulate in the same order.
        return sum(
            (ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label']) for attrs in g[v].values()),
            node_part,
        )
@@ -0,0 +1,195 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Jun 18 15:52:35 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import time | |||
import networkx as nx | |||
class GEDMethod(object):
    """Base class of all graph edit distance (GED) methods.

    It stores the bounds, node map and timings produced by the last run and
    declares the ``_ged_*`` hooks that derived classes override.
    """

    def __init__(self, ged_data):
        """Create an uninitialized method operating on ``ged_data``.

        :param ged_data: Data object whose ``_graphs`` container is indexed
            by graph ID in run().
        """
        self._initialized = False
        self._ged_data = ged_data
        self._options = None
        self._lower_bound = 0
        self._upper_bound = np.inf
        self._node_map = [0, 0]  # @todo: placeholder until a run yields a node map.
        self._runtime = None
        self._init_time = None

    def init(self):
        """Initialize the method with the options given to set_options().

        Calls ``_ged_init()``, records its wall-clock duration and marks the
        method as initialized.
        """
        start = time.time()
        self._ged_init()
        end = time.time()
        self._init_time = end - start
        self._initialized = True

    def set_options(self, options):
        """Reset all options to their defaults, then apply ``options``.

        :param options: Mapping from option name to option value.
        :raises Exception: If ``_ged_parse_option()`` rejects an option name.
        """
        self._ged_set_default_options()
        for key, val in options.items():
            if not self._ged_parse_option(key, val):
                # Build one string: several positional arguments would make
                # the exception print as a tuple.
                raise Exception('Invalid option "' + str(key) + '". Usage: options = "'
                                + self._ged_valid_options_string() + '".')  # @todo: not implemented.
        # Changed options invalidate any earlier initialization.
        self._initialized = False

    def run(self, g_id, h_id):
        """Run the method on two stored graphs and keep the results.

        The result of run_as_util() must provide the keys ``'lower_bound'``,
        ``'upper_bound'`` and ``'node_maps'``. Only the first node map is
        kept, and only if at least one was returned.

        :param g_id: ID of input graph.
        :param h_id: ID of input graph.
        """
        start = time.time()
        result = self.run_as_util(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id])
        end = time.time()
        self._lower_bound = result['lower_bound']
        self._upper_bound = result['upper_bound']
        if len(result['node_maps']) > 0:
            self._node_map = result['node_maps'][0]
        self._runtime = end - start

    def run_as_util(self, g, h):
        """Run the method on two graph objects and return its result.

        :param g: Input graph.
        :param h: Input graph.
        :return: The value returned by ``_ged_run(g, h)``.
        """
        # Compute optimal solution and return if at least one of the two graphs is empty.
        if nx.number_of_nodes(g) == 0 or nx.number_of_nodes(h) == 0:
            # @todo: the shortcut for empty graphs is missing, so the method
            # is still run below.
            print('This is not implemented.')

        # Run the method.
        return self._ged_run(g, h)

    def get_upper_bound(self):
        """Return the upper bound stored by the last run().

        The value is ``np.inf`` until a run has stored one.
        """
        return self._upper_bound

    def get_lower_bound(self):
        """Return the lower bound stored by the last run().

        The value is ``0`` until a run has stored one.
        """
        return self._lower_bound

    def get_runtime(self):
        """Return the runtime of the last run() in seconds, or None before any run."""
        return self._runtime

    def get_init_time(self):
        """Return the runtime of the last init() in seconds, or None before any init."""
        return self._init_time

    def get_node_map(self):
        """Return the node map stored by the last run().

        Until a run yields a node map this is the placeholder ``[0, 0]``.
        """
        return self._node_map

    def _ged_init(self):
        """Initialize the method.

        Must be overridden by derived classes that require initialization.
        """
        pass

    def _ged_parse_option(self, option, arg):
        """Parse one option.

        Must be overridden by derived classes that have options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: True if ``option`` is a valid option name for the method.
            The base implementation accepts nothing and returns False.
        """
        return False

    def _ged_run(self, g, h):
        """Run the method on two graphs.

        Must be overridden by derived classes. The base implementation
        returns an empty dict, which run() cannot consume.

        :param g: Input graph.
        :param h: Input graph.
        :return: Result dictionary.
        """
        return {}

    def _ged_valid_options_string(self):
        """Return a string describing all valid options.

        Must be overridden by derived classes that have options. The base
        implementation returns an empty string.
        """
        return ''

    def _ged_set_default_options(self):
        """Set all options to their default values.

        Must be overridden by derived classes that have options.
        """
        pass
@@ -0,0 +1,254 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Thu Jun 18 16:01:24 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
import networkx as nx | |||
from gklearn.ged.methods import GEDMethod | |||
from gklearn.ged.util import LSAPESolver, misc | |||
from gklearn.ged.env import NodeMap | |||
class LSAPEBasedMethod(GEDMethod):
    """Base class of GED methods that solve one LSAPE instance per graph pair.

    Derived classes fill the cost matrix in ``_lsape_populate_instance()``.
    This class solves it, turns each solution into a node map and derives
    the bounds from the result.
    """

    def __init__(self, ged_data):
        """Create the method on ``ged_data`` with the shared default options."""
        super().__init__(ged_data)
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._compute_lower_bound = True
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._centralities = {}
        self._max_num_solutions = 1

    def populate_instance_and_run_as_util(self, g, h):
        """Build and solve the LSAPE instance for ``g`` and ``h``.

        :param g: Input graph.
        :param h: Input graph.
        :return: Dict with the keys ``'node_maps'`` (one NodeMap per solver
            solution), ``'lower_bound'`` and ``'upper_bound'``. The upper
            bound is the induced cost of the first node map, or ``np.inf``
            if there is none.
        """
        result = {'node_maps': [], 'lower_bound': 0, 'upper_bound': np.inf}

        # Populate the LSAPE instance and set up the solver. The matrix is
        # (n + m) x (n + m); entries that are not written stay infeasible.
        nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h)
        lsape_instance = np.ones((nb1 + nb2, nb1 + nb2)) * np.inf
        self.populate_instance(g, h, lsape_instance)
        lsape_solver = LSAPESolver(lsape_instance)

        # Solve the LSAPE instance.
        if self._solve_optimally:
            lsape_solver.set_model(self._lsape_model)
        else:
            lsape_solver.set_greedy_method(self._greedy_method)
        lsape_solver.solve(self._max_num_solutions)

        # Compute and store lower and upper bound.
        if self._compute_lower_bound and self._solve_optimally:
            result['lower_bound'] = lsape_solver.minimal_cost() * self._lsape_lower_bound_scaling_factor(g, h)  # @todo: test

        for solution_id in range(0, lsape_solver.num_solutions()):
            node_map = NodeMap(nb1, nb2)
            result['node_maps'].append(node_map)
            misc.construct_node_map_from_solver(lsape_solver, node_map, solution_id)
            self._ged_data.compute_induced_cost(g, h, node_map)

        # Add centralities and reoptimize.
        if self._centrality_weight > 0 and self._centrality_method != 'NODE':
            print('This is not implemented.')  # @todo

        # Sort the node maps and set the upper bound.
        if len(result['node_maps']) > 1 or len(result['node_maps']) > self._max_num_solutions:
            print('This is not implemented.')  # @todo
        if len(result['node_maps']) == 0:
            result['upper_bound'] = np.inf
        else:
            result['upper_bound'] = result['node_maps'][0].induced_cost()

        return result

    def populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance in place.

        Delegates to ``_lsape_populate_instance()`` and then sets the
        bottom-right block, starting at row ``n`` and column ``m``, to zero.

        :param g: Input graph.
        :param h: Input graph.
        :param lsape_instance: Cost matrix written in place.
        """
        if not self._initialized:
            pass  # @todo: runtime initialization, as in the C++ code, is missing.
        self._lsape_populate_instance(g, h, lsape_instance)
        lsape_instance[nx.number_of_nodes(g):, nx.number_of_nodes(h):] = 0

    ###########################################################################
    # Member functions inherited from GEDMethod.
    ###########################################################################

    def _ged_init(self):
        """Run the pre-graph hook, initialize each graph, then run ``_lsape_init()``."""
        self._lsape_pre_graph_init(False)
        for graph in self._ged_data._graphs:
            self._init_graph(graph)
        self._lsape_init()

    def _ged_run(self, g, h):
        """Return the result of populate_instance_and_run_as_util()."""
        return self.populate_instance_and_run_as_util(g, h)

    def _ged_parse_option(self, option, arg):
        """Store one shared option, or hand it to ``_lsape_parse_option()``.

        :param option: The name of the option.
        :param arg: The argument of the option, stored without validation.
        :return: Always True, see the note at the end of the body.
        """
        is_valid_option = False
        if option == 'threads':  # @todo: try.. catch...
            self._num_threads = arg
            is_valid_option = True
        elif option == 'lsape_model':
            self._lsape_model = arg  # @todo
            is_valid_option = True
        elif option == 'greedy_method':
            self._greedy_method = arg  # @todo
            is_valid_option = True
        elif option == 'optimal':
            self._solve_optimally = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_method':
            self._centrality_method = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_weight':
            self._centrality_weight = arg  # @todo
            is_valid_option = True
        elif option == 'max_num_solutions':
            if arg == 'ALL':
                self._max_num_solutions = -1
            else:
                self._max_num_solutions = arg  # @todo
            is_valid_option = True
        # Options not shared by all LSAPE-based methods go to the derived class.
        is_valid_option = is_valid_option or self._lsape_parse_option(option, arg)
        # @todo: this is not in the C++ code. Unknown options are accepted on
        # purpose so that option dicts shared with other code do not raise.
        is_valid_option = True
        return is_valid_option

    def _ged_set_default_options(self):
        """Reset the shared options, then those of the derived class."""
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._max_num_solutions = 1
        # Let derived classes restore their own defaults as well; without
        # this call the hook below is never invoked.
        self._lsape_set_default_options()

    ###########################################################################
    # Private helper member functions.
    ###########################################################################

    def _init_graph(self, graph):
        """Initialize the per-graph data of this method for ``graph``."""
        if self._centrality_method != 'NODE':
            # NOTE(review): _init_centralities() is not defined in this class;
            # confirm a derived class provides it before using centralities.
            self._init_centralities(graph)  # @todo
        self._lsape_init_graph(graph)

    ###########################################################################
    # Virtual member functions to be overridden by derived classes.
    ###########################################################################

    def _lsape_init(self):
        """Initialize the method after the per-graph initialization.

        Must be overridden by derived classes that require custom
        initialization.
        """
        pass

    def _lsape_parse_option(self, option, arg):
        """Parse one option that is not shared by all LSAPE-based methods.

        Must be overridden by derived classes that have such options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: True if ``option`` is a valid option name, False otherwise.
        """
        return False

    def _lsape_set_default_options(self):
        """Set the options not shared by all LSAPE-based methods to defaults.

        Must be overridden by derived classes that have such options.
        """
        pass

    def _lsape_populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance.

        Must be overridden by derived classes.

        :param g: Input graph.
        :param h: Input graph.
        :param lsape_instance: Cost matrix written in place. As created by
            populate_instance_and_run_as_util() it has (n + m) rows and
            columns, where n and m are the numbers of nodes of ``g`` and
            ``h``.
        """
        pass

    def _lsape_init_graph(self, graph):
        """Initialize the per-graph variables for one graph.

        Must be overridden by derived classes that need custom per-graph
        variables.

        :param graph: Graph for which the variables have to be initialized.
        """
        pass

    def _lsape_pre_graph_init(self, called_at_runtime):
        """Initialize the method before the per-graph initialization.

        Must be overridden by derived classes that require it.

        :param called_at_runtime: True if called at runtime, False if called
            during initialization.
        """
        pass

    def _lsape_lower_bound_scaling_factor(self, g, h):
        """Return the factor applied to the LSAPE optimum for the lower bound.

        May be overridden by derived classes whose LSAPE optimum has to be
        scaled to become a valid lower bound.

        :param g: Input graph.
        :param h: Input graph.
        :return: 1, which leaves the LSAPE optimum unchanged.
        """
        return 1
@@ -1 +1,3 @@ | |||
from gklearn.ged.util.lsape_solver import LSAPESolver | |||
from gklearn.ged.util.util import compute_geds, ged_options_to_string | |||
from gklearn.ged.util.util import compute_geds_cml |
@@ -0,0 +1,121 @@ | |||
#!/usr/bin/env python3 | |||
# -*- coding: utf-8 -*- | |||
""" | |||
Created on Mon Jun 22 15:37:36 2020 | |||
@author: ljia | |||
""" | |||
import numpy as np | |||
from scipy.optimize import linear_sum_assignment | |||
class LSAPESolver(object):
    """Solver for one LSAPE instance given as a cost matrix.

    Only optimal solving through ``scipy.optimize.linear_sum_assignment``
    is available; greedy solving can be selected but is not implemented.
    """

    def __init__(self, cost_matrix=None):
        """Construct a solver for an LSAPE problem instance.

        :param cost_matrix: The cost matrix of the instance to be solved.
        """
        self.__cost_matrix = cost_matrix
        self.__model = 'ECBP'
        self.__greedy_method = 'BASIC'
        self.__solve_optimally = True
        self.__minimal_cost = 0
        self.__row_to_col_assignments = []
        self.__col_to_row_assignments = []
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def clear_solution(self):
        """Clear a previously computed solution.

        Afterwards there is exactly one, still empty, solution slot.
        """
        self.__minimal_cost = 0
        self.__row_to_col_assignments.clear()
        self.__col_to_row_assignments.clear()
        self.__row_to_col_assignments.append([])  # @todo
        self.__col_to_row_assignments.append([])
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def set_model(self, model):
        """Make the solver use a specific model for optimal solving.

        :param model: The model that should be used. It is only stored,
            because solve() always uses ``linear_sum_assignment``.
        """
        self.__solve_optimally = True
        self.__model = model

    def set_greedy_method(self, greedy_method):
        """Make the solver use a greedy method instead of optimal solving.

        :param greedy_method: The greedy method that should be used.
        """
        self.__solve_optimally = False
        self.__greedy_method = greedy_method

    def solve(self, num_solutions=1):
        """Solve the LSAPE problem instance.

        :param num_solutions: The maximal number of solutions that should be
            computed. Only one solution is ever computed.
        :raises NotImplementedError: If a greedy method has been selected.
        """
        self.clear_solution()
        if not self.__solve_optimally:
            # @todo: greedy. Fail loudly instead of leaving an empty solution
            # slot that would break later lookups.
            raise NotImplementedError('Greedy LSAPE solving is not implemented; call set_model() to solve optimally.')
        # @todo: only hungarianLSAPE ('ECBP') can be used.
        _, col_id = linear_sum_assignment(self.__cost_matrix)
        self.__row_to_col_assignments[0] = col_id
        # For a square matrix col_id is a permutation, so argsort inverts it.
        self.__col_to_row_assignments[0] = np.argsort(col_id)  # @todo: might be slow, can use row_id
        self.__compute_cost_from_assignments()
        if num_solutions > 1:
            pass  # @todo: computing further solutions is not implemented.

    def minimal_cost(self):
        """Return the cost of the computed solution."""
        return self.__minimal_cost

    def get_assigned_col(self, row, solution_id=0):
        """Return the column assigned to ``row``.

        :param row: Row whose assigned column should be returned.
        :param solution_id: ID of the solution to look the assignment up in.
        """
        return self.__row_to_col_assignments[solution_id][row]

    def get_assigned_row(self, col, solution_id=0):
        """Return the row assigned to ``col``.

        :param col: Column whose assigned row should be returned.
        :param solution_id: ID of the solution to look the assignment up in.
        """
        return self.__col_to_row_assignments[solution_id][col]

    def num_solutions(self):
        """Return the number of solution slots currently stored."""
        return len(self.__row_to_col_assignments)

    def __compute_cost_from_assignments(self):  # @todo
        """Sum the cost matrix entries selected by the first solution."""
        cols = self.__row_to_col_assignments[0]
        self.__minimal_cost = np.sum(self.__cost_matrix[np.arange(len(cols)), cols])
@@ -5,6 +5,27 @@ Created on Thu Mar 19 18:13:56 2020 | |||
@author: ljia | |||
""" | |||
from gklearn.utils import dummy_node | |||
def construct_node_map_from_solver(solver, node_map, solution_id):
    """Fill ``node_map`` from one solution of an LSAPE solver.

    The node map is cleared first. A row assigned to a column at or beyond
    the number of target nodes becomes a deletion, i.e. it is assigned to
    ``dummy_node()``; any other row becomes a substitution. A column whose
    assigned row lies at or beyond the number of source nodes becomes an
    insertion.

    :param solver: Solver providing get_assigned_col() and get_assigned_row().
    :param node_map: Node map that is cleared and filled in place.
    :param solution_id: ID of the solver solution to read.
    """
    node_map.clear()
    num_nodes_g = node_map.num_source_nodes()
    num_nodes_h = node_map.num_target_nodes()

    # Deletions and substitutions.
    for row in range(num_nodes_g):
        col = solver.get_assigned_col(row, solution_id)
        target = dummy_node() if col >= num_nodes_h else col
        node_map.add_assignment(row, target)

    # Insertions.
    for col in range(num_nodes_h):
        assigned_row = solver.get_assigned_row(col, solution_id)
        if assigned_row >= num_nodes_g:
            node_map.add_assignment(dummy_node(), col)
def options_string_to_options_map(options_string): | |||
"""Transforms an options string into an options map. | |||
@@ -13,6 +13,7 @@ from functools import partial | |||
import sys | |||
from tqdm import tqdm | |||
import networkx as nx | |||
from gklearn.ged.env import GEDEnv | |||
from gklearn.gedlib import librariesImport, gedlibpy | |||
@@ -22,7 +23,7 @@ def compute_ged(g1, g2, options): | |||
ged_env.add_nx_graph(g1, '') | |||
ged_env.add_nx_graph(g2, '') | |||
listID = ged_env.get_all_graph_ids() | |||
ged_env.init() | |||
ged_env.init(init_type=options['init_option']) | |||
ged_env.set_method(options['method'], ged_options_to_string(options)) | |||
ged_env.init_method() | |||
@@ -46,6 +47,82 @@ def compute_ged(g1, g2, options): | |||
return dis, pi_forward, pi_backward | |||
def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
    """Compute pairwise graph edit distances with the Python ``GEDEnv``.

    Parameters
    ----------
    graphs : sequence of networkx graphs
        Graphs between which all pairwise GEDs are computed.
    options : dict
        GED settings. The keys read here are ``'edit_cost'``,
        ``'edit_cost_constants'``, ``'init_option'``, ``'method'``,
        ``'node_labels'``, ``'edge_labels'``, ``'node_attrs'`` and
        ``'edge_attrs'``; the whole dict is also forwarded to
        ``GEDEnv.set_method``. The dict passed in is never modified.
    sort : bool
        If true, each pair is evaluated with the graph having fewer
        nodes first (the forward and backward maps are swapped back
        accordingly).
    parallel : bool
        If true, pairs are distributed over a process pool with one
        worker per CPU, and the GED method itself is forced to a single
        thread.
    verbose : bool
        If true, show a ``tqdm`` progress bar on stdout.

    Returns
    -------
    ged_vec : list
        Distances of all pairs ``(i, j)`` with ``i < j``, in the order
        produced by ``itertools.combinations``.
    ged_mat : numpy.ndarray
        Symmetric ``len(graphs) x len(graphs)`` distance matrix.
    n_edit_operations : list
        Numbers of edit operations of each pair, aligned with ``ged_vec``.
    """
    n_graphs = len(graphs)

    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_type=options['init_option'])
    if parallel:
        # Work on a copy: the caller's dict (or the shared default) must
        # not be modified as a side effect of this call.
        options = dict(options)
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    neo_options = {'edit_cost': options['edit_cost'],
                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
    ged_mat = np.zeros((n_graphs, n_graphs))
    if parallel:
        len_itr = n_graphs * (n_graphs - 1) // 2
        ged_vec = [0] * len_itr
        n_edit_operations = [0] * len_itr
        itr = combinations(range(0, n_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Publish the shared objects as module globals in each worker.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
        try:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
            if verbose:
                iterator = tqdm(iterator, desc='computing GEDs', file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Position of pair (i, j), i < j, in combinations() order;
                # results arrive unordered, so they are placed by index.
                idx_itr = n_graphs * i + j - (i + 1) * (i + 2) // 2
                ged_vec[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_edit_operations[idx_itr] = n_eo_tmp
            pool.close()
        except BaseException:
            # Do not leave worker processes behind when a task fails or
            # the computation is interrupted.
            pool.terminate()
            raise
        finally:
            pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout)
        else:
            iterator = range(n_graphs)
        for i in iterator:
            for j in range(i + 1, n_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Evaluate with the smaller graph first, then swap the
                    # maps back so they refer to (graphs[i], graphs[j]).
                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): | |||
# initialize ged env. | |||
ged_env = gedlibpy.GEDEnv() | |||
@@ -13,5 +13,6 @@ __date__ = "March 2020" | |||
from gklearn.preimage.preimage_generator import PreimageGenerator | |||
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator | |||
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator | |||
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML | |||
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv | |||
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class |
@@ -0,0 +1,57 @@ | |||
"""Tests of GEDEnv. | |||
""" | |||
def test_GEDEnv():
    """Test GEDEnv.

    Computes a graph edit distance between the first two graphs of the
    predefined "MUTAG" dataset and checks that the forward and backward
    node maps have one entry per node of the first and of the second
    graph, respectively.
    """

    # 1. Get dataset.
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'

    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # 2. Compute graph edit distance.
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                              edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
                              )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
        options = {'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   'threads': 1  # parallel threads.
                   }
        ged_env.set_method('BIPARTITE',  # GED method.
                           options  # options for GED method.
                           )
        ged_env.init_method()  # initialize GED method.

        ged_env.run_method(listID[0], listID[1])  # run.

        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        # GED between two graphs; called to exercise the API, value unchecked.
        ged_env.get_upper_bound(listID[0], listID[1])

        import networkx as nx
        # Two separate asserts: in ``assert a, b`` the second expression is
        # only the failure message and is never checked.
        assert len(pi_forward) == nx.number_of_nodes(graph1)
        assert len(pi_backward) == nx.number_of_nodes(graph2)

    except Exception as exception:
        assert False, exception
# Allow running this test module directly as a script.
if __name__ == "__main__":
    test_GEDEnv()
@@ -68,4 +68,7 @@ def test_median_preimage_generator(): | |||
print('\n-------------------------------------') | |||
print('fit method:', fit_method, '\n') | |||
mpg_options['fit_method'] = fit_method | |||
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4)) | |||
try: | |||
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4)) | |||
except Exception as exception: | |||
assert False, exception |
@@ -20,7 +20,7 @@ from gklearn.utils.graph_files import load_dataset, save_dataset | |||
from gklearn.utils.timer import Timer | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
from gklearn.utils.utils import compute_gram_matrices_by_class | |||
from gklearn.utils.utils import SpecialLabel | |||
from gklearn.utils.utils import SpecialLabel, dummy_node, undefined_node, dummy_edge | |||
from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix | |||
from gklearn.utils.trie import Trie | |||
from gklearn.utils.knn import knn_cv, knn_classification |
@@ -472,14 +472,6 @@ def get_mlti_dim_edge_attrs(G, attr_names): | |||
for ed, attrs in G.edges(data=True): | |||
attributes.append(tuple(attrs[aname] for aname in attr_names)) | |||
return attributes | |||
@unique
class SpecialLabel(Enum):
    """Special label values that can be attached to nodes or edges.

    The ``@unique`` decorator rejects members that share a value.
    """
    # The value is written out explicitly instead of using ``enum.auto``,
    # because ``enum.auto`` does not exist in Python 3.5.
    DUMMY = 1  # The dummy label.
def normalize_gram_matrix(gram_matrix): | |||
@@ -506,4 +498,44 @@ def compute_distance_matrix(gram_matrix): | |||
dis_max = np.max(np.max(dis_mat)) | |||
dis_min = np.min(np.min(dis_mat[dis_mat != 0])) | |||
dis_mean = np.mean(np.mean(dis_mat)) | |||
return dis_mat, dis_max, dis_min, dis_mean | |||
return dis_mat, dis_max, dis_min, dis_mean | |||
def dummy_node():
    """Return the ID used for the dummy node.

    Returns
    -------
    float
        Positive infinity.
    """
    # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
    dummy_id = float('inf')
    return dummy_id
def undefined_node():
    """Return the ID used for an undefined node.

    Returns
    -------
    float
        Positive infinity.
    """
    undefined_id = float('inf')
    return undefined_id
def dummy_edge():
    """Return the ID used for the dummy edge.

    Returns
    -------
    float
        Positive infinity.
    """
    dummy_id = float('inf')
    return dummy_id
@unique
class SpecialLabel(Enum):
    """Enumeration of special label values.

    ``@unique`` ensures that every member has a distinct value.
    """
    # An explicit integer is used rather than ``enum.auto``, which does
    # not exist in Python 3.5.
    DUMMY = 1  # The dummy label.