From 5eb69de7ea0bdad948534fa38b32f118dca5cd3c Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Wed, 24 Jun 2020 16:24:37 +0200 Subject: [PATCH] Add ged module. --- .coveragerc | 4 +- gklearn/examples/compute_graph_edit_distance.py | 58 ++++ gklearn/ged/edit_costs/__init__.py | 2 + gklearn/ged/edit_costs/constant.py | 50 ++++ gklearn/ged/edit_costs/edit_cost.py | 88 ++++++ gklearn/ged/env/__init__.py | 4 +- gklearn/ged/env/common_types.py | 153 +++++++++- gklearn/ged/env/ged_data.py | 181 ++++++++++++ gklearn/ged/env/ged_env.py | 369 ++++++++++++++++++++++++ gklearn/ged/env/node_map.py | 28 +- gklearn/ged/methods/__init__.py | 3 + gklearn/ged/methods/bipartite.py | 117 ++++++++ gklearn/ged/methods/ged_method.py | 195 +++++++++++++ gklearn/ged/methods/lsape_based_method.py | 254 ++++++++++++++++ gklearn/ged/util/__init__.py | 2 + gklearn/ged/util/lsape_solver.py | 121 ++++++++ gklearn/ged/util/misc.py | 21 ++ gklearn/ged/util/util.py | 79 ++++- gklearn/preimage/__init__.py | 1 + gklearn/tests/test_ged_env.py | 57 ++++ gklearn/tests/test_median_preimage_generator.py | 5 +- gklearn/utils/__init__.py | 2 +- gklearn/utils/utils.py | 50 +++- 23 files changed, 1816 insertions(+), 28 deletions(-) create mode 100644 gklearn/examples/compute_graph_edit_distance.py create mode 100644 gklearn/ged/edit_costs/__init__.py create mode 100644 gklearn/ged/edit_costs/constant.py create mode 100644 gklearn/ged/edit_costs/edit_cost.py create mode 100644 gklearn/ged/env/ged_data.py create mode 100644 gklearn/ged/env/ged_env.py create mode 100644 gklearn/ged/methods/__init__.py create mode 100644 gklearn/ged/methods/bipartite.py create mode 100644 gklearn/ged/methods/ged_method.py create mode 100644 gklearn/ged/methods/lsape_based_method.py create mode 100644 gklearn/ged/util/lsape_solver.py create mode 100644 gklearn/tests/test_ged_env.py diff --git a/.coveragerc b/.coveragerc index d7615cf..1acf861 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,4 @@ [run] -omit = gklearn/tests/* 
# -*- coding: utf-8 -*-
"""Demonstrate how to compute a graph edit distance (GED) with graphkit-learn.

Exported from the Colaboratory notebook ``compute_graph_edit_distance.ipynb``:
    https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3

``graphkit-learn`` must be installed before running this script.
"""

from gklearn.ged.env import GEDEnv
from gklearn.utils import Dataset

# ---------------------------------------------------------------------------
# 1. Get the dataset.
# ---------------------------------------------------------------------------

# Name of a predefined dataset; "MUTAG" is used here.
ds_name = 'MUTAG'

dataset = Dataset()
dataset.load_predefined_dataset(ds_name)
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)

# ---------------------------------------------------------------------------
# 2. Compute the graph edit distance.
# ---------------------------------------------------------------------------

ged_env = GEDEnv()  # initialize the GED environment.
ged_env.set_edit_cost(
    'CONSTANT',  # GED cost type.
    edit_cost_constants=[3, 3, 1, 3, 3, 1],  # edit costs.
)
ged_env.add_nx_graph(graph1, '')  # add graph1
ged_env.add_nx_graph(graph2, '')  # add graph2
graph_ids = ged_env.get_all_graph_ids()  # IDs of the graphs in the environment.
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize the environment.

options = {
    'initialization_method': 'RANDOM',  # or 'NODE', etc.
    'threads': 1,  # parallel threads.
}
ged_env.set_method('BIPARTITE', options)  # GED method and its options.
ged_env.init_method()  # initialize the GED method.

ged_env.run_method(graph_ids[0], graph_ids[1])  # run.

pi_forward = ged_env.get_forward_map(graph_ids[0], graph_ids[1])  # forward map.
pi_backward = ged_env.get_backward_map(graph_ids[0], graph_ids[1])  # backward map.
dis = ged_env.get_upper_bound(graph_ids[0], graph_ids[1])  # GED between the two graphs.
print(pi_forward)
print(pi_backward)
print(dis)
class Constant(EditCost):
    """Edit cost functions whose values are fixed constants.

    Insertions and deletions always cost the configured constant, whatever
    the label. A relabeling costs the configured constant when the two
    labels differ and 0 when they are equal.
    """

    def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1,
                 edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
        """Store the six edit cost constants.

        :param node_ins_cost: Cost of inserting a node.
        :param node_del_cost: Cost of deleting a node.
        :param node_rel_cost: Cost of relabeling a node with a different label.
        :param edge_ins_cost: Cost of inserting an edge.
        :param edge_del_cost: Cost of deleting an edge.
        :param edge_rel_cost: Cost of relabeling an edge with a different label.
        """
        super().__init__()
        self.__node_ins_cost = node_ins_cost
        self.__node_del_cost = node_del_cost
        self.__node_rel_cost = node_rel_cost
        self.__edge_ins_cost = edge_ins_cost
        self.__edge_del_cost = edge_del_cost
        self.__edge_rel_cost = edge_rel_cost

    def node_ins_cost_fun(self, node_label):
        """Return the constant node insertion cost (``node_label`` is ignored)."""
        return self.__node_ins_cost

    def node_del_cost_fun(self, node_label):
        """Return the constant node deletion cost (``node_label`` is ignored)."""
        return self.__node_del_cost

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the node relabeling cost, or 0 if both labels are equal."""
        if node_label_1 != node_label_2:
            return self.__node_rel_cost
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the constant edge insertion cost (``edge_label`` is ignored)."""
        return self.__edge_ins_cost

    def edge_del_cost_fun(self, edge_label):
        """Return the constant edge deletion cost (``edge_label`` is ignored)."""
        return self.__edge_del_cost

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the edge relabeling cost, or 0 if both labels are equal."""
        if edge_label_1 != edge_label_2:
            return self.__edge_rel_cost
        return 0
class EditCost(object):
    """Base class for edit cost functions.

    Every cost function of this base class returns 0. Subclasses are
    expected to override the six ``*_cost_fun`` methods with their own
    costs.
    """

    def __init__(self):
        """Create an edit cost object; the base class holds no state."""

    def node_ins_cost_fun(self, node_label):
        """Node insertion cost function.

        :param node_label: A node label.
        :return: The cost of inserting a node with label ``node_label``
            (0 in this base class).
        """
        return 0

    def node_del_cost_fun(self, node_label):
        """Node deletion cost function.

        :param node_label: A node label.
        :return: The cost of deleting a node with label ``node_label``
            (0 in this base class).
        """
        return 0

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Node relabeling cost function.

        :param node_label_1: A node label.
        :param node_label_2: A node label.
        :return: The cost of changing a node's label from ``node_label_1``
            to ``node_label_2`` (0 in this base class).
        """
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Edge insertion cost function.

        :param edge_label: An edge label.
        :return: The cost of inserting an edge with label ``edge_label``
            (0 in this base class).
        """
        return 0

    def edge_del_cost_fun(self, edge_label):
        """Edge deletion cost function.

        :param edge_label: An edge label.
        :return: The cost of deleting an edge with label ``edge_label``
            (0 in this base class).
        """
        return 0

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Edge relabeling cost function.

        :param edge_label_1: An edge label.
        :param edge_label_2: An edge label.
        :return: The cost of changing an edge's label from ``edge_label_1``
            to ``edge_label_2`` (0 in this base class).
        """
        return 0
class Options(object):
    """Namespace for the enums that configure a GED environment."""

    @unique
    class GEDMethod(Enum):
        """Selects the GED method."""
        # The first four members were enclosed in `#ifdef GUROBI` /
        # `#endif` markers in the comments this port was written from.
        F1 = 1  # Selects ged::F1.
        F2 = 2  # Selects ged::F2.
        COMPACT_MIP = 3  # Selects ged::CompactMIP.
        BLP_NO_EDGE_LABELS = 4  # Selects ged::BLPNoEdgeLabels.
        BRANCH = 5  # Selects ged::Branch.
        BRANCH_FAST = 6  # Selects ged::BranchFast.
        BRANCH_TIGHT = 7  # Selects ged::BranchTight.
        BRANCH_UNIFORM = 8  # Selects ged::BranchUniform.
        BRANCH_COMPACT = 9  # Selects ged::BranchCompact.
        PARTITION = 10  # Selects ged::Partition.
        HYBRID = 11  # Selects ged::Hybrid.
        RING = 12  # Selects ged::Ring.
        ANCHOR_AWARE_GED = 13  # Selects ged::AnchorAwareGED.
        WALKS = 14  # Selects ged::Walks.
        IPFP = 15  # Selects ged::IPFP.
        BIPARTITE = 16  # Selects ged::Bipartite.
        SUBGRAPH = 17  # Selects ged::Subgraph.
        NODE = 18  # Selects ged::Node.
        RING_ML = 19  # Selects ged::RingML.
        BIPARTITE_ML = 20  # Selects ged::BipartiteML.
        REFINE = 21  # Selects ged::Refine.
        BP_BEAM = 22  # Selects ged::BPBeam.
        SIMULATED_ANNEALING = 23  # Selects ged::SimulatedAnnealing.
        HED = 24  # Selects ged::HED.
        STAR = 25  # Selects ged::Star.

    @unique
    class EditCosts(Enum):
        """Selects the edit costs."""
        CHEM_1 = 1  # Selects ged::CHEM1.
        CHEM_2 = 2  # Selects ged::CHEM2.
        CMU = 3  # Selects ged::CMU.
        GREC_1 = 4  # Selects ged::GREC1.
        GREC_2 = 5  # Selects ged::GREC2.
        PROTEIN = 6  # Selects ged::Protein.
        FINGERPRINT = 7  # Selects ged::Fingerprint.
        LETTER = 8  # Selects ged::Letter.
        LETTER2 = 9  # Selects ged::Letter2.
        NON_SYMBOLIC = 10  # Selects ged::NonSymbolic.
        CONSTANT = 11  # Selects ged::Constant.

    @unique
    class InitType(Enum):
        """Selects the initialization type of the environment.

        If eager initialization is selected, all edit costs are pre-computed
        when initializing the environment. Otherwise, they are computed at
        runtime. If initialization with shuffled copies is selected, shuffled
        copies of all graphs are created. These copies are used when calling
        GEDEnv.run_method() with two identical graph IDs: one of the IDs is
        internally replaced by the ID of the shuffled copy, so the graph is
        compared to an isomorphic but non-identical graph. Without shuffled
        copies, calling GEDEnv.run_method() with two identical graph IDs
        amounts to comparing a graph to itself.
        """
        LAZY_WITHOUT_SHUFFLED_COPIES = 1  # Lazy initialization, no shuffled graph copies are constructed.
        EAGER_WITHOUT_SHUFFLED_COPIES = 2  # Eager initialization, no shuffled graph copies are constructed.
        LAZY_WITH_SHUFFLED_COPIES = 3  # Lazy initialization, shuffled graph copies are constructed.
        EAGER_WITH_SHUFFLED_COPIES = 4  # Eager initialization, shuffled graph copies are constructed.

    @unique
    class AlgorithmState(Enum):
        """Can be used to specify the state of an algorithm."""
        CALLED = 1  # The algorithm has been called.
        INITIALIZED = 2  # The algorithm has been initialized.
        CONVERGED = 3  # The algorithm has converged.
        TERMINATED = 4  # The algorithm has terminated.


class OptionsStringMap(object):
    """Maps the string names accepted by the public API to ``Options`` enums."""

    # Map of available computation methods between enum type and string.
    GEDMethod = {
        "BRANCH": Options.GEDMethod.BRANCH,
        "BRANCH_FAST": Options.GEDMethod.BRANCH_FAST,
        "BRANCH_TIGHT": Options.GEDMethod.BRANCH_TIGHT,
        "BRANCH_UNIFORM": Options.GEDMethod.BRANCH_UNIFORM,
        "BRANCH_COMPACT": Options.GEDMethod.BRANCH_COMPACT,
        "PARTITION": Options.GEDMethod.PARTITION,
        "HYBRID": Options.GEDMethod.HYBRID,
        "RING": Options.GEDMethod.RING,
        "ANCHOR_AWARE_GED": Options.GEDMethod.ANCHOR_AWARE_GED,
        "WALKS": Options.GEDMethod.WALKS,
        "IPFP": Options.GEDMethod.IPFP,
        "BIPARTITE": Options.GEDMethod.BIPARTITE,
        "SUBGRAPH": Options.GEDMethod.SUBGRAPH,
        "NODE": Options.GEDMethod.NODE,
        "RING_ML": Options.GEDMethod.RING_ML,
        "BIPARTITE_ML": Options.GEDMethod.BIPARTITE_ML,
        "REFINE": Options.GEDMethod.REFINE,
        "BP_BEAM": Options.GEDMethod.BP_BEAM,
        "SIMULATED_ANNEALING": Options.GEDMethod.SIMULATED_ANNEALING,
        "HED": Options.GEDMethod.HED,
        "STAR": Options.GEDMethod.STAR,
        # ifdef GUROBI
        "F1": Options.GEDMethod.F1,
        "F2": Options.GEDMethod.F2,
        "COMPACT_MIP": Options.GEDMethod.COMPACT_MIP,
        "BLP_NO_EDGE_LABELS": Options.GEDMethod.BLP_NO_EDGE_LABELS,
    }

    # Map of available edit cost functions between enum type and string.
    EditCosts = {
        "CHEM_1": Options.EditCosts.CHEM_1,
        "CHEM_2": Options.EditCosts.CHEM_2,
        "CMU": Options.EditCosts.CMU,
        "GREC_1": Options.EditCosts.GREC_1,
        "GREC_2": Options.EditCosts.GREC_2,
        "LETTER": Options.EditCosts.LETTER,
        "LETTER2": Options.EditCosts.LETTER2,
        "NON_SYMBOLIC": Options.EditCosts.NON_SYMBOLIC,
        "FINGERPRINT": Options.EditCosts.FINGERPRINT,
        "PROTEIN": Options.EditCosts.PROTEIN,
        "CONSTANT": Options.EditCosts.CONSTANT,
    }

    # Map of available initialization types of the environment between enum
    # type and string. The fourth key used to repeat
    # "LAZY_WITH_SHUFFLED_COPIES", which left EAGER_WITH_SHUFFLED_COPIES
    # unreachable by name.
    InitType = {
        "LAZY_WITHOUT_SHUFFLED_COPIES": Options.InitType.LAZY_WITHOUT_SHUFFLED_COPIES,
        "EAGER_WITHOUT_SHUFFLED_COPIES": Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES,
        "LAZY_WITH_SHUFFLED_COPIES": Options.InitType.LAZY_WITH_SHUFFLED_COPIES,
        "EAGER_WITH_SHUFFLED_COPIES": Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
    }


@unique
class AlgorithmState(Enum):
    """Can be used to specify the state of an algorithm."""
    CALLED = 1  # The algorithm has been called.
    INITIALIZED = 2  # The algorithm has been initialized.
    CONVERGED = 3  # The algorithm has converged.
    TERMINATED = 4  # The algorithm has terminated.
class GEDData(object):
    """Holds the graphs, labels and edit costs shared by a GED environment."""

    def __init__(self):
        self._graphs = []
        self._graph_names = []
        self._graph_classes = []
        self._num_graphs_without_shuffled_copies = 0
        self._strings_to_internal_node_ids = []
        self._internal_node_ids_to_strings = []
        self._edit_cost = None
        # Pre-computed cost matrices, read only on the eager path of
        # node_cost() / edge_cost(). Nothing in this class fills them.
        self._node_costs = None
        self._edge_costs = None
        self._node_labels = []
        self._edge_labels = []
        self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
        self._delete_edit_cost = True
        self._max_num_nodes = 0
        self._max_num_edges = 0

    def num_graphs(self):
        """Return the number of graphs in the instance."""
        return len(self._graphs)

    def shuffled_graph_copies_available(self):
        """Return True if the init type is one of the ``*_WITH_SHUFFLED_COPIES`` types."""
        return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES
                or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)

    def node_cost(self, label1, label2):
        """Return the node relabeling, insertion, or deletion cost.

        :param label1: First node label.
        :param label2: Second node label.
        :return: On the eager path, the entry ``[label1, label2]`` of the
            pre-computed node cost matrix. Otherwise 0 if the labels are
            equal, the insertion cost if ``label1`` is the dummy label, the
            deletion cost if ``label2`` is the dummy label, and the
            relabeling cost in all other cases.
        """
        if self._eager_init():  # TODO: check if correct
            return self._node_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:  # TODO: check dummy
            return self._edit_cost.node_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:  # TODO: check dummy
            return self._edit_cost.node_del_cost_fun(label1)
        return self._edit_cost.node_rel_cost_fun(label1, label2)

    def edge_cost(self, label1, label2):
        """Return the edge relabeling, insertion, or deletion cost.

        :param label1: First edge label.
        :param label2: Second edge label.
        :return: On the eager path, the entry ``[label1, label2]`` of the
            pre-computed edge cost matrix. Otherwise 0 if the labels are
            equal, the insertion cost if ``label1`` is the dummy label, the
            deletion cost if ``label2`` is the dummy label, and the
            relabeling cost in all other cases.
        """
        if self._eager_init():  # TODO: check if correct
            # Edge costs come from the edge matrix; this used to index
            # self._node_costs.
            return self._edge_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_del_cost_fun(label1)
        return self._edit_cost.edge_rel_cost_fun(label1, label2)

    def compute_induced_cost(self, g, h, node_map):
        """Compute the edit cost between two graphs induced by a node map.

        The cost is not returned; it is stored on ``node_map`` through
        ``node_map.set_induced_cost()``.

        :param g: Input graph.
        :param h: Input graph.
        :param node_map: Node map whose induced edit cost is to be computed.
        """
        cost = 0

        # Node substitutions and deletions: every node of g against its image.
        for node in g.nodes():
            image = node_map.image(node)
            label2 = (SpecialLabel.DUMMY if image == dummy_node()
                      else h.nodes[image]['label'])
            cost += self.node_cost(g.nodes[node]['label'], label2)
        # Node insertions: nodes of h whose pre-image is the dummy node.
        for node in h.nodes():
            pre_image = node_map.pre_image(node)
            if pre_image == dummy_node():
                cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])

        # Edge substitutions and deletions: every edge of g against the edge
        # between the images of its end points, if h has one.
        for (n1, n2) in g.edges():
            image1 = node_map.image(n1)
            image2 = node_map.image(n2)
            label2 = (h.edges[(image2, image1)]['label']
                      if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
            cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
        # Edge insertions: edges of h without a counterpart in g.
        for (n1, n2) in h.edges():
            if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
                cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])

        node_map.set_induced_cost(cost)

    @staticmethod
    def _check_num_constants(cost_name, expected, actual):
        """Raise if ``actual`` is neither ``expected`` nor 0 (0 selects the defaults)."""
        if actual not in (expected, 0):
            raise Exception(
                'Wrong number of constants for selected edit costs '
                'Options::EditCosts::' + cost_name + '. Expected: '
                + str(expected) + ' or 0; actual: ' + str(actual) + '.')

    def _set_edit_cost(self, edit_cost, edit_cost_constants):
        """Instantiate the edit cost object selected by ``edit_cost``.

        :param edit_cost: An ``Options.EditCosts`` member or its string name.
        :param edit_cost_constants: Constants passed to the constructor of
            the selected edit cost class; empty to use that class's defaults.
        :raises Exception: If the number of constants does not match the
            selected edit costs.
        """
        if self._delete_edit_cost:
            self._edit_cost = None

        if isinstance(edit_cost, str):
            edit_cost = OptionsStringMap.EditCosts[edit_cost]

        num_constants = len(edit_cost_constants)
        # NOTE(review): apart from Constant, the classes below (CHEM1,
        # Letter, Letter2, NonSymbolic) do not appear among this module's
        # visible imports -- presumably not ported yet; confirm. Edit costs
        # with no branch here leave self._edit_cost untouched.
        if edit_cost == Options.EditCosts.CHEM_1:
            self._check_num_constants('CHEM_1', 4, num_constants)
            self._edit_cost = CHEM1(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.LETTER:
            self._check_num_constants('LETTER', 3, num_constants)
            self._edit_cost = Letter(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.LETTER2:
            self._check_num_constants('LETTER2', 5, num_constants)
            self._edit_cost = Letter2(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.NON_SYMBOLIC:
            self._check_num_constants('NON_SYMBOLIC', 6, num_constants)
            self._edit_cost = NonSymbolic(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.CONSTANT:
            self._check_num_constants('CONSTANT', 6, num_constants)
            self._edit_cost = Constant(*edit_cost_constants)

        self._delete_edit_cost = True

    def _eager_init(self):
        """Return True if the init type is one of the ``EAGER_*`` types."""
        return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
                or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)
+ self.__initialized = False + graph_id = self.__ged_data._num_graphs_without_shuffled_copies + self.__ged_data._num_graphs_without_shuffled_copies += 1 + self.__new_graph_ids.append(graph_id) + self.__ged_data._graphs.append(nx.Graph()) + self.__ged_data._graph_names.append(graph_name) + self.__ged_data._graph_classes.append(graph_class) + self.__original_to_internal_node_ids.append({}) + self.__internal_to_original_node_ids.append({}) + self.__ged_data._strings_to_internal_node_ids.append({}) + self.__ged_data._internal_node_ids_to_strings.append({}) + return graph_id + + + def add_node(self, graph_id, node_id, node_label): + """ + /*! + * @brief Adds a labeled node. + * @param[in] graph_id ID of graph that has been added to the environment. + * @param[in] node_id The user-specific ID of the vertex that has to be added. + * @param[in] node_label The label of the vertex that has to be added. Set to ged::NoLabel() if template parameter @p UserNodeLabel equals ged::NoLabel. + */ + """ + # @todo: check ids. + self.__initialized = False + internal_node_id = nx.number_of_nodes(self.__ged_data._graphs[graph_id]) + self.__ged_data._graphs[graph_id].add_node(internal_node_id, label=node_label) + self.__original_to_internal_node_ids[graph_id][node_id] = internal_node_id + self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id + self.__ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id + self.__ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id) + # @todo: node_label_to_id_ + + + def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True): + """ + /*! + * @brief Adds a labeled edge. + * @param[in] graph_id ID of graph that has been added to the environment. + * @param[in] tail The user-specific ID of the tail of the edge that has to be added. + * @param[in] head The user-specific ID of the head of the edge that has to be added. 
+ * @param[in] edge_label The label of the vertex that has to be added. Set to ged::NoLabel() if template parameter @p UserEdgeLabel equals ged::NoLabel. + * @param[in] ignore_duplicates If @p true, duplicate edges are ignores. Otherwise, an exception is thrown if an existing edge is added to the graph. + */ + """ + # @todo: check everything. + self.__initialized = False + # @todo: check ignore_duplicates. + self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label) + # @todo: edge_id and label_id, edge_label_to_id_. + + + def add_nx_graph(self, g, classe, ignore_duplicates=True) : + """ + Add a Graph (made by networkx) on the environment. Be careful to respect the same format as GXL graphs for labelling nodes and edges. + + :param g: The graph to add (networkx graph) + :param ignore_duplicates: If True, duplicate edges are ignored, otherwise it's raise an error if an existing edge is added. True by default + :type g: networkx.graph + :type ignore_duplicates: bool + :return: The ID of the newly added graphe + :rtype: size_t + + .. note:: The NX graph must respect the GXL structure. Please see how a GXL graph is construct. + + """ + graph_id = self.add_graph(g.name, classe) # check if the graph name already exists. + for node in g.nodes: # @todo: if the keys of labels include int and str at the same time. + self.add_node(graph_id, node, tuple(sorted(g.nodes[node].items(), key=lambda kv: kv[0]))) + for edge in g.edges: + self.add_edge(graph_id, edge[0], edge[1], tuple(sorted(g.edges[(edge[0], edge[1])].items(), key=lambda kv: kv[0])), ignore_duplicates) + return graph_id + + + def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False): + if isinstance(init_type, str): + init_type = OptionsStringMap.InitType[init_type] + + # Throw an exception if no edit costs have been selected. 
def set_method(self, method, options=''):
    """Set the GED method to be used by run_method().

    :param method: An ``Options.GEDMethod`` member or its string name.
    :param options: Options forwarded unchanged to the selected method's
        ``set_options()``.
    :raises Exception: If ``method`` is not a known GED method or its
        implementation is not available in ``gklearn.ged.methods``.
    """
    # Reset by assignment rather than `del`: if anything below raises, the
    # attribute still exists and is None, which is the value the other
    # methods of this class test for.
    self.__ged_method = None

    if isinstance(method, str):
        method = OptionsStringMap.GEDMethod[method]

    # Name of the class implementing each method, as used by the original
    # if/elif chain.
    class_names = {
        Options.GEDMethod.BRANCH: 'Branch',
        Options.GEDMethod.BRANCH_FAST: 'BranchFast',
        Options.GEDMethod.BRANCH_TIGHT: 'BranchTight',
        Options.GEDMethod.BRANCH_UNIFORM: 'BranchUniform',
        Options.GEDMethod.BRANCH_COMPACT: 'BranchCompact',
        Options.GEDMethod.PARTITION: 'Partition',
        Options.GEDMethod.HYBRID: 'Hybrid',
        Options.GEDMethod.RING: 'Ring',
        Options.GEDMethod.ANCHOR_AWARE_GED: 'AnchorAwareGED',
        Options.GEDMethod.WALKS: 'Walks',
        Options.GEDMethod.IPFP: 'IPFP',
        Options.GEDMethod.BIPARTITE: 'Bipartite',
        Options.GEDMethod.SUBGRAPH: 'Subgraph',
        Options.GEDMethod.NODE: 'Node',
        Options.GEDMethod.RING_ML: 'RingML',
        Options.GEDMethod.BIPARTITE_ML: 'BipartiteML',
        Options.GEDMethod.REFINE: 'Refine',
        Options.GEDMethod.BP_BEAM: 'BPBeam',
        Options.GEDMethod.SIMULATED_ANNEALING: 'SimulatedAnnealing',
        Options.GEDMethod.HED: 'HED',
        Options.GEDMethod.STAR: 'STAR',
        # ifdef GUROBI
        Options.GEDMethod.F1: 'F1',
        Options.GEDMethod.F2: 'F2',
        Options.GEDMethod.COMPACT_MIP: 'CompactMIP',
        Options.GEDMethod.BLP_NO_EDGE_LABELS: 'BLPNoEdgeLabels',
    }
    class_name = class_names.get(method)
    if class_name is None:
        raise Exception('Unknown GED method: ' + str(method) + '.')

    # Imported inside the method, as the original did for Bipartite.
    from gklearn.ged import methods as ged_methods
    method_class = getattr(ged_methods, class_name, None)
    if method_class is None:
        raise Exception('The GED method ' + method.name + ' (class '
                        + class_name + ') is not available in gklearn.ged.methods.')

    self.__ged_method = method_class(self.__ged_data)
    self.__ged_method.set_options(options)
+ else: + self.__ged_method.run(g_id, h_id) + self.__lower_bounds[(g_id, h_id)] = self.__ged_method.get_lower_bound() + self.__upper_bounds[(g_id, h_id)] = self.__ged_method.get_upper_bound() + self.__runtimes[(g_id, h_id)] = self.__ged_method.get_runtime() + self.__node_maps[(g_id, h_id)] = self.__ged_method.get_node_map() + + + def init_method(self): + """Initializes the method specified by call to set_method(). + """ + if not self.__initialized: + raise Exception('The environment is uninitialized. Call init() before calling init_method().') + if self.__ged_method is None: + raise Exception('No method has been set. Call set_method() before calling init_method().') + self.__ged_method.init() + + + def get_upper_bound(self, g_id, h_id): + """ + /*! + * @brief Returns upper bound for edit distance between the input graphs. + * @param[in] g_id ID of an input graph that has been added to the environment. + * @param[in] h_id ID of an input graph that has been added to the environment. + * @return Upper bound computed by the last call to run_method() with arguments @p g_id and @p h_id. + */ + """ + if (g_id, h_id) not in self.__upper_bounds: + raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_upper_bound(' + str(g_id) + ',' + str(h_id) + ').') + return self.__upper_bounds[(g_id, h_id)] + + + def get_lower_bound(self, g_id, h_id): + """ + /*! + * @brief Returns lower bound for edit distance between the input graphs. + * @param[in] g_id ID of an input graph that has been added to the environment. + * @param[in] h_id ID of an input graph that has been added to the environment. + * @return Lower bound computed by the last call to run_method() with arguments @p g_id and @p h_id. 
def get_init_time(self):
    """Return the initialization time reported by the current GED method.

    :return: Whatever ``get_init_time()`` of the method set by set_method()
        returns.
    :raises Exception: If no method has been set.
    """
    # Same guard as init_method()/run_method(): fail with a clear message
    # instead of an AttributeError on None.
    if self.__ged_method is None:
        raise Exception('No method has been set. Call set_method() before calling get_init_time().')
    return self.__ged_method.get_init_time()
def get_backward_map(self, g_id, h_id):
    """Return the backward map of the node map stored for two graphs.

    :param g_id: ID of the first compared graph.
    :param h_id: ID of the second compared graph.
    :return: The ``backward_map`` attribute of the object returned by
        ``get_node_map(g_id, h_id)``.

    .. seealso:: run_method(), get_forward_map(), get_node_map()
    .. warning:: The node map is looked up with get_node_map(), so
        run_method() must have been called for the same pair before.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.backward_map
np.inf: + if i != dummy_node(): if i < len(self.__forward_map): self.__forward_map[i] = k else: raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') - if k != np.inf: + if k != dummy_node(): if k < len(self.__backward_map): self.__backward_map[k] = i else: diff --git a/gklearn/ged/methods/__init__.py b/gklearn/ged/methods/__init__.py new file mode 100644 index 0000000..5879b9c --- /dev/null +++ b/gklearn/ged/methods/__init__.py @@ -0,0 +1,3 @@ +from gklearn.ged.methods.ged_method import GEDMethod +from gklearn.ged.methods.lsape_based_method import LSAPEBasedMethod +from gklearn.ged.methods.bipartite import Bipartite diff --git a/gklearn/ged/methods/bipartite.py b/gklearn/ged/methods/bipartite.py new file mode 100644 index 0000000..aa295c4 --- /dev/null +++ b/gklearn/ged/methods/bipartite.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Jun 18 16:09:29 2020 + +@author: ljia +""" +import numpy as np +import networkx as nx +from gklearn.ged.methods import LSAPEBasedMethod +from gklearn.ged.util import LSAPESolver +from gklearn.utils import SpecialLabel + + +class Bipartite(LSAPEBasedMethod): + + + def __init__(self, ged_data): + super().__init__(ged_data) + self._compute_lower_bound = False + + + ########################################################################### + # Inherited member functions from LSAPEBasedMethod. 
def _compute_substitution_cost(self, g, h, u, v):
    """Return the cost of substituting node ``u`` of ``g`` by node ``v`` of ``h``.

    The result is the node cost between the two node labels plus the minimal
    cost of a local assignment problem over the edges incident to the nodes.

    Layout of the local ``(d1 + d2) x (d1 + d2)`` matrix, with ``d1`` and
    ``d2`` the degrees of ``u`` and ``v``: entry ``[i, j]`` (``i < d1``,
    ``j < d2``) is the edge relabelling cost, ``[i, d2 + i]`` the deletion
    cost of the i-th edge at ``u``, ``[d1 + j, j]`` the insertion cost of the
    j-th edge at ``v``, the block ``[d1:, d2:]`` is zero and every remaining
    entry is ``inf``.
    """
    ged_data = self._ged_data

    # Node part of the substitution.
    total = ged_data.node_cost(g.nodes[u]['label'], h.nodes[v]['label'])

    # Local assignment problem, forbidden (inf) everywhere by default.
    deg_u, deg_v = g.degree[u], h.degree[v]
    size = deg_u + deg_v
    local = np.full((size, size), np.inf)
    local[deg_u:, deg_v:] = 0

    # Deleting each edge incident to u.  @todo: should directed graphs be considered?
    for row, attrs in enumerate(g[u].values()):
        local[row, deg_v + row] = ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)

    # Inserting each edge incident to v.  @todo: should directed graphs be considered?
    for col, attrs in enumerate(h[v].values()):
        local[deg_u + col, col] = ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])

    # Relabelling an edge at u into an edge at v.
    for row, attrs_u in enumerate(g[u].values()):
        for col, attrs_v in enumerate(h[v].values()):
            local[row, col] = ged_data.edge_cost(attrs_u['label'], attrs_v['label'])

    # Solve the local problem and add its optimum to the node cost.
    solver = LSAPESolver(local)
    solver.set_model(self._lsape_model)
    solver.solve()
    total += solver.minimal_cost()
    return total
class GEDMethod(object):
    """Base class of the graph edit distance (GED) methods.

    Derived classes customise behaviour through the ``_ged_*`` hooks; the
    public methods drive those hooks and record bounds, node map and timings.
    """

    def __init__(self, ged_data):
        """Store the shared data object and reset all results.

        :param ged_data: Data object of the environment; ``run()`` reads the
            graphs from its ``_graphs`` attribute.
        """
        self._initialized = False
        self._ged_data = ged_data
        self._options = None
        self._lower_bound = 0
        self._upper_bound = np.inf
        self._node_map = [0, 0]  # @todo
        self._runtime = None
        self._init_time = None

    def init(self):
        """Initialize the method with the options specified by set_options().

        The time spent in ``_ged_init()`` is recorded and can be read with
        get_init_time().
        """
        start = time.time()
        self._ged_init()
        end = time.time()
        self._init_time = end - start
        self._initialized = True

    def set_options(self, options):
        """Reset all options to their defaults, then apply ``options``.

        :param options: Mapping from option name to option value; every pair
            is passed to ``_ged_parse_option()``.
        :raises Exception: If ``_ged_parse_option()`` rejects an option.
        """
        self._ged_set_default_options()
        for key, val in options.items():
            if not self._ged_parse_option(key, val):
                # One formatted string: several positional arguments would be
                # rendered as a tuple.  @todo: usage string is not implemented.
                raise Exception('Invalid option "' + str(key) + '". Usage: options = "'
                                + self._ged_valid_options_string() + '".')
        # Changing options invalidates a previous init().
        self._initialized = False

    def run(self, g_id, h_id):
        """Run the method on two graphs of the data object and store the results.

        :param g_id: ID of input graph.
        :param h_id: ID of input graph.

        The result of run_as_util() must provide the keys ``'lower_bound'``,
        ``'upper_bound'`` and ``'node_maps'``; the ``{}`` returned by the base
        ``_ged_run()`` therefore makes this method raise ``KeyError``.
        """
        start = time.time()
        result = self.run_as_util(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id])
        end = time.time()
        self._lower_bound = result['lower_bound']
        self._upper_bound = result['upper_bound']
        # Keep the previous node map when the method produced none.
        if len(result['node_maps']) > 0:
            self._node_map = result['node_maps'][0]
        self._runtime = end - start

    def run_as_util(self, g, h):
        """Run the method on two graph objects and return its result.

        :param g: Input graph.
        :param h: Input graph.
        :return: Whatever ``_ged_run(g, h)`` returns.
        """
        # Compute optimal solution and return if at least one of the two graphs is empty.
        if nx.number_of_nodes(g) == 0 or nx.number_of_nodes(h) == 0:
            print('This is not implemented.')
            pass  # @todo:

        # Run the method.
        return self._ged_run(g, h)

    def get_upper_bound(self):
        """Return the upper bound stored by the last call to run().

        :return: The stored upper bound; ``np.inf`` until run() has stored one.
        """
        return self._upper_bound

    def get_lower_bound(self):
        """Return the lower bound stored by the last call to run().

        :return: The stored lower bound; ``0`` until run() has stored one.
        """
        return self._lower_bound

    def get_runtime(self):
        """Return the runtime of the last call to run().

        :return: Runtime in seconds, or ``None`` if run() has not been called.
        """
        return self._runtime

    def get_init_time(self):
        """Return the runtime of the last call to init().

        :return: Runtime in seconds, or ``None`` if init() has not been called.
        """
        return self._init_time

    def get_node_map(self):
        """Return the node map stored by the last call to run().

        :return: First node map of the last result that contained any;
            the placeholder ``[0, 0]`` before that.
        """
        return self._node_map

    def _ged_init(self):
        """Initialize the method.

        Must be overridden by derived classes that require initialization.
        """
        pass

    def _ged_parse_option(self, option, arg):
        """Parse one option.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the method
            and ``False`` otherwise; the base class accepts no option.

        Must be overridden by derived classes that have options.
        """
        return False

    def _ged_run(self, g, h):
        """Run the method with the options specified by set_options().

        :param g: Input graph.
        :param h: Input graph.
        :return: Result dictionary; the base class returns an empty one.

        Must be overridden by derived classes.
        """
        return {}

    def _ged_valid_options_string(self):
        """Return a string describing all valid options.

        Must be overridden by derived classes that have options.
        """
        return ''

    def _ged_set_default_options(self):
        """Set all options to default values.

        Must be overridden by derived classes that have options.
        """
        pass
+ nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h) + lsape_instance = np.ones((nb1 + nb2, nb1 + nb2)) * np.inf +# lsape_instance = np.empty((nx.number_of_nodes(g) + 1, nx.number_of_nodes(h) + 1)) + self.populate_instance(g, h, lsape_instance) + +# nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h) +# lsape_instance_new = np.empty((nb1 + nb2, nb1 + nb2)) * np.inf +# lsape_instance_new[nb1:, nb2:] = 0 +# lsape_instance_new[0:nb1, 0:nb2] = lsape_instance[0:nb1, 0:nb2] +# for i in range(nb1): # all u's neighbor +# lsape_instance_new[i, nb2 + i] = lsape_instance[i, nb2] +# for i in range(nb2): # all u's neighbor +# lsape_instance_new[nb1 + i, i] = lsape_instance[nb2, i] +# lsape_solver = LSAPESolver(lsape_instance_new) + + lsape_solver = LSAPESolver(lsape_instance) + + # Solve the LSAPE instance. + if self._solve_optimally: + lsape_solver.set_model(self._lsape_model) + else: + lsape_solver.set_greedy_method(self._greedy_method) + lsape_solver.solve(self._max_num_solutions) + + # Compute and store lower and upper bound. + if self._compute_lower_bound and self._solve_optimally: + result['lower_bound'] = lsape_solver.minimal_cost() * self._lsape_lower_bound_scaling_factor(g, h) # @todo: test + + for solution_id in range(0, lsape_solver.num_solutions()): + result['node_maps'].append(NodeMap(nx.number_of_nodes(g), nx.number_of_nodes(h))) + misc.construct_node_map_from_solver(lsape_solver, result['node_maps'][-1], solution_id) + self._ged_data.compute_induced_cost(g, h, result['node_maps'][-1]) + + # Add centralities and reoptimize. + if self._centrality_weight > 0 and self._centrality_method != 'NODE': + print('This is not implemented.') + pass # @todo + + # Sort the node maps and set the upper bound. 
def _ged_parse_option(self, option, arg):
    """Store one option, delegating unknown names to the derived class.

    Options shared by all LSAPE based methods are written to the matching
    attribute; for ``'max_num_solutions'`` the argument ``'ALL'`` is stored
    as ``-1``. Any other option name is handed to ``_lsape_parse_option()``.

    :param option: The name of the option.
    :param arg: The argument of the option.
    :return: Always ``True`` (see the @todo below).
    """
    shared_options = (
        ('threads', '_num_threads'),  # @todo: try.. catch...
        ('lsape_model', '_lsape_model'),  # @todo
        ('greedy_method', '_greedy_method'),  # @todo
        ('optimal', '_solve_optimally'),  # @todo
        ('centrality_method', '_centrality_method'),  # @todo
        ('centrality_weight', '_centrality_weight'),  # @todo
        ('max_num_solutions', '_max_num_solutions'),  # @todo
    )
    for name, attribute in shared_options:
        if option == name:
            if name == 'max_num_solutions' and arg == 'ALL':
                arg = -1
            setattr(self, attribute, arg)
            break
    else:
        # Not a shared option: let the derived class look at it.
        self._lsape_parse_option(option, arg)

    # @todo: this is not in the C++ code -- every option is reported as valid.
    return True
+ */ + """ + pass + + + def _lsape_parse_option(self, option, arg): + """ + /*! + * @brief Parses one option that is not among the ones shared by all derived classes of ged::LSAPEBasedMethod. + * @param[in] option The name of the option. + * @param[in] arg The argument of the option. + * @return Returns true if @p option is a valid option name for the method and false otherwise. + * @note Must be overridden by derived classes of ged::LSAPEBasedMethod that have options that are not among the ones shared by all derived classes of ged::LSAPEBasedMethod. + */ + """ + return False + + + def _lsape_set_default_options(self): + """ + /*! + * @brief Sets all options that are not among the ones shared by all derived classes of ged::LSAPEBasedMethod to default values. + * @note Must be overridden by derived classes of ged::LSAPEBasedMethod that have options that are not among the ones shared by all derived classes of ged::LSAPEBasedMethod. + */ + """ + pass + + + def _lsape_populate_instance(self, g, h, lsape_instance): + """ + /*! + * @brief Populates the LSAPE instance. + * @param[in] g Input graph. + * @param[in] h Input graph. + * @param[out] lsape_instance LSAPE instance of size (n + 1) x (m + 1), where n and m are the number of nodes in @p g and @p h. The last row and the last column represent insertion and deletion. + * @note Must be overridden by derived classes of ged::LSAPEBasedMethod. + */ + """ + pass + + + def _lsape_init_graph(self, graph): + """ + /*! + * @brief Initializes global variables for one graph. + * @param[in] graph Graph for which the global variables have to be initialized. + * @note Must be overridden by derived classes of ged::LSAPEBasedMethod that require to initialize custom global variables. + */ + """ + pass + + + def _lsape_pre_graph_init(self, called_at_runtime): + """ + /*! + * @brief Initializes the method at runtime or during initialization before initializing the global variables for the graphs. 
class LSAPESolver(object):
    """Solver for an LSAPE problem instance given as a cost matrix.

    Optimal solving is delegated to ``scipy.optimize.linear_sum_assignment``.
    The stored assignments are indexed by row/column position, so the cost
    matrix is expected to be square (as built by the LSAPE based methods).
    """

    def __init__(self, cost_matrix=None):
        """Construct a solver for one LSAPE problem instance.

        :param cost_matrix: The LSAPE problem instance that should be solved.
        """
        self.__cost_matrix = cost_matrix
        self.__model = 'ECBP'
        self.__greedy_method = 'BASIC'
        self.__solve_optimally = True
        self.__minimal_cost = 0
        self.__row_to_col_assignments = []
        self.__col_to_row_assignments = []
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def clear_solution(self):
        """Clear a previously computed solution.

        Afterwards exactly one (empty) solution slot exists.
        """
        self.__minimal_cost = 0
        self.__row_to_col_assignments.clear()
        self.__col_to_row_assignments.clear()
        self.__row_to_col_assignments.append([])  # @todo
        self.__col_to_row_assignments.append([])
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def set_model(self, model):
        """Make the solver use a specific model for optimal solving.

        :param model: The model that should be used.
        """
        self.__solve_optimally = True
        self.__model = model

    def set_greedy_method(self, greedy_method):
        """Make the solver use a greedy method instead of optimal solving.

        Counterpart of set_model(); LSAPE based methods call it when they are
        configured not to solve optimally.

        :param greedy_method: The greedy method that should be used.
        """
        self.__solve_optimally = False
        self.__greedy_method = greedy_method

    def solve(self, num_solutions=1):
        """Solve the LSAPE problem instance.

        :param num_solutions: The maximal number of solutions that should be
            computed. Only one solution is computed at the moment.
        :raises NotImplementedError: If a greedy method has been selected.
        """
        if not self.__solve_optimally:
            # @todo: greedy.  Fail loudly rather than leave an empty solution
            # behind that breaks later get_assigned_col()/get_assigned_row().
            raise NotImplementedError(
                'Greedy LSAPE solving (' + str(self.__greedy_method) + ') is not implemented.')

        self.clear_solution()
        # @todo: only hungarianLSAPE ('ECBP') can be used.
        row_id, col_id = linear_sum_assignment(self.__cost_matrix)
        self.__row_to_col_assignments[0] = col_id
        # Inverse permutation of col_id.  @todo: might be slow, can use row_id
        self.__col_to_row_assignments[0] = np.argsort(col_id)
        self.__compute_cost_from_assignments()
        if num_solutions > 1:
            pass  # @todo:

    def minimal_cost(self):
        """Return the cost of the computed solution.

        :return: Cost stored by the last call to solve(); ``0`` before that.
        """
        return self.__minimal_cost

    def get_assigned_col(self, row, solution_id=0):
        """Return the column assigned to a row.

        :param row: Row whose assigned column should be returned.
        :param solution_id: ID of the solution where the assignment should be
            looked up.
        :return: Column to which ``row`` is assigned in that solution.
        """
        return self.__row_to_col_assignments[solution_id][row]

    def get_assigned_row(self, col, solution_id=0):
        """Return the row assigned to a column.

        :param col: Column whose assigned row should be returned.
        :param solution_id: ID of the solution where the assignment should be
            looked up.
        :return: Row to which ``col`` is assigned in that solution.
        """
        return self.__col_to_row_assignments[solution_id][col]

    def num_solutions(self):
        """Return the number of stored solutions.

        :return: Number of solution slots; ``0`` before the first call to
            solve() or clear_solution().
        """
        return len(self.__row_to_col_assignments)

    def __compute_cost_from_assignments(self):  # @todo
        """Sum the cost matrix entries selected by the first solution."""
        assignments = self.__row_to_col_assignments[0]
        self.__minimal_cost = np.sum(self.__cost_matrix[range(0, len(assignments)), assignments])
def compute_geds_cml(graphs, options=None, sort=True, parallel=False, verbose=True):
    """Compute the pairwise graph edit distances of ``graphs`` with ``GEDEnv``.

    Parameters
    ----------
    graphs : list
        Graphs (NetworkX) to compare; every unordered pair is evaluated once.
    options : dict
        Settings. The keys read here are ``'edit_cost'``,
        ``'edit_cost_constants'``, ``'init_option'``, ``'method'``,
        ``'node_labels'``, ``'edge_labels'``, ``'node_attrs'`` and
        ``'edge_attrs'``. The dict is also passed to ``GEDEnv.set_method`` as
        the method options. The caller's dict is never modified.
    sort : bool
        In the sequential path, when True the graph with fewer nodes is used
        as the source graph of each pair. In the parallel path the flag is
        forwarded to the worker wrapper.
    parallel : bool
        When True, pairs are distributed over a ``multiprocessing`` pool with
        one process per CPU, and ``'threads'`` is forced to 1 in the method
        options.
    verbose : bool
        When True, show a ``tqdm`` progress bar on stdout.

    Returns
    -------
    ged_vec : list
        Distances of all pairs ``(i, j)`` with ``i < j``, in the order produced
        by ``itertools.combinations``.
    ged_mat : numpy.ndarray
        Symmetric ``len(graphs) x len(graphs)`` matrix of the same distances.
    n_edit_operations : list
        For each pair, in the same order as ``ged_vec``, the value returned by
        ``get_nb_edit_operations``.

    Raises
    ------
    KeyError
        If a required key is missing from ``options``.
    """
    # Work on a private copy: 'threads' is overwritten below and that must not
    # leak into the dict owned by the caller.
    options = dict(options) if options is not None else {}
    n_graphs = len(graphs)

    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_type=options['init_option'])
    if parallel:
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    neo_options = {'edit_cost': options['edit_cost'],
                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
    ged_mat = np.zeros((n_graphs, n_graphs))
    if parallel:
        len_itr = n_graphs * (n_graphs - 1) // 2
        ged_vec = [0] * len_itr
        n_edit_operations = [0] * len_itr
        itr = combinations(range(n_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Publish the shared objects as module globals of each worker.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
        try:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
            if verbose:
                iterator = tqdm(iterator, desc='computing GEDs', file=sys.stdout)
            for i, j, dis, n_eo_tmp in iterator:
                # Position of pair (i, j), i < j, in combinations() order.
                idx_itr = n_graphs * i + j - (i + 1) * (i + 2) // 2
                ged_vec[idx_itr] = dis
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_edit_operations[idx_itr] = n_eo_tmp
        finally:
            # Release the worker processes even when a task raised.
            pool.close()
            pool.join()

    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(n_graphs), desc='computing GEDs', file=sys.stdout)
        else:
            iterator = range(n_graphs)
        for i in iterator:
            for j in range(i + 1, n_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Smaller graph first; swap the maps back so they still
                    # describe the (i, j) direction.
                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations
def test_GEDEnv():
    """Test GEDEnv.

    Loads the first two graphs of the predefined "MUTAG" dataset, runs the
    'BIPARTITE' method on them and checks that the forward and backward node
    maps have one entry per node of the source and target graph respectively.
    """
    # 1. Get dataset.
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'

    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # 2. Compute graph edit distance.
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost('CONSTANT',  # GED cost type.
                              edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
                              )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
        options = {'initialization_method': 'RANDOM',  # or 'NODE', etc.
                   'threads': 1  # parallel threads.
                   }
        ged_env.set_method('BIPARTITE',  # GED method.
                           options  # options for GED method.
                           )
        ged_env.init_method()  # initialize GED method.

        ged_env.run_method(listID[0], listID[1])  # run.

        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        dis = ged_env.get_upper_bound(listID[0], listID[1])  # GED between two graphs.

        import networkx as nx
        # Two separate asserts: in `assert a, b` the second expression is only
        # the failure message and is never checked.
        assert len(pi_forward) == nx.number_of_nodes(graph1)
        assert len(pi_backward) == nx.number_of_nodes(graph2)

    except Exception as exception:
        assert False, exception


if __name__ == "__main__":
    test_GEDEnv()
def dummy_node():
    """Return the ID of the dummy node.

    Returns
    -------
    float
        ``np.inf``.
    """
    # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
    # NOTE: dummy_node() and undefined_node() currently return the same value,
    # so the two cannot be told apart by comparing IDs.
    return np.inf


def undefined_node():
    """Return the ID of an undefined node.

    Returns
    -------
    float
        ``np.inf``.
    """
    return np.inf


def dummy_edge():
    """Return the ID of the dummy edge.

    Returns
    -------
    float
        ``np.inf``.
    """
    return np.inf


@unique
class SpecialLabel(Enum):
    """Special labels; ``@unique`` forbids two members sharing a value."""
    DUMMY = 1  # The dummy label.
    # DUMMY = auto # enum.auto does not exist in Python 3.5.