Browse Source

Add ged module.

v0.2.x
jajupmochi 5 years ago
parent
commit
5eb69de7ea
23 changed files with 1816 additions and 28 deletions
  1. +3
    -1
      .coveragerc
  2. +58
    -0
      gklearn/examples/compute_graph_edit_distance.py
  3. +2
    -0
      gklearn/ged/edit_costs/__init__.py
  4. +50
    -0
      gklearn/ged/edit_costs/constant.py
  5. +88
    -0
      gklearn/ged/edit_costs/edit_cost.py
  6. +3
    -1
      gklearn/ged/env/__init__.py
  7. +147
    -6
      gklearn/ged/env/common_types.py
  8. +181
    -0
      gklearn/ged/env/ged_data.py
  9. +369
    -0
      gklearn/ged/env/ged_env.py
  10. +20
    -8
      gklearn/ged/env/node_map.py
  11. +3
    -0
      gklearn/ged/methods/__init__.py
  12. +117
    -0
      gklearn/ged/methods/bipartite.py
  13. +195
    -0
      gklearn/ged/methods/ged_method.py
  14. +254
    -0
      gklearn/ged/methods/lsape_based_method.py
  15. +2
    -0
      gklearn/ged/util/__init__.py
  16. +121
    -0
      gklearn/ged/util/lsape_solver.py
  17. +21
    -0
      gklearn/ged/util/misc.py
  18. +78
    -1
      gklearn/ged/util/util.py
  19. +1
    -0
      gklearn/preimage/__init__.py
  20. +57
    -0
      gklearn/tests/test_ged_env.py
  21. +4
    -1
      gklearn/tests/test_median_preimage_generator.py
  22. +1
    -1
      gklearn/utils/__init__.py
  23. +41
    -9
      gklearn/utils/utils.py

+ 3
- 1
.coveragerc View File

@@ -1,2 +1,4 @@
[run]
omit = gklearn/tests/*
omit =
gklearn/tests/*
gklearn/examples/*

+ 58
- 0
gklearn/examples/compute_graph_edit_distance.py View File

@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
"""compute_graph_edit_distance.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3

**This script demonstrates how to compute a graph edit distance.**
---

**0. Install `graphkit-learn`.**
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'

# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
# The first two graphs of the dataset are the pair that gets compared below.
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)

"""**2. Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv


ged_env = GEDEnv() # initialize GED environment.
ged_env.set_edit_cost('CONSTANT', # GED cost type.
edit_cost_constants=[3, 3, 1, 3, 3, 1] # edit costs, in order: node ins/del/rel, edge ins/del/rel.
)
ged_env.add_nx_graph(graph1, '') # add graph1 (second argument is the graph class).
ged_env.add_nx_graph(graph2, '') # add graph2
listID = ged_env.get_all_graph_ids() # get the list of graph IDs.
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES') # initialize GED environment.
options = {'initialization_method': 'RANDOM', # or 'NODE', etc.
'threads': 1 # parallel threads.
}
ged_env.set_method('BIPARTITE', # GED method.
options # options for GED method.
)
ged_env.init_method() # initialize GED method.

ged_env.run_method(listID[0], listID[1]) # run.

pi_forward = ged_env.get_forward_map(listID[0], listID[1]) # forward map.
pi_backward = ged_env.get_backward_map(listID[0], listID[1]) # backward map.
dis = ged_env.get_upper_bound(listID[0], listID[1]) # upper bound on the GED between the two graphs.
print(pi_forward)
print(pi_backward)
print(dis)

+ 2
- 0
gklearn/ged/edit_costs/__init__.py View File

@@ -0,0 +1,2 @@
from gklearn.ged.edit_costs.edit_cost import EditCost
from gklearn.ged.edit_costs.constant import Constant

+ 50
- 0
gklearn/ged/edit_costs/constant.py View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 17:52:23 2020

@author: ljia
"""
from gklearn.ged.edit_costs import EditCost


class Constant(EditCost):
    """Edit cost model in which every edit operation has a fixed price.

    Insertions and deletions always cost their configured constant, whatever
    label is involved.  A relabeling costs its configured constant when the
    two labels differ and ``0`` when they compare equal.
    """

    def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1, edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
        """Store the six constants.

        :param node_ins_cost: Cost of inserting a node.
        :param node_del_cost: Cost of deleting a node.
        :param node_rel_cost: Cost of relabeling a node to a different label.
        :param edge_ins_cost: Cost of inserting an edge.
        :param edge_del_cost: Cost of deleting an edge.
        :param edge_rel_cost: Cost of relabeling an edge to a different label.
        """
        # Node constants.
        self.__node_ins_cost = node_ins_cost
        self.__node_del_cost = node_del_cost
        self.__node_rel_cost = node_rel_cost
        # Edge constants.
        self.__edge_ins_cost = edge_ins_cost
        self.__edge_del_cost = edge_del_cost
        self.__edge_rel_cost = edge_rel_cost

    def node_ins_cost_fun(self, node_label):
        """Return the node insertion constant; ``node_label`` is not inspected."""
        return self.__node_ins_cost

    def node_del_cost_fun(self, node_label):
        """Return the node deletion constant; ``node_label`` is not inspected."""
        return self.__node_del_cost

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the node relabeling constant, or ``0`` for equal labels."""
        return self.__node_rel_cost if node_label_1 != node_label_2 else 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the edge insertion constant; ``edge_label`` is not inspected."""
        return self.__edge_ins_cost

    def edge_del_cost_fun(self, edge_label):
        """Return the edge deletion constant; ``edge_label`` is not inspected."""
        return self.__edge_del_cost

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the edge relabeling constant, or ``0`` for equal labels."""
        return self.__edge_rel_cost if edge_label_1 != edge_label_2 else 0

+ 88
- 0
gklearn/ged/edit_costs/edit_cost.py View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 17:49:24 2020

@author: ljia
"""


class EditCost(object):
    """Base class for edit cost models.

    Every cost function defined here returns ``0``.  Concrete cost models are
    meant to override all six of them.
    """

    def __init__(self):
        pass

    def node_ins_cost_fun(self, node_label):
        """Return the cost of inserting a node.

        :param node_label: Label of the node being inserted.
        :return: ``0`` in this base class.

        .. note:: Must be overridden by derived classes.
        """
        return 0

    def node_del_cost_fun(self, node_label):
        """Return the cost of deleting a node.

        :param node_label: Label of the node being deleted.
        :return: ``0`` in this base class.

        .. note:: Must be overridden by derived classes.
        """
        return 0

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the cost of relabeling a node.

        :param node_label_1: Label before the change.
        :param node_label_2: Label after the change.
        :return: ``0`` in this base class.

        .. note:: Must be overridden by derived classes.
        """
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the cost of inserting an edge.

        :param edge_label: Label of the edge being inserted.
        :return: ``0`` in this base class.

        .. note:: Must be overridden by derived classes.
        """
        return 0

    def edge_del_cost_fun(self, edge_label):
        """Return the cost of deleting an edge.

        :param edge_label: Label of the edge being deleted.
        :return: ``0`` in this base class.

        .. note:: Must be overridden by derived classes.
        """
        return 0

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the cost of relabeling an edge.

        :param edge_label_1: Label before the change.
        :param edge_label_2: Label after the change.
        :return: ``0`` in this base class.

        .. note:: Must be overridden by derived classes.
        """
        return 0

+ 3
- 1
gklearn/ged/env/__init__.py View File

@@ -1,2 +1,4 @@
from gklearn.ged.env.common_types import AlgorithmState
from gklearn.ged.env.common_types import Options, OptionsStringMap, AlgorithmState
from gklearn.ged.env.ged_data import GEDData
from gklearn.ged.env.ged_env import GEDEnv
from gklearn.ged.env.node_map import NodeMap

+ 147
- 6
gklearn/ged/env/common_types.py View File

@@ -8,11 +8,152 @@ Created on Thu Mar 19 18:17:38 2020

from enum import Enum, unique


class Options(object):
    """Namespace for the enumerations used to configure a GED environment."""

    @unique
    class GEDMethod(Enum):
        """Available GED computation methods."""
        # The first four entries sit between "#ifdef GUROBI" / "#endif" markers
        # carried over from the C++ source these enums were ported from.
        F1 = 1  # ged::F1
        F2 = 2  # ged::F2
        COMPACT_MIP = 3  # ged::CompactMIP
        BLP_NO_EDGE_LABELS = 4  # ged::BLPNoEdgeLabels
        # End of the GUROBI-guarded entries.
        BRANCH = 5  # ged::Branch
        BRANCH_FAST = 6  # ged::BranchFast
        BRANCH_TIGHT = 7  # ged::BranchTight
        BRANCH_UNIFORM = 8  # ged::BranchUniform
        BRANCH_COMPACT = 9  # ged::BranchCompact
        PARTITION = 10  # ged::Partition
        HYBRID = 11  # ged::Hybrid
        RING = 12  # ged::Ring
        ANCHOR_AWARE_GED = 13  # ged::AnchorAwareGED
        WALKS = 14  # ged::Walks
        IPFP = 15  # ged::IPFP
        BIPARTITE = 16  # ged::Bipartite
        SUBGRAPH = 17  # ged::Subgraph
        NODE = 18  # ged::Node
        RING_ML = 19  # ged::RingML
        BIPARTITE_ML = 20  # ged::BipartiteML
        REFINE = 21  # ged::Refine
        BP_BEAM = 22  # ged::BPBeam
        SIMULATED_ANNEALING = 23  # ged::SimulatedAnnealing
        HED = 24  # ged::HED
        STAR = 25  # ged::Star

    @unique
    class EditCosts(Enum):
        """Available edit cost models."""
        CHEM_1 = 1  # ged::CHEM1
        CHEM_2 = 2  # ged::CHEM2
        CMU = 3  # ged::CMU
        GREC_1 = 4  # ged::GREC1
        GREC_2 = 5  # ged::GREC2
        PROTEIN = 6  # ged::Protein
        FINGERPRINT = 7  # ged::Fingerprint
        LETTER = 8  # ged::Letter
        LETTER2 = 9  # ged::Letter2
        NON_SYMBOLIC = 10  # ged::NonSymbolic
        CONSTANT = 11  # ged::Constant

    @unique
    class InitType(Enum):
        """Initialization type of the environment.

        With eager initialization all edit costs are pre-computed when the
        environment is initialized; otherwise they are computed at runtime.
        With shuffled copies, a shuffled copy of every graph is created and is
        used when run_method() is called with two identical graph IDs, so the
        graph is compared to an isomorphic but non-identical graph.  Without
        shuffled copies, such a call compares the graph to itself.
        """
        LAZY_WITHOUT_SHUFFLED_COPIES = 1  # Lazy, no shuffled graph copies.
        EAGER_WITHOUT_SHUFFLED_COPIES = 2  # Eager, no shuffled graph copies.
        LAZY_WITH_SHUFFLED_COPIES = 3  # Lazy, shuffled graph copies are built.
        EAGER_WITH_SHUFFLED_COPIES = 4  # Eager, shuffled graph copies are built.

    @unique
    class AlgorithmState(Enum):
        """State of an algorithm."""
        CALLED = 1  # The algorithm has been called.
        INITIALIZED = 2  # The algorithm has been initialized.
        CONVERGED = 3  # The algorithm has converged.
        TERMINATED = 4  # The algorithm has terminated.


class OptionsStringMap(object):
    """Lookup tables from option names (strings) to the ``Options`` enums.

    These let callers select methods, edit costs and initialization types by
    name, e.g. ``OptionsStringMap.InitType['LAZY_WITHOUT_SHUFFLED_COPIES']``.
    """

    # Map of available computation methods between enum type and string.
    GEDMethod = {
        "BRANCH": Options.GEDMethod.BRANCH,
        "BRANCH_FAST": Options.GEDMethod.BRANCH_FAST,
        "BRANCH_TIGHT": Options.GEDMethod.BRANCH_TIGHT,
        "BRANCH_UNIFORM": Options.GEDMethod.BRANCH_UNIFORM,
        "BRANCH_COMPACT": Options.GEDMethod.BRANCH_COMPACT,
        "PARTITION": Options.GEDMethod.PARTITION,
        "HYBRID": Options.GEDMethod.HYBRID,
        "RING": Options.GEDMethod.RING,
        "ANCHOR_AWARE_GED": Options.GEDMethod.ANCHOR_AWARE_GED,
        "WALKS": Options.GEDMethod.WALKS,
        "IPFP": Options.GEDMethod.IPFP,
        "BIPARTITE": Options.GEDMethod.BIPARTITE,
        "SUBGRAPH": Options.GEDMethod.SUBGRAPH,
        "NODE": Options.GEDMethod.NODE,
        "RING_ML": Options.GEDMethod.RING_ML,
        "BIPARTITE_ML": Options.GEDMethod.BIPARTITE_ML,
        "REFINE": Options.GEDMethod.REFINE,
        "BP_BEAM": Options.GEDMethod.BP_BEAM,
        "SIMULATED_ANNEALING": Options.GEDMethod.SIMULATED_ANNEALING,
        "HED": Options.GEDMethod.HED,
        "STAR": Options.GEDMethod.STAR,
        # Entries marked "#ifdef GUROBI" in the original source.
        "F1": Options.GEDMethod.F1,
        "F2": Options.GEDMethod.F2,
        "COMPACT_MIP": Options.GEDMethod.COMPACT_MIP,
        "BLP_NO_EDGE_LABELS": Options.GEDMethod.BLP_NO_EDGE_LABELS,
    }

    # Map of available edit cost functions between enum type and string.
    EditCosts = {
        "CHEM_1": Options.EditCosts.CHEM_1,
        "CHEM_2": Options.EditCosts.CHEM_2,
        "CMU": Options.EditCosts.CMU,
        "GREC_1": Options.EditCosts.GREC_1,
        "GREC_2": Options.EditCosts.GREC_2,
        "LETTER": Options.EditCosts.LETTER,
        "LETTER2": Options.EditCosts.LETTER2,
        "NON_SYMBOLIC": Options.EditCosts.NON_SYMBOLIC,
        "FINGERPRINT": Options.EditCosts.FINGERPRINT,
        "PROTEIN": Options.EditCosts.PROTEIN,
        "CONSTANT": Options.EditCosts.CONSTANT,
    }

    # Map of available initialization types of the environment between enum
    # type and string.  The last entry used to repeat
    # "LAZY_WITH_SHUFFLED_COPIES", which left "EAGER_WITH_SHUFFLED_COPIES"
    # unselectable by name.
    InitType = {
        "LAZY_WITHOUT_SHUFFLED_COPIES": Options.InitType.LAZY_WITHOUT_SHUFFLED_COPIES,
        "EAGER_WITHOUT_SHUFFLED_COPIES": Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES,
        "LAZY_WITH_SHUFFLED_COPIES": Options.InitType.LAZY_WITH_SHUFFLED_COPIES,
        "EAGER_WITH_SHUFFLED_COPIES": Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
    }

@unique
class AlgorithmState(Enum):
    """State of an algorithm.

    Each member is defined exactly once: assigning the same member name twice
    inside an ``Enum`` body raises ``TypeError`` when the class is created.
    """
    CALLED = 1  # The algorithm has been called.
    INITIALIZED = 2  # The algorithm has been initialized.
    CONVERGED = 3  # The algorithm has converged.
    TERMINATED = 4  # The algorithm has terminated.

+ 181
- 0
gklearn/ged/env/ged_data.py View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 15:05:01 2020

@author: ljia
"""
from gklearn.ged.env import Options, OptionsStringMap
from gklearn.ged.edit_costs import Constant
from gklearn.utils import SpecialLabel, dummy_node


class GEDData(object):
    """Graphs, labels and edit costs shared by a GED environment and its methods."""

    def __init__(self):
        """Create an empty data container with no graphs and no edit costs."""
        self._graphs = []
        self._graph_names = []
        self._graph_classes = []
        self._num_graphs_without_shuffled_copies = 0
        self._strings_to_internal_node_ids = []
        self._internal_node_ids_to_strings = []
        self._edit_cost = None
        # Pre-computed cost tables read on the eager path of node_cost() and
        # edge_cost().  NOTE(review): nothing in this class fills them.
        self._node_costs = None
        self._edge_costs = None
        self._node_labels = []
        self._edge_labels = []
        self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
        self._delete_edit_cost = True
        self._max_num_nodes = 0
        self._max_num_edges = 0

    def num_graphs(self):
        """Return the number of graphs stored in the instance."""
        return len(self._graphs)

    def shuffled_graph_copies_available(self):
        """Return ``True`` if the current init type is one with shuffled copies."""
        return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)

    def node_cost(self, label1, label2):
        """Return the node relabeling, insertion, or deletion cost.

        :param label1: First node label.
        :param label2: Second node label.
        :return: On the eager path, the entry ``(label1, label2)`` of the
            pre-computed node cost table.  Otherwise ``0`` for equal labels,
            the insertion cost of ``label2`` if ``label1`` is the dummy label,
            the deletion cost of ``label1`` if ``label2`` is the dummy label,
            and the relabeling cost in every other case.
        """
        if self._eager_init():  # @todo: check if correct
            return self._node_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_del_cost_fun(label1)
        return self._edit_cost.node_rel_cost_fun(label1, label2)

    def edge_cost(self, label1, label2):
        """Return the edge relabeling, insertion, or deletion cost.

        :param label1: First edge label.
        :param label2: Second edge label.
        :return: On the eager path, the entry ``(label1, label2)`` of the
            pre-computed edge cost table.  Otherwise ``0`` for equal labels,
            the insertion cost of ``label2`` if ``label1`` is the dummy label,
            the deletion cost of ``label1`` if ``label2`` is the dummy label,
            and the relabeling cost in every other case.
        """
        if self._eager_init():  # @todo: check if correct
            # Edge costs come from the edge table, not the node table.
            return self._edge_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_del_cost_fun(label1)
        return self._edit_cost.edge_rel_cost_fun(label1, label2)

    def compute_induced_cost(self, g, h, node_map):
        """Compute the edit cost between two graphs induced by a node map.

        The total is stored on the node map through ``set_induced_cost``;
        nothing is returned.

        :param g: Input graph.
        :param h: Input graph.
        :param node_map: Node map whose induced edit cost is computed (updated in place).
        """
        cost = 0
        # Node substitutions and deletions, seen from g.
        for node in g.nodes():
            image = node_map.image(node)
            label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label'])
            cost += self.node_cost(g.nodes[node]['label'], label2)
        # Node insertions: nodes of h whose pre-image is the dummy node.
        for node in h.nodes():
            pre_image = node_map.pre_image(node)
            if pre_image == dummy_node():
                cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])
        # Edge substitutions and deletions, seen from g.
        for (n1, n2) in g.edges():
            image1 = node_map.image(n1)
            image2 = node_map.image(n2)
            label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
            cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
        # Edge insertions: edges of h without a counterpart in g.
        for (n1, n2) in h.edges():
            if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
                cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])
        node_map.set_induced_cost(cost)

    @staticmethod
    def _check_num_constants(cost_name, constants, expected):
        """Raise unless ``constants`` holds ``expected`` values or is empty.

        :param cost_name: Name of the edit cost option, used in the message.
        :param constants: Constants supplied by the caller.
        :param expected: Number of constants the cost model takes.
        :raises Exception: If the number of constants is neither ``expected`` nor 0.
        """
        actual = len(constants)
        if actual != expected and actual != 0:
            raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::' + cost_name + '. Expected: ' + str(expected) + ' or 0; actual: ' + str(actual) + '.')

    def _set_edit_cost(self, edit_cost, edit_cost_constants):
        """Select the edit cost model.

        :param edit_cost: An ``Options.EditCosts`` member or its name.
        :param edit_cost_constants: Constants forwarded to the constructor of
            the selected cost class; empty (or ``None``) selects its defaults.
        :raises Exception: If the number of constants does not fit the model.

        .. note:: Only ``Constant`` is imported by this module as shown;
            CHEM1, Letter, Letter2 and NonSymbolic must be provided elsewhere
            for their branches to work.  Options without a branch here leave
            the edit cost unset.
        """
        if self._delete_edit_cost:
            self._edit_cost = None
        if isinstance(edit_cost, str):
            edit_cost = OptionsStringMap.EditCosts[edit_cost]
        constants = [] if edit_cost_constants is None else edit_cost_constants
        # Each branch validates the count first; unpacking an empty sequence
        # then calls the constructor with its defaults.
        if edit_cost == Options.EditCosts.CHEM_1:
            self._check_num_constants('CHEM_1', constants, 4)
            self._edit_cost = CHEM1(*constants)
        elif edit_cost == Options.EditCosts.LETTER:
            self._check_num_constants('LETTER', constants, 3)
            self._edit_cost = Letter(*constants)
        elif edit_cost == Options.EditCosts.LETTER2:
            self._check_num_constants('LETTER2', constants, 5)
            self._edit_cost = Letter2(*constants)
        elif edit_cost == Options.EditCosts.NON_SYMBOLIC:
            self._check_num_constants('NON_SYMBOLIC', constants, 6)
            self._edit_cost = NonSymbolic(*constants)
        elif edit_cost == Options.EditCosts.CONSTANT:
            self._check_num_constants('CONSTANT', constants, 6)
            self._edit_cost = Constant(*constants)
        self._delete_edit_cost = True

    def _eager_init(self):
        """Return ``True`` if the current init type is one of the eager ones."""
        return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)

+ 369
- 0
gklearn/ged/env/ged_env.py View File

@@ -0,0 +1,369 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 12:02:36 2020

@author: ljia
"""
import numpy as np
import networkx as nx
from gklearn.ged.env import Options, OptionsStringMap
from gklearn.ged.env import GEDData


class GEDEnv(object):

def __init__(self):
    """Create an empty, uninitialized environment without a GED method."""
    self.__initialized = False
    self.__new_graph_ids = list()
    self.__ged_data = GEDData()
    # Per-pair results, written by run_method() under the key (g_id, h_id).
    self.__lower_bounds = dict()
    self.__upper_bounds = dict()
    self.__runtimes = dict()
    self.__node_maps = dict()
    # One dict per graph, translating between user and internal node IDs.
    self.__original_to_internal_node_ids = list()
    self.__internal_to_original_node_ids = list()
    # Selected by set_method().
    self.__ged_method = None
def set_edit_cost(self, edit_cost, edit_cost_constants=None):
    """Set the edit costs to one of the predefined edit costs.

    :param edit_cost: The predefined edit cost to use (enum member or its name).
    :param edit_cost_constants: Constants passed to the constructor of the
        selected edit cost class.  ``None`` (the default) stands for "no
        constants".
    """
    # A fresh list per call: a literal ``[]`` default would be one shared
    # object that any callee could mutate for all later calls.
    if edit_cost_constants is None:
        edit_cost_constants = []
    self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants)
def add_graph(self, graph_name='', graph_class=''):
    """Add a new, empty graph to the environment.

    Call init() after calling this method.

    :param graph_name: The name of the added graph. Empty if not specified.
    :param graph_class: The class of the added graph. Empty if not specified.
    :return: The ID of the newly added graph.
    """
    # @todo: graphs are not uninitialized.
    data = self.__ged_data
    self.__initialized = False
    # The new ID is the number of graphs added so far.
    new_id = data._num_graphs_without_shuffled_copies
    data._num_graphs_without_shuffled_copies = new_id + 1
    self.__new_graph_ids.append(new_id)
    data._graphs.append(nx.Graph())
    data._graph_names.append(graph_name)
    data._graph_classes.append(graph_class)
    # Fresh per-graph node ID translation tables.
    self.__original_to_internal_node_ids.append({})
    self.__internal_to_original_node_ids.append({})
    data._strings_to_internal_node_ids.append({})
    data._internal_node_ids_to_strings.append({})
    return new_id
def add_node(self, graph_id, node_id, node_label):
    """Add a labeled node to a graph of the environment.

    :param graph_id: ID of a graph that has been added to the environment.
    :param node_id: The user-specific ID of the node to add.
    :param node_label: The label of the node to add.
    """
    # @todo: check ids.
    self.__initialized = False
    data = self.__ged_data
    graph = data._graphs[graph_id]
    # Internal IDs are consecutive: the next one is the current node count.
    internal_id = nx.number_of_nodes(graph)
    graph.add_node(internal_id, label=node_label)
    # Record the translation in both directions, by value and by string.
    self.__original_to_internal_node_ids[graph_id][node_id] = internal_id
    self.__internal_to_original_node_ids[graph_id][internal_id] = node_id
    data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_id
    data._internal_node_ids_to_strings[graph_id][internal_id] = str(node_id)
    # @todo: node_label_to_id_
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True):
    """Add a labeled edge to a graph of the environment.

    :param graph_id: ID of a graph that has been added to the environment.
    :param nd_from: The user-specific ID of the tail of the edge to add.
    :param nd_to: The user-specific ID of the head of the edge to add.
    :param edge_label: The label of the edge to add.
    :param ignore_duplicates: If ``True``, adding an edge that already exists
        is ignored and the stored edge keeps its label. Otherwise an
        exception is raised.
    :raises Exception: If the edge already exists and ``ignore_duplicates``
        is ``False``.
    """
    # @todo: check everything.
    self.__initialized = False
    node_ids = self.__original_to_internal_node_ids[graph_id]
    tail = node_ids[nd_from]
    head = node_ids[nd_to]
    graph = self.__ged_data._graphs[graph_id]
    # Without this check add_edge() would silently overwrite the label of
    # an existing edge, whatever ignore_duplicates says.
    if graph.has_edge(tail, head):
        if ignore_duplicates:
            return
        raise Exception('An edge between ' + str(nd_from) + ' and ' + str(nd_to) + ' has already been added to the graph with ID ' + str(graph_id) + '.')
    graph.add_edge(tail, head, label=edge_label)
    # @todo: edge_id and label_id, edge_label_to_id_.
def add_nx_graph(self, g, classe, ignore_duplicates=True):
    """Add a networkx graph, with its nodes and edges, to the environment.

    The label of every node and edge is the tuple of its attribute
    ``(key, value)`` pairs, sorted by key.

    :param g: The graph to add; its ``name`` becomes the graph name.
    :param classe: The class of the graph.
    :param ignore_duplicates: Forwarded to add_edge() for every edge. True by default.
    :type g: networkx.Graph
    :type ignore_duplicates: bool
    :return: The ID of the newly added graph.
    """
    def as_label(attributes):
        # @todo: if the keys of labels include int and str at the same time.
        return tuple(sorted(attributes.items(), key=lambda item: item[0]))

    new_id = self.add_graph(g.name, classe)  # check if the graph name already exists.
    for node in g.nodes:
        self.add_node(new_id, node, as_label(g.nodes[node]))
    for edge in g.edges:
        tail, head = edge[0], edge[1]
        self.add_edge(new_id, tail, head, as_label(g.edges[(tail, head)]), ignore_duplicates)
    return new_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False):
    """Initialize the environment.

    Does nothing beyond the edit cost check if the environment is already
    initialized.

    :param init_type: Initialization type (``Options.InitType`` member or its name).
    :param print_to_stdout: Currently unused.
    :raises Exception: If no edit costs have been selected.
    """
    if isinstance(init_type, str):
        init_type = OptionsStringMap.InitType[init_type]
    ged_data = self.__ged_data
    # Throw an exception if no edit costs have been selected.
    if ged_data._edit_cost is None:
        raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')
    # Return if the environment is initialized.
    if self.__initialized:
        return
    # Set initialization type.
    ged_data._init_type = init_type
    # @todo: Construct shuffled graph copies if necessary.
    # @todo: setup_adjacency_matrix, don't know if necessary.
    # default=0 keeps an environment without graphs from raising: np.max()
    # of an empty list is a ValueError.
    ged_data._max_num_nodes = max((nx.number_of_nodes(g) for g in ged_data._graphs), default=0)
    ged_data._max_num_edges = max((nx.number_of_edges(g) for g in ged_data._graphs), default=0)
    # Initialize cost matrices if necessary.
    if ged_data._eager_init():
        pass  # @todo: init_cost_matrices_: 1. Update node cost matrix if new node labels have been added to the environment; 2. Update edge cost matrix if new edge labels have been added to the environment.
    # Mark environment as initialized.
    self.__initialized = True
    self.__new_graph_ids.clear()
def set_method(self, method, options=''):
    """Set the GED method to be used by run_method().

    :param method: The method to use (``Options.GEDMethod`` member or its name).
    :param options: Options handed unchanged to the ``set_options()`` of the
        selected method.

    .. note:: Only ``Bipartite`` is imported here; the other method classes
        named below must be provided elsewhere for their branches to work.
    """
    # Reset rather than delete the attribute, so that the "is None" guards
    # of run_method()/init_method() keep working if selection fails below.
    self.__ged_method = None
    if isinstance(method, str):
        method = OptionsStringMap.GEDMethod[method]

    if method == Options.GEDMethod.BRANCH:
        self.__ged_method = Branch(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_FAST:
        self.__ged_method = BranchFast(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_TIGHT:
        self.__ged_method = BranchTight(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_UNIFORM:
        self.__ged_method = BranchUniform(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_COMPACT:
        self.__ged_method = BranchCompact(self.__ged_data)
    elif method == Options.GEDMethod.PARTITION:
        self.__ged_method = Partition(self.__ged_data)
    elif method == Options.GEDMethod.HYBRID:
        self.__ged_method = Hybrid(self.__ged_data)
    elif method == Options.GEDMethod.RING:
        self.__ged_method = Ring(self.__ged_data)
    elif method == Options.GEDMethod.ANCHOR_AWARE_GED:
        self.__ged_method = AnchorAwareGED(self.__ged_data)
    elif method == Options.GEDMethod.WALKS:
        self.__ged_method = Walks(self.__ged_data)
    elif method == Options.GEDMethod.IPFP:
        self.__ged_method = IPFP(self.__ged_data)
    elif method == Options.GEDMethod.BIPARTITE:
        from gklearn.ged.methods import Bipartite
        self.__ged_method = Bipartite(self.__ged_data)
    elif method == Options.GEDMethod.SUBGRAPH:
        self.__ged_method = Subgraph(self.__ged_data)
    elif method == Options.GEDMethod.NODE:
        self.__ged_method = Node(self.__ged_data)
    elif method == Options.GEDMethod.RING_ML:
        self.__ged_method = RingML(self.__ged_data)
    elif method == Options.GEDMethod.BIPARTITE_ML:
        self.__ged_method = BipartiteML(self.__ged_data)
    elif method == Options.GEDMethod.REFINE:
        self.__ged_method = Refine(self.__ged_data)
    elif method == Options.GEDMethod.BP_BEAM:
        self.__ged_method = BPBeam(self.__ged_data)
    elif method == Options.GEDMethod.SIMULATED_ANNEALING:
        self.__ged_method = SimulatedAnnealing(self.__ged_data)
    elif method == Options.GEDMethod.HED:
        self.__ged_method = HED(self.__ged_data)
    elif method == Options.GEDMethod.STAR:
        self.__ged_method = STAR(self.__ged_data)
    # #ifdef GUROBI
    elif method == Options.GEDMethod.F1:
        self.__ged_method = F1(self.__ged_data)
    elif method == Options.GEDMethod.F2:
        self.__ged_method = F2(self.__ged_data)
    elif method == Options.GEDMethod.COMPACT_MIP:
        self.__ged_method = CompactMIP(self.__ged_data)
    elif method == Options.GEDMethod.BLP_NO_EDGE_LABELS:
        self.__ged_method = BLPNoEdgeLabels(self.__ged_data)

    self.__ged_method.set_options(options)
def run_method(self, g_id, h_id):
    """Run the GED method selected by set_method() on two graphs.

    The lower bound, upper bound, runtime and node map reported by the
    method are stored under the key ``(g_id, h_id)``.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :raises Exception: If a graph ID is unknown, the environment is not
        initialized, or no method has been set.
    """
    num_graphs = self.__ged_data.num_graphs()
    # Build each message as one string: several positional arguments to
    # Exception are displayed as a tuple.
    if g_id >= num_graphs:
        raise Exception('The graph with ID ' + str(g_id) + ' has not been added to the environment.')
    if h_id >= num_graphs:
        raise Exception('The graph with ID ' + str(h_id) + ' has not been added to the environment.')
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
    if self.__ged_method is None:
        raise Exception('No method has been set. Call set_method() before calling run().')
    # Call selected GEDMethod and store results.
    if self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id):
        # NOTE(review): id_shuffled_graph_copy() is expected on the data
        # object; confirm it exists.  @todo: why shuffle?
        self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id))
    else:
        self.__ged_method.run(g_id, h_id)
    self.__lower_bounds[(g_id, h_id)] = self.__ged_method.get_lower_bound()
    self.__upper_bounds[(g_id, h_id)] = self.__ged_method.get_upper_bound()
    self.__runtimes[(g_id, h_id)] = self.__ged_method.get_runtime()
    self.__node_maps[(g_id, h_id)] = self.__ged_method.get_node_map()
def init_method(self):
    """Initialize the method selected by set_method().

    :raises Exception: If the environment is not initialized or no method
        has been set.
    """
    if not self.__initialized:
        raise Exception('The environment is uninitialized. Call init() before calling init_method().')
    method = self.__ged_method
    if method is None:
        raise Exception('No method has been set. Call set_method() before calling init_method().')
    method.init()
def get_upper_bound(self, g_id, h_id):
    """Return the upper bound for the edit distance between two graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Upper bound stored by the last call to run_method() with
        arguments ``g_id`` and ``h_id``.
    :raises Exception: If run_method() has not stored a result for this pair.
    """
    pair = (g_id, h_id)
    if pair in self.__upper_bounds:
        return self.__upper_bounds[pair]
    raise Exception('Call run({0!s},{1!s}) before calling get_upper_bound({0!s},{1!s}).'.format(g_id, h_id))
def get_lower_bound(self, g_id, h_id):
    """Return the lower bound for the edit distance between two graphs.

    :param g_id: ID of an input graph that has been added to the environment.
    :param h_id: ID of an input graph that has been added to the environment.
    :return: Lower bound stored by the last call to run_method() with
        arguments ``g_id`` and ``h_id``.
    :raises Exception: If run_method() has not stored a result for this pair.
    """
    pair = (g_id, h_id)
    if pair in self.__lower_bounds:
        return self.__lower_bounds[pair]
    raise Exception('Call run({0!s},{1!s}) before calling get_lower_bound({0!s},{1!s}).'.format(g_id, h_id))
def get_runtime(self, g_id, h_id):
    """Return the runtime stored for the graph pair (g_id, h_id).

    Parameters
    ----------
    g_id, h_id
        IDs of the two graphs; together they form the lookup key.

    Returns
    -------
    The value recorded in the runtime table for this ordered pair.

    Raises
    ------
    Exception
        If no runtime has been recorded for this pair.
    """
    pair = (g_id, h_id)
    if pair in self.__runtimes:
        return self.__runtimes[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_runtime(' + ids + ').')

def get_init_time(self):
    """Return the initialization time reported by the current GED method.

    The value is whatever ``get_init_time()`` of the method object held by
    this environment returns.
    """
    method = self.__ged_method
    return method.get_init_time()


def get_node_map(self, g_id, h_id):
    """Return the node map stored for the graph pair (g_id, h_id).

    Parameters
    ----------
    g_id, h_id
        IDs of the two graphs; together they form the lookup key.

    Returns
    -------
    The object recorded in the node-map table for this ordered pair.

    Raises
    ------
    Exception
        If no node map has been recorded for this pair.
    """
    pair = (g_id, h_id)
    if pair in self.__node_maps:
        return self.__node_maps[pair]
    ids = str(g_id) + ',' + str(h_id)
    raise Exception('Call run(' + ids + ') before calling get_node_map(' + ids + ').')

def get_forward_map(self, g_id, h_id):
    """Return the forward map of the node map stored for (g_id, h_id).

    Parameters
    ----------
    g_id
        ID of the first compared graph.
    h_id
        ID of the second compared graph.

    Returns
    -------
    The ``forward_map`` attribute of the node map returned by
    ``get_node_map(g_id, h_id)``.

    Notes
    -----
    ``get_node_map()`` raises if no node map has been stored for this pair,
    so the method must have been run on the two graphs beforehand.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.forward_map
def get_backward_map(self, g_id, h_id):
    """Return the backward map of the node map stored for (g_id, h_id).

    Parameters
    ----------
    g_id
        ID of the first compared graph.
    h_id
        ID of the second compared graph.

    Returns
    -------
    The ``backward_map`` attribute of the node map returned by
    ``get_node_map(g_id, h_id)``.

    Notes
    -----
    ``get_node_map()`` raises if no node map has been stored for this pair,
    so the method must have been run on the two graphs beforehand.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.backward_map
def get_all_graph_ids(self):
    """Return the list of graph IDs ``0 .. n - 1``.

    ``n`` is read from ``_num_graphs_without_shuffled_copies`` of the
    environment's GED data object.
    """
    num_graphs = self.__ged_data._num_graphs_without_shuffled_copies
    return list(range(num_graphs))

+ 20
- 8
gklearn/ged/env/node_map.py View File

@@ -6,15 +6,27 @@ Created on Wed Apr 22 11:31:26 2020
@author: ljia
"""
import numpy as np
from gklearn.utils import dummy_node, undefined_node


class NodeMap(object):
def __init__(self, num_nodes_g, num_nodes_h):
    """Create a node map in which every node is still unassigned.

    Parameters
    ----------
    num_nodes_g
        Number of source nodes; length of the forward map.
    num_nodes_h
        Number of target nodes; length of the backward map.
    """
    # Every slot starts as undefined_node(). The earlier np.inf
    # initialisation was overwritten immediately and has been dropped.
    self.__forward_map = [undefined_node()] * num_nodes_g
    self.__backward_map = [undefined_node()] * num_nodes_h
    self.__induced_cost = np.inf
def clear(self):
    """Reset every entry of the forward and backward maps to undefined_node().

    The lengths of both maps are kept.
    """
    self.__forward_map = [undefined_node() for _ in self.__forward_map]
    self.__backward_map = [undefined_node() for _ in self.__backward_map]
def num_source_nodes(self):
    """Return the number of source nodes, i.e. the length of the forward map."""
    source_slots = self.__forward_map
    return len(source_slots)
@@ -28,7 +40,7 @@ class NodeMap(object):
return self.__forward_map[node]
else:
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
return np.inf
return undefined_node()
def pre_image(self, node):
@@ -36,28 +48,28 @@ class NodeMap(object):
return self.__backward_map[node]
else:
raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.')
return np.inf
return undefined_node()
def as_relation(self, relation):
    """Write the node map into ``relation`` as a list of (source, target) pairs.

    Parameters
    ----------
    relation
        Container that is cleared and then filled in place; it must
        provide ``clear()`` and ``append()``.
    """
    relation.clear()
    # Source side: every source node whose image is not undefined.
    for i, k in enumerate(self.__forward_map):
        if k != undefined_node():
            relation.append((i, k))
    # Target side: only target nodes whose pre-image is the dummy node,
    # since the others are already covered by the forward map.
    for k, i in enumerate(self.__backward_map):
        if i == dummy_node():
            relation.append((i, k))
def add_assignment(self, i, k):
if i != np.inf:
if i != dummy_node():
if i < len(self.__forward_map):
self.__forward_map[i] = k
else:
raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.')
if k != np.inf:
if k != dummy_node():
if k < len(self.__backward_map):
self.__backward_map[k] = i
else:


+ 3
- 0
gklearn/ged/methods/__init__.py View File

@@ -0,0 +1,3 @@
from gklearn.ged.methods.ged_method import GEDMethod
from gklearn.ged.methods.lsape_based_method import LSAPEBasedMethod
from gklearn.ged.methods.bipartite import Bipartite

+ 117
- 0
gklearn/ged/methods/bipartite.py View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 16:09:29 2020

@author: ljia
"""
import numpy as np
import networkx as nx
from gklearn.ged.methods import LSAPEBasedMethod
from gklearn.ged.util import LSAPESolver
from gklearn.utils import SpecialLabel


class Bipartite(LSAPEBasedMethod):
    """LSAPE-based GED method.

    Each entry of the master cost matrix is a node edit cost plus the cost
    of editing the edges incident to the node(s) involved.
    """

    def __init__(self, ged_data):
        super().__init__(ged_data)
        # This method does not compute a lower bound.
        self._compute_lower_bound = False

    ###########################################################################
    # Inherited member functions from LSAPEBasedMethod.
    ###########################################################################

    def _lsape_populate_instance(self, g, h, master_problem):
        """Fill ``master_problem`` with substitution, deletion and insertion costs.

        With n and m the node counts of ``g`` and ``h``:
        entry [i, j] (i < n, j < m) gets the substitution cost,
        entry [i, m + i] the deletion cost of node i of ``g``,
        entry [n + j, j] the insertion cost of node j of ``h``.
        Nodes are addressed by the integers 0 .. n-1 and 0 .. m-1.
        """
        num_g = nx.number_of_nodes(g)
        num_h = nx.number_of_nodes(h)
        for row in range(num_g):
            for col in range(num_h):
                master_problem[row, col] = self._compute_substitution_cost(g, h, row, col)
        for row in range(num_g):
            master_problem[row, num_h + row] = self._compute_deletion_cost(g, row)
        for col in range(num_h):
            master_problem[num_g + col, col] = self._compute_insertion_cost(h, col)

    ###########################################################################
    # Helper member functions.
    ###########################################################################

    def _compute_substitution_cost(self, g, h, u, v):
        """Return the cost of substituting node ``u`` of ``g`` by node ``v`` of ``h``.

        This is the node substitution cost plus the minimal cost of an
        LSAPE subproblem built from the edges incident to ``u`` and ``v``.
        """
        # Node substitution cost.
        total = self._ged_data.node_cost(g.nodes[u]['label'], h.nodes[v]['label'])

        # Subproblem matrix: everything forbidden (inf) except the
        # lower-right block, which is free.
        deg_u, deg_v = g.degree[u], h.degree[v]
        size = deg_u + deg_v
        subproblem = np.ones((size, size)) * np.inf
        subproblem[deg_u:, deg_v:] = 0

        # Edge deletion costs, one per edge incident to u.  @todo: should directed graphs be considered?
        for idx, attrs in enumerate(g[u].values()):
            subproblem[idx, deg_v + idx] = self._ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)

        # Edge insertion costs, one per edge incident to v.  @todo: should directed graphs be considered?
        for idx, attrs in enumerate(h[v].values()):
            subproblem[deg_u + idx, idx] = self._ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])

        # Edge relabelling costs for every pair of incident edges.
        for idx_u, attrs_u in enumerate(g[u].values()):
            for idx_v, attrs_v in enumerate(h[v].values()):
                subproblem[idx_u, idx_v] = self._ged_data.edge_cost(attrs_u['label'], attrs_v['label'])

        # Solve the subproblem and add its optimum to the node cost.
        solver = LSAPESolver(subproblem)
        solver.set_model(self._lsape_model)
        solver.solve()
        total += solver.minimal_cost()
        return total

    def _compute_deletion_cost(self, g, v):
        """Return the cost of deleting node ``v`` of ``g`` together with its incident edges."""
        node_part = self._ged_data.node_cost(g.nodes[v]['label'], SpecialLabel.DUMMY)
        # Start the sum at the node cost so the additions happen in the
        # same order as a running total would.
        return sum(
            (self._ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY) for attrs in g[v].values()),
            node_part,
        )

    def _compute_insertion_cost(self, g, v):
        """Return the cost of inserting node ``v`` of ``g`` together with its incident edges."""
        node_part = self._ged_data.node_cost(SpecialLabel.DUMMY, g.nodes[v]['label'])
        return sum(
            (self._ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label']) for attrs in g[v].values()),
            node_part,
        )

+ 195
- 0
gklearn/ged/methods/ged_method.py View File

@@ -0,0 +1,195 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 15:52:35 2020

@author: ljia
"""
import numpy as np
import time
import networkx as nx


class GEDMethod(object):
    """Base class of graph edit distance methods.

    Derived classes implement the ``_ged_*`` hooks; the public methods
    take care of timing and of storing the results of the last run.
    """

    def __init__(self, ged_data):
        self._initialized = False
        self._ged_data = ged_data
        self._options = None
        self._lower_bound = 0
        self._upper_bound = np.inf
        self._node_map = [0, 0]  # @todo
        self._runtime = None
        self._init_time = None

    def init(self):
        """Initialize the method with the options specified by set_options().

        The time spent in ``_ged_init()`` is stored and can be read with
        get_init_time().
        """
        start = time.time()
        self._ged_init()
        end = time.time()
        self._init_time = end - start
        self._initialized = True

    def set_options(self, options):
        """Set the options of the method.

        All options are first reset to their defaults, then every entry of
        ``options`` is handed to ``_ged_parse_option()``. The method is
        marked as uninitialized afterwards.

        Parameters
        ----------
        options : dict
            Maps option names to option values.

        Raises
        ------
        Exception
            If ``_ged_parse_option()`` rejects an option.
        """
        self._ged_set_default_options()
        for key, val in options.items():
            if not self._ged_parse_option(key, val):
                # Build one message string; passing the pieces as separate
                # arguments would make the exception print as a tuple.
                raise Exception('Invalid option "' + str(key) + '". Usage: options = "' + self._ged_valid_options_string() + '".')  # @todo: not implemented.
        self._initialized = False

    def run(self, g_id, h_id):
        """Run the method on two graphs of the GED data and store the results.

        Parameters
        ----------
        g_id, h_id
            Indices of the input graphs in ``self._ged_data._graphs``.
        """
        start = time.time()
        result = self.run_as_util(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id])
        end = time.time()
        self._lower_bound = result['lower_bound']
        self._upper_bound = result['upper_bound']
        # Keep the previous node map when the run produced none.
        if len(result['node_maps']) > 0:
            self._node_map = result['node_maps'][0]
        self._runtime = end - start

    def run_as_util(self, g, h):
        """Run the method on two graphs given directly.

        Parameters
        ----------
        g, h
            Input graphs.

        Returns
        -------
        The result object produced by ``_ged_run(g, h)``.
        """
        # Compute optimal solution and return if at least one of the two graphs is empty.
        if nx.number_of_nodes(g) == 0 or nx.number_of_nodes(h) == 0:
            print('This is not implemented.')
            pass  # @todo:
        # Run the method.
        return self._ged_run(g, h)

    def get_upper_bound(self):
        """Return the upper bound stored by the last call to run().

        The value is ``np.inf`` until a run has stored something else.
        """
        return self._upper_bound

    def get_lower_bound(self):
        """Return the lower bound stored by the last call to run().

        The value is ``0`` until a run has stored something else.
        """
        return self._lower_bound

    def get_runtime(self):
        """Return the runtime of the last call to run() in seconds, or None before any run."""
        return self._runtime

    def get_init_time(self):
        """Return the runtime of the last call to init() in seconds, or None before any init."""
        return self._init_time

    def get_node_map(self):
        """Return the node map stored by the last call to run()."""
        return self._node_map

    def _ged_init(self):
        """Initialize the method.

        Must be overridden by derived classes that require initialization.
        """
        pass

    def _ged_parse_option(self, option, arg):
        """Parse one option.

        Parameters
        ----------
        option
            The name of the option.
        arg
            The argument of the option.

        Returns
        -------
        bool
            True if ``option`` is a valid option name for the method.
            This base implementation accepts no option and returns False;
            derived classes that have options must override it.
        """
        return False

    def _ged_run(self, g, h):
        """Run the method on the graphs ``g`` and ``h``.

        Must be overridden by derived classes. run() reads the keys
        'lower_bound', 'upper_bound' and 'node_maps' from the returned
        object; this base implementation returns an empty dict.
        """
        return {}

    def _ged_valid_options_string(self):
        """Return a string describing all valid options.

        Must be overridden by derived classes that have options; this base
        implementation returns the empty string.
        """
        return ''

    def _ged_set_default_options(self):
        """Set all options to their default values.

        Must be overridden by derived classes that have options.
        """
        pass

+ 254
- 0
gklearn/ged/methods/lsape_based_method.py View File

@@ -0,0 +1,254 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 16:01:24 2020

@author: ljia
"""
import numpy as np
import networkx as nx
from gklearn.ged.methods import GEDMethod
from gklearn.ged.util import LSAPESolver, misc
from gklearn.ged.env import NodeMap

class LSAPEBasedMethod(GEDMethod):
    """Base class of GED methods that solve one LSAPE instance per graph pair.

    Derived classes fill the LSAPE cost matrix in
    ``_lsape_populate_instance()``; this class solves it, turns the
    solutions into node maps and derives the bounds.
    """

    def __init__(self, ged_data):
        super().__init__(ged_data)
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._compute_lower_bound = True
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._centralities = {}
        self._max_num_solutions = 1

    def populate_instance_and_run_as_util(self, g, h):  # , lsape_instance):
        """Build the LSAPE instance for ``g`` and ``h``, solve it and collect the results.

        Parameters
        ----------
        g, h
            Input graphs.

        Returns
        -------
        dict
            With keys 'node_maps' (list of NodeMap), 'lower_bound' and
            'upper_bound'.
        """
        result = {'node_maps': [], 'lower_bound': 0, 'upper_bound': np.inf}

        # Populate the LSAPE instance and set up the solver. The matrix is
        # square with one row and one column per node of either graph.
        nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h)
        lsape_instance = np.ones((nb1 + nb2, nb1 + nb2)) * np.inf
        self.populate_instance(g, h, lsape_instance)
        lsape_solver = LSAPESolver(lsape_instance)

        # Solve the LSAPE instance.
        if self._solve_optimally:
            lsape_solver.set_model(self._lsape_model)
        else:
            lsape_solver.set_greedy_method(self._greedy_method)
        lsape_solver.solve(self._max_num_solutions)

        # Compute and store lower and upper bound.
        if self._compute_lower_bound and self._solve_optimally:
            result['lower_bound'] = lsape_solver.minimal_cost() * self._lsape_lower_bound_scaling_factor(g, h)  # @todo: test

        for solution_id in range(0, lsape_solver.num_solutions()):
            result['node_maps'].append(NodeMap(nx.number_of_nodes(g), nx.number_of_nodes(h)))
            misc.construct_node_map_from_solver(lsape_solver, result['node_maps'][-1], solution_id)
            self._ged_data.compute_induced_cost(g, h, result['node_maps'][-1])

        # Add centralities and reoptimize.
        if self._centrality_weight > 0 and self._centrality_method != 'NODE':
            print('This is not implemented.')
            pass  # @todo

        # Sort the node maps and set the upper bound.
        if len(result['node_maps']) > 1 or len(result['node_maps']) > self._max_num_solutions:
            print('This is not implemented.')  # @todo:
            pass
        if len(result['node_maps']) == 0:
            result['upper_bound'] = np.inf
        else:
            result['upper_bound'] = result['node_maps'][0].induced_cost()

        return result

    def populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance in place.

        Parameters
        ----------
        g, h
            Input graphs.
        lsape_instance
            Cost matrix that is filled by ``_lsape_populate_instance()``.
            Afterwards the block ``[n:, m:]`` (n, m = node counts of
            ``g`` and ``h``) is set to 0.
        """
        if not self._initialized:
            pass
            # @todo: if (not this->initialized_) {
        self._lsape_populate_instance(g, h, lsape_instance)
        lsape_instance[nx.number_of_nodes(g):, nx.number_of_nodes(h):] = 0

    ###########################################################################
    # Member functions inherited from GEDMethod.
    ###########################################################################

    def _ged_init(self):
        self._lsape_pre_graph_init(False)
        for graph in self._ged_data._graphs:
            self._init_graph(graph)
        self._lsape_init()

    def _ged_run(self, g, h):
        result = self.populate_instance_and_run_as_util(g, h)  # , lsape_instance)
        return result

    def _ged_parse_option(self, option, arg):
        is_valid_option = False
        if option == 'threads':  # @todo: try.. catch...
            self._num_threads = arg
            is_valid_option = True
        elif option == 'lsape_model':
            self._lsape_model = arg  # @todo
            is_valid_option = True
        elif option == 'greedy_method':
            self._greedy_method = arg  # @todo
            is_valid_option = True
        elif option == 'optimal':
            self._solve_optimally = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_method':
            self._centrality_method = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_weight':
            self._centrality_weight = arg  # @todo
            is_valid_option = True
        elif option == 'max_num_solutions':
            if arg == 'ALL':
                self._max_num_solutions = -1
            else:
                self._max_num_solutions = arg  # @todo
            is_valid_option = True
        # Options not shared by all LSAPE-based methods go to the derived class.
        is_valid_option = is_valid_option or self._lsape_parse_option(option, arg)
        # Unknown options are currently tolerated instead of rejected.
        is_valid_option = True  # @todo: this is not in the C++ code.
        return is_valid_option

    def _ged_set_default_options(self):
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._max_num_solutions = 1
        # Let derived classes reset their own options as well; without
        # this call the hook below would never run.
        self._lsape_set_default_options()

    ###########################################################################
    # Private helper member functions.
    ###########################################################################

    def _init_graph(self, graph):
        if self._centrality_method != 'NODE':
            # NOTE(review): _init_centralities is not defined in this class;
            # this branch fails unless a derived class provides it.
            self._init_centralities(graph)  # @todo
        self._lsape_init_graph(graph)

    ###########################################################################
    # Virtual member functions to be overridden by derived classes.
    ###########################################################################

    def _lsape_init(self):
        """Initialize the method after the per-graph initialization.

        Must be overridden by derived classes that require custom
        initialization.
        """
        pass

    def _lsape_parse_option(self, option, arg):
        """Parse one option that is not shared by all LSAPE-based methods.

        Parameters
        ----------
        option
            The name of the option.
        arg
            The argument of the option.

        Returns
        -------
        bool
            True if ``option`` is a valid option name for the method.
            This base implementation returns False; derived classes with
            options of their own must override it.
        """
        return False

    def _lsape_set_default_options(self):
        """Reset the options that are not shared by all LSAPE-based methods.

        Called at the end of ``_ged_set_default_options()``. Must be
        overridden by derived classes that have such options.
        """
        pass

    def _lsape_populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance.

        Parameters
        ----------
        g, h
            Input graphs.
        lsape_instance
            Cost matrix to fill in place. As allocated by
            ``populate_instance_and_run_as_util()`` it has shape
            (n + m) x (n + m), where n and m are the numbers of nodes of
            ``g`` and ``h``.

        Must be overridden by derived classes.
        """
        pass

    def _lsape_init_graph(self, graph):
        """Initialize the per-graph data of the method for ``graph``.

        Must be overridden by derived classes that keep custom per-graph
        data.
        """
        pass

    def _lsape_pre_graph_init(self, called_at_runtime):
        """Initialize the method before the per-graph initialization.

        Parameters
        ----------
        called_at_runtime : bool
            True if called at runtime, False if called during
            initialization.

        Must be overridden by derived classes that require such a step.
        """
        pass

    def _lsape_lower_bound_scaling_factor(self, g, h):
        """Return the factor by which the LSAPE optimum is scaled to obtain a lower bound.

        The default factor is 1. Derived classes whose LSAPE optimum has
        to be scaled to become a valid lower bound must override it.
        """
        return 1.0

+ 2
- 0
gklearn/ged/util/__init__.py View File

@@ -1 +1,3 @@
from gklearn.ged.util.lsape_solver import LSAPESolver
from gklearn.ged.util.util import compute_geds, ged_options_to_string
from gklearn.ged.util.util import compute_geds_cml

+ 121
- 0
gklearn/ged/util/lsape_solver.py View File

@@ -0,0 +1,121 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 22 15:37:36 2020

@author: ljia
"""
import numpy as np
from scipy.optimize import linear_sum_assignment

class LSAPESolver(object):
    """Solver for a linear sum assignment problem with error correction (LSAPE).

    Optimal solving is delegated to ``scipy.optimize.linear_sum_assignment``.
    """

    def __init__(self, cost_matrix=None):
        """Construct a solver for the given LSAPE problem instance.

        Parameters
        ----------
        cost_matrix
            The cost matrix of the problem instance that should be solved.
        """
        self.__cost_matrix = cost_matrix
        self.__model = 'ECBP'
        self.__greedy_method = 'BASIC'
        self.__solve_optimally = True
        self.__minimal_cost = 0
        self.__row_to_col_assignments = []
        self.__col_to_row_assignments = []
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def clear_solution(self):
        """Clear a previously computed solution.

        Afterwards both assignment lists hold exactly one empty solution
        slot.
        """
        self.__minimal_cost = 0
        self.__row_to_col_assignments.clear()
        self.__col_to_row_assignments.clear()
        self.__row_to_col_assignments.append([])  # @todo
        self.__col_to_row_assignments.append([])
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def set_model(self, model):
        """Make the solver solve optimally, using the given model.

        Parameters
        ----------
        model
            The model that should be used. It is only stored; solve()
            always uses ``linear_sum_assignment``.
        """
        self.__solve_optimally = True
        self.__model = model

    def set_greedy_method(self, greedy_method):
        """Make the solver solve greedily, using the given greedy method.

        Parameters
        ----------
        greedy_method
            The greedy method that should be used. It is only stored:
            greedy solving is not implemented, so a following solve()
            raises NotImplementedError.
        """
        self.__solve_optimally = False
        self.__greedy_method = greedy_method

    def solve(self, num_solutions=1):
        """Solve the LSAPE problem instance.

        Parameters
        ----------
        num_solutions
            The maximal number of solutions that should be computed.
            Currently at most one solution is computed.

        Raises
        ------
        NotImplementedError
            If greedy solving was requested with set_greedy_method().
        """
        self.clear_solution()
        if not self.__solve_optimally:
            # Fail loudly instead of leaving an empty, unusable solution.
            raise NotImplementedError('Greedy LSAPE solving is not implemented.')  # @todo: greedy.
        row_id, col_id = linear_sum_assignment(self.__cost_matrix)  # @todo: only hungarianLSAPE ('ECBP') can be used.
        self.__row_to_col_assignments[0] = col_id
        # For a square matrix col_id is a permutation, so its argsort is
        # the inverse assignment.
        self.__col_to_row_assignments[0] = np.argsort(col_id)  # @todo: might be slow, can use row_id
        self.__compute_cost_from_assignments()
        if num_solutions > 1:
            pass  # @todo: enumerate further solutions.

    def minimal_cost(self):
        """Return the cost of the computed solution."""
        return self.__minimal_cost

    def get_assigned_col(self, row, solution_id=0):
        """Return the column assigned to ``row``.

        Parameters
        ----------
        row
            Row whose assigned column should be returned.
        solution_id
            ID of the solution in which the assignment is looked up.
        """
        return self.__row_to_col_assignments[solution_id][row]

    def get_assigned_row(self, col, solution_id=0):
        """Return the row assigned to ``col``.

        Parameters
        ----------
        col
            Column whose assigned row should be returned.
        solution_id
            ID of the solution in which the assignment is looked up.
        """
        return self.__col_to_row_assignments[solution_id][col]

    def num_solutions(self):
        """Return the number of solution slots held by the solver."""
        return len(self.__row_to_col_assignments)

    def __compute_cost_from_assignments(self):  # @todo
        """Sum the cost-matrix entries selected by the first solution."""
        self.__minimal_cost = np.sum(self.__cost_matrix[range(0, len(self.__row_to_col_assignments[0])), self.__row_to_col_assignments[0]])

+ 21
- 0
gklearn/ged/util/misc.py View File

@@ -5,6 +5,27 @@ Created on Thu Mar 19 18:13:56 2020

@author: ljia
"""
from gklearn.utils import dummy_node


def construct_node_map_from_solver(solver, node_map, solution_id):
    """Fill ``node_map`` from one solution of an LSAPE solver.

    Parameters
    ----------
    solver
        Solver providing ``get_assigned_col()`` and ``get_assigned_row()``.
    node_map
        Node map that is cleared and then filled in place.
    solution_id
        ID of the solver solution to read.
    """
    node_map.clear()
    n_source = node_map.num_source_nodes()
    n_target = node_map.num_target_nodes()

    # Deletions and substitutions: a source node assigned to a column
    # beyond the target nodes is mapped to the dummy node.
    for source in range(n_source):
        assigned = solver.get_assigned_col(source, solution_id)
        target = assigned if assigned < n_target else dummy_node()
        node_map.add_assignment(source, target)

    # Insertions: a target node assigned to a row beyond the source nodes.
    for target in range(n_target):
        if solver.get_assigned_row(target, solution_id) < n_source:
            continue
        node_map.add_assignment(dummy_node(), target)

def options_string_to_options_map(options_string):
"""Transforms an options string into an options map.


+ 78
- 1
gklearn/ged/util/util.py View File

@@ -13,6 +13,7 @@ from functools import partial
import sys
from tqdm import tqdm
import networkx as nx
from gklearn.ged.env import GEDEnv
from gklearn.gedlib import librariesImport, gedlibpy


@@ -22,7 +23,7 @@ def compute_ged(g1, g2, options):
ged_env.add_nx_graph(g1, '')
ged_env.add_nx_graph(g2, '')
listID = ged_env.get_all_graph_ids()
ged_env.init()
ged_env.init(init_type=options['init_option'])
ged_env.set_method(options['method'], ged_options_to_string(options))
ged_env.init_method()

@@ -46,6 +47,82 @@ def compute_ged(g1, g2, options):
return dis, pi_forward, pi_backward


def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
    """Compute the pairwise graph edit distances of ``graphs`` with GEDEnv.

    Parameters
    ----------
    graphs : list
        The graphs to compare.
    options : dict
        Must provide the keys 'edit_cost', 'edit_cost_constants',
        'init_option', 'method', 'node_labels', 'edge_labels',
        'node_attrs' and 'edge_attrs'. The dict is not modified.
    sort : bool
        If True, each pair is passed to the GED computation with the graph
        having fewer nodes first.
    parallel : bool
        If True, the pairs are distributed over a process pool.
    verbose : bool
        If True, a progress bar is printed to stdout.

    Returns
    -------
    ged_vec : list
        One distance per unordered pair (i, j), i < j.
    ged_mat : numpy.ndarray
        Symmetric matrix of the distances.
    n_edit_operations : list
        Numbers of edit operations per pair, in the order of ``ged_vec``.
    """
    # Work on a private copy: 'threads' is overwritten below in parallel
    # mode, which must not leak into the caller's dict or into the shared
    # default argument.
    options = dict(options)
    num_graphs = len(graphs)

    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_type=options['init_option'])
    if parallel:
        # Each worker process runs the method single-threaded.
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    neo_options = {'edit_cost': options['edit_cost'],
                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
    ged_mat = np.zeros((num_graphs, num_graphs))
    if parallel:
        len_itr = int(num_graphs * (num_graphs - 1) / 2)
        ged_vec = [0 for i in range(len_itr)]
        n_edit_operations = [0 for i in range(len_itr)]
        itr = combinations(range(0, num_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = int(len_itr / n_jobs) + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Publish the shared objects as module globals of the worker.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
        if verbose:
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs', file=sys.stdout)
        else:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
        for i, j, dis, n_eo_tmp in iterator:
            # Position of the pair (i, j), i < j, in the flattened upper triangle.
            idx_itr = int(num_graphs * i + j - (i + 1) * (i + 2) / 2)
            ged_vec[idx_itr] = dis
            ged_mat[i][j] = dis
            ged_mat[j][i] = dis
            n_edit_operations[idx_itr] = n_eo_tmp
        pool.close()
        pool.join()
    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(num_graphs), desc='computing GEDs', file=sys.stdout)
        else:
            iterator = range(num_graphs)
        for i in iterator:
            for j in range(i + 1, num_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Graphs are swapped, so the two maps swap roles too.
                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations



def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
# initialize ged env.
ged_env = gedlibpy.GEDEnv()


+ 1
- 0
gklearn/preimage/__init__.py View File

@@ -13,5 +13,6 @@ __date__ = "March 2020"
from gklearn.preimage.preimage_generator import PreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class

+ 57
- 0
gklearn/tests/test_ged_env.py View File

@@ -0,0 +1,57 @@
"""Tests of GEDEnv.
"""


def test_GEDEnv():
    """Test GEDEnv by computing a graph edit distance on two "MUTAG" graphs.

    The first two graphs of the predefined dataset "MUTAG" are added to a
    GED environment, the 'BIPARTITE' method is run on the pair, and the
    resulting forward and backward node maps are checked to contain one
    entry per node of the first and second graph respectively.

    Any exception raised along the way is reported as a test failure.
    """
    # **1. Get dataset.**
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'
    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # **2. Compute graph edit distance.**
    try:
        from gklearn.ged.env import GEDEnv

        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost(
            'CONSTANT',  # GED cost type.
            edit_cost_constants=[3, 3, 1, 3, 3, 1],  # edit costs.
        )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        # Initialize GED environment.
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')
        options = {
            'initialization_method': 'RANDOM',  # or 'NODE', etc.
            'threads': 1,  # parallel threads.
        }
        ged_env.set_method(
            'BIPARTITE',  # GED method.
            options,  # options for GED method.
        )
        ged_env.init_method()  # initialize GED method.
        ged_env.run_method(listID[0], listID[1])  # run.
        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        # GED between the two graphs; called to make sure the query itself runs.
        dis = ged_env.get_upper_bound(listID[0], listID[1])

        import networkx as nx

        # Two separate assertions: writing them as ``assert a, b`` would turn
        # the second comparison into the failure message instead of a check.
        assert len(pi_forward) == nx.number_of_nodes(graph1)
        assert len(pi_backward) == nx.number_of_nodes(graph2)
    except Exception as exception:
        assert False, exception


# Run the test when this file is executed directly as a script.
if __name__ == "__main__":
test_GEDEnv()

+ 4
- 1
gklearn/tests/test_median_preimage_generator.py View File

@@ -68,4 +68,7 @@ def test_median_preimage_generator():
print('\n-------------------------------------')
print('fit method:', fit_method, '\n')
mpg_options['fit_method'] = fit_method
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4))
try:
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4))
except Exception as exception:
assert False, exception

+ 1
- 1
gklearn/utils/__init__.py View File

@@ -20,7 +20,7 @@ from gklearn.utils.graph_files import load_dataset, save_dataset
from gklearn.utils.timer import Timer
from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.utils.utils import compute_gram_matrices_by_class
from gklearn.utils.utils import SpecialLabel
from gklearn.utils.utils import SpecialLabel, dummy_node, undefined_node, dummy_edge
from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix
from gklearn.utils.trie import Trie
from gklearn.utils.knn import knn_cv, knn_classification

+ 41
- 9
gklearn/utils/utils.py View File

@@ -472,14 +472,6 @@ def get_mlti_dim_edge_attrs(G, attr_names):
for ed, attrs in G.edges(data=True):
attributes.append(tuple(attrs[aname] for aname in attr_names))
return attributes


@unique
class SpecialLabel(Enum):
    """Enumeration of special label values.

    Members
    -------
    DUMMY
        The dummy label, with value 1.
    """

    # The value is written out explicitly because enum.auto does not exist
    # in Python 3.5.
    DUMMY = 1
def normalize_gram_matrix(gram_matrix):
@@ -506,4 +498,44 @@ def compute_distance_matrix(gram_matrix):
dis_max = np.max(np.max(dis_mat))
dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
dis_mean = np.mean(np.mean(dis_mat))
return dis_mat, dis_max, dis_min, dis_mean
return dis_mat, dis_max, dis_min, dis_mean


def dummy_node():
    """Return the ID used for a dummy node.

    Returns
    -------
    float
        ``np.inf``, the ID of the dummy node.
    """
    # @todo: in GEDLIB, this is the max - 1 rather than max, I don't know why.
    return np.inf


def undefined_node():
    """Return the ID used for an undefined node.

    Returns
    -------
    float
        ``np.inf``, the ID of the undefined node.
    """
    return np.inf


def dummy_edge():
    """Return the ID used for a dummy edge.

    Returns
    -------
    float
        ``np.inf``, the ID of the dummy edge.
    """
    return np.inf


@unique
class SpecialLabel(Enum):
    """Enumeration of special label values.

    Members
    -------
    DUMMY
        The dummy label, with value 1.
    """

    # The value is written out explicitly because enum.auto does not exist
    # in Python 3.5.
    DUMMY = 1

Loading…
Cancel
Save