Browse Source

Add ged module.

v0.2.x
jajupmochi 5 years ago
parent
commit
5eb69de7ea
23 changed files with 1816 additions and 28 deletions
  1. +3
    -1
      .coveragerc
  2. +58
    -0
      gklearn/examples/compute_graph_edit_distance.py
  3. +2
    -0
      gklearn/ged/edit_costs/__init__.py
  4. +50
    -0
      gklearn/ged/edit_costs/constant.py
  5. +88
    -0
      gklearn/ged/edit_costs/edit_cost.py
  6. +3
    -1
      gklearn/ged/env/__init__.py
  7. +147
    -6
      gklearn/ged/env/common_types.py
  8. +181
    -0
      gklearn/ged/env/ged_data.py
  9. +369
    -0
      gklearn/ged/env/ged_env.py
  10. +20
    -8
      gklearn/ged/env/node_map.py
  11. +3
    -0
      gklearn/ged/methods/__init__.py
  12. +117
    -0
      gklearn/ged/methods/bipartite.py
  13. +195
    -0
      gklearn/ged/methods/ged_method.py
  14. +254
    -0
      gklearn/ged/methods/lsape_based_method.py
  15. +2
    -0
      gklearn/ged/util/__init__.py
  16. +121
    -0
      gklearn/ged/util/lsape_solver.py
  17. +21
    -0
      gklearn/ged/util/misc.py
  18. +78
    -1
      gklearn/ged/util/util.py
  19. +1
    -0
      gklearn/preimage/__init__.py
  20. +57
    -0
      gklearn/tests/test_ged_env.py
  21. +4
    -1
      gklearn/tests/test_median_preimage_generator.py
  22. +1
    -1
      gklearn/utils/__init__.py
  23. +41
    -9
      gklearn/utils/utils.py

+ 3
- 1
.coveragerc View File

@@ -1,2 +1,4 @@
[run] [run]
omit = gklearn/tests/*
omit =
gklearn/tests/*
gklearn/examples/*

+ 58
- 0
gklearn/examples/compute_graph_edit_distance.py View File

@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
"""compute_graph_edit_distance.ipynb

Automatically generated by Colaboratory.

Original file is located at
https://colab.research.google.com/drive/1Wfgn7WVuyOQQgwOvdUQBz0BzEVdp0YM3

**This script demonstrates how to compute a graph edit distance.**
---

**0. Install `graphkit-learn`.**
"""

"""**1. Get dataset.**"""

from gklearn.utils import Dataset

# Predefined dataset name, use dataset "MUTAG".
ds_name = 'MUTAG'

# Initialize a Dataset.
dataset = Dataset()
# Load predefined dataset "MUTAG".
dataset.load_predefined_dataset(ds_name)
# Compare the first two graphs of the dataset.
graph1 = dataset.graphs[0]
graph2 = dataset.graphs[1]
print(graph1, graph2)

"""**2. Compute graph edit distance.**"""

from gklearn.ged.env import GEDEnv


ged_env = GEDEnv() # initialize GED environment.
ged_env.set_edit_cost('CONSTANT', # GED cost type.
edit_cost_constants=[3, 3, 1, 3, 3, 1] # edit costs, in order: node ins/del/rel, edge ins/del/rel.
)
ged_env.add_nx_graph(graph1, '') # add graph1 (second argument is the graph class).
ged_env.add_nx_graph(graph2, '') # add graph2 (second argument is the graph class).
listID = ged_env.get_all_graph_ids() # get the list of graph IDs.
ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES') # initialize GED environment.
options = {'initialization_method': 'RANDOM', # or 'NODE', etc.
'threads': 1 # parallel threads.
}
ged_env.set_method('BIPARTITE', # GED method.
options # options for GED method.
)
ged_env.init_method() # initialize GED method.

ged_env.run_method(listID[0], listID[1]) # run.

pi_forward = ged_env.get_forward_map(listID[0], listID[1]) # forward map.
pi_backward = ged_env.get_backward_map(listID[0], listID[1]) # backward map.
dis = ged_env.get_upper_bound(listID[0], listID[1]) # upper bound of the GED between the two graphs.
print(pi_forward)
print(pi_backward)
print(dis)

+ 2
- 0
gklearn/ged/edit_costs/__init__.py View File

@@ -0,0 +1,2 @@
from gklearn.ged.edit_costs.edit_cost import EditCost
from gklearn.ged.edit_costs.constant import Constant

+ 50
- 0
gklearn/ged/edit_costs/constant.py View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 17:52:23 2020

@author: ljia
"""
from gklearn.ged.edit_costs import EditCost


class Constant(EditCost):
    """Edit cost model in which every edit operation has a fixed price.

    Insertions and deletions always cost the configured constant, whatever
    the label is. A relabeling costs the configured constant when the two
    labels differ and nothing when they are equal.
    """

    def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1, edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
        # Node-related constants.
        (self.__node_ins_cost,
         self.__node_del_cost,
         self.__node_rel_cost) = (node_ins_cost, node_del_cost, node_rel_cost)
        # Edge-related constants.
        (self.__edge_ins_cost,
         self.__edge_del_cost,
         self.__edge_rel_cost) = (edge_ins_cost, edge_del_cost, edge_rel_cost)

    def node_ins_cost_fun(self, node_label):
        """Return the constant node insertion cost; the label is ignored."""
        return self.__node_ins_cost

    def node_del_cost_fun(self, node_label):
        """Return the constant node deletion cost; the label is ignored."""
        return self.__node_del_cost

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Return the node relabeling cost, or 0 when both labels are equal."""
        return self.__node_rel_cost if node_label_1 != node_label_2 else 0

    def edge_ins_cost_fun(self, edge_label):
        """Return the constant edge insertion cost; the label is ignored."""
        return self.__edge_ins_cost

    def edge_del_cost_fun(self, edge_label):
        """Return the constant edge deletion cost; the label is ignored."""
        return self.__edge_del_cost

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Return the edge relabeling cost, or 0 when both labels are equal."""
        return self.__edge_rel_cost if edge_label_1 != edge_label_2 else 0

+ 88
- 0
gklearn/ged/edit_costs/edit_cost.py View File

@@ -0,0 +1,88 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 17:49:24 2020

@author: ljia
"""


class EditCost(object):
    """Base class of the edit cost models.

    Every cost function of this base class returns 0; derived classes are
    expected to override the six ``*_cost_fun`` methods.
    """

    def __init__(self):
        """Create the base edit cost object; it holds no state."""

    def node_ins_cost_fun(self, node_label):
        """Cost of inserting a node.

        :param node_label: Label of the inserted node.
        :return: The insertion cost (always 0 in this base class).
        """
        return 0

    def node_del_cost_fun(self, node_label):
        """Cost of deleting a node.

        :param node_label: Label of the deleted node.
        :return: The deletion cost (always 0 in this base class).
        """
        return 0

    def node_rel_cost_fun(self, node_label_1, node_label_2):
        """Cost of relabeling a node.

        :param node_label_1: Label before the change.
        :param node_label_2: Label after the change.
        :return: The relabeling cost (always 0 in this base class).
        """
        return 0

    def edge_ins_cost_fun(self, edge_label):
        """Cost of inserting an edge.

        :param edge_label: Label of the inserted edge.
        :return: The insertion cost (always 0 in this base class).
        """
        return 0

    def edge_del_cost_fun(self, edge_label):
        """Cost of deleting an edge.

        :param edge_label: Label of the deleted edge.
        :return: The deletion cost (always 0 in this base class).
        """
        return 0

    def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
        """Cost of relabeling an edge.

        :param edge_label_1: Label before the change.
        :param edge_label_2: Label after the change.
        :return: The relabeling cost (always 0 in this base class).
        """
        return 0

+ 3
- 1
gklearn/ged/env/__init__.py View File

@@ -1,2 +1,4 @@
from gklearn.ged.env.common_types import AlgorithmState
from gklearn.ged.env.common_types import Options, OptionsStringMap, AlgorithmState
from gklearn.ged.env.ged_data import GEDData
from gklearn.ged.env.ged_env import GEDEnv
from gklearn.ged.env.node_map import NodeMap from gklearn.ged.env.node_map import NodeMap

+ 147
- 6
gklearn/ged/env/common_types.py View File

@@ -8,11 +8,152 @@ Created on Thu Mar 19 18:17:38 2020


from enum import Enum, unique from enum import Enum, unique



class Options(object):
    """Namespace holding the enumerations used to configure the GED environment."""

    @unique
    class GEDMethod(Enum):
        """Available GED computation methods."""
        # Values 1-4 were guarded by "#ifdef GUROBI" in the code this
        # enumeration mirrors (see the preprocessor remnants in the original).
        F1 = 1
        F2 = 2
        COMPACT_MIP = 3
        BLP_NO_EDGE_LABELS = 4
        # Remaining methods.
        BRANCH = 5
        BRANCH_FAST = 6
        BRANCH_TIGHT = 7
        BRANCH_UNIFORM = 8
        BRANCH_COMPACT = 9
        PARTITION = 10
        HYBRID = 11
        RING = 12
        ANCHOR_AWARE_GED = 13
        WALKS = 14
        IPFP = 15
        BIPARTITE = 16
        SUBGRAPH = 17
        NODE = 18
        RING_ML = 19
        BIPARTITE_ML = 20
        REFINE = 21
        BP_BEAM = 22
        SIMULATED_ANNEALING = 23
        HED = 24
        STAR = 25

    @unique
    class EditCosts(Enum):
        """Available edit cost models."""
        CHEM_1 = 1
        CHEM_2 = 2
        CMU = 3
        GREC_1 = 4
        GREC_2 = 5
        PROTEIN = 6
        FINGERPRINT = 7
        LETTER = 8
        LETTER2 = 9
        NON_SYMBOLIC = 10
        CONSTANT = 11

    @unique
    class InitType(Enum):
        """Initialization type of the environment.

        With eager initialization all edit costs are pre-computed when the
        environment is initialized; otherwise they are computed at runtime.
        With shuffled copies, shuffled copies of all graphs are created and
        used when a graph is compared to itself, so that it is compared to an
        isomorphic but non-identical graph. Without shuffled copies, running
        a method with two identical graph IDs compares the graph to itself.
        """
        LAZY_WITHOUT_SHUFFLED_COPIES = 1  # lazy, no shuffled copies
        EAGER_WITHOUT_SHUFFLED_COPIES = 2  # eager, no shuffled copies
        LAZY_WITH_SHUFFLED_COPIES = 3  # lazy, with shuffled copies
        EAGER_WITH_SHUFFLED_COPIES = 4  # eager, with shuffled copies

    @unique
    class AlgorithmState(Enum):
        """State of an algorithm."""
        CALLED = 1  # the algorithm has been called
        INITIALIZED = 2  # the algorithm has been initialized
        CONVERGED = 3  # the algorithm has converged
        TERMINATED = 4  # the algorithm has terminated


class OptionsStringMap(object):
    """Lookup tables translating option names (strings) into ``Options`` enum members."""

    # Map of available computation methods between enum type and string.
    GEDMethod = {
        "BRANCH": Options.GEDMethod.BRANCH,
        "BRANCH_FAST": Options.GEDMethod.BRANCH_FAST,
        "BRANCH_TIGHT": Options.GEDMethod.BRANCH_TIGHT,
        "BRANCH_UNIFORM": Options.GEDMethod.BRANCH_UNIFORM,
        "BRANCH_COMPACT": Options.GEDMethod.BRANCH_COMPACT,
        "PARTITION": Options.GEDMethod.PARTITION,
        "HYBRID": Options.GEDMethod.HYBRID,
        "RING": Options.GEDMethod.RING,
        "ANCHOR_AWARE_GED": Options.GEDMethod.ANCHOR_AWARE_GED,
        "WALKS": Options.GEDMethod.WALKS,
        "IPFP": Options.GEDMethod.IPFP,
        "BIPARTITE": Options.GEDMethod.BIPARTITE,
        "SUBGRAPH": Options.GEDMethod.SUBGRAPH,
        "NODE": Options.GEDMethod.NODE,
        "RING_ML": Options.GEDMethod.RING_ML,
        "BIPARTITE_ML": Options.GEDMethod.BIPARTITE_ML,
        "REFINE": Options.GEDMethod.REFINE,
        "BP_BEAM": Options.GEDMethod.BP_BEAM,
        "SIMULATED_ANNEALING": Options.GEDMethod.SIMULATED_ANNEALING,
        "HED": Options.GEDMethod.HED,
        "STAR": Options.GEDMethod.STAR,
        # ifdef GUROBI
        "F1": Options.GEDMethod.F1,
        "F2": Options.GEDMethod.F2,
        "COMPACT_MIP": Options.GEDMethod.COMPACT_MIP,
        "BLP_NO_EDGE_LABELS": Options.GEDMethod.BLP_NO_EDGE_LABELS,
    }

    # Map of available edit cost functions between enum type and string.
    EditCosts = {
        "CHEM_1": Options.EditCosts.CHEM_1,
        "CHEM_2": Options.EditCosts.CHEM_2,
        "CMU": Options.EditCosts.CMU,
        "GREC_1": Options.EditCosts.GREC_1,
        "GREC_2": Options.EditCosts.GREC_2,
        "LETTER": Options.EditCosts.LETTER,
        "LETTER2": Options.EditCosts.LETTER2,
        "NON_SYMBOLIC": Options.EditCosts.NON_SYMBOLIC,
        "FINGERPRINT": Options.EditCosts.FINGERPRINT,
        "PROTEIN": Options.EditCosts.PROTEIN,
        "CONSTANT": Options.EditCosts.CONSTANT,
    }

    # Map of available initialization types of the environment between enum type and string.
    # The last entry used to repeat the "LAZY_WITH_SHUFFLED_COPIES" key, which
    # made the eager-with-shuffled-copies type unreachable by name.
    InitType = {
        "LAZY_WITHOUT_SHUFFLED_COPIES": Options.InitType.LAZY_WITHOUT_SHUFFLED_COPIES,
        "EAGER_WITHOUT_SHUFFLED_COPIES": Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES,
        "LAZY_WITH_SHUFFLED_COPIES": Options.InitType.LAZY_WITH_SHUFFLED_COPIES,
        "EAGER_WITH_SHUFFLED_COPIES": Options.InitType.EAGER_WITH_SHUFFLED_COPIES,
    }

@unique
class AlgorithmState(Enum):
    """Can be used to specify the state of an algorithm."""
    CALLED = 1  # The algorithm has been called.
    INITIALIZED = 2  # The algorithm has been initialized.
    CONVERGED = 3  # The algorithm has converged.
    TERMINATED = 4  # The algorithm has terminated.

+ 181
- 0
gklearn/ged/env/ged_data.py View File

@@ -0,0 +1,181 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 15:05:01 2020

@author: ljia
"""
from gklearn.ged.env import Options, OptionsStringMap
from gklearn.ged.edit_costs import Constant
from gklearn.utils import SpecialLabel, dummy_node


class GEDData(object):
    """Storage shared by a GED environment: graphs, labels and the edit cost model.

    The attributes are filled in by the environment that owns this object;
    this class itself only offers cost look-ups and the computation of the
    edit cost induced by a node map.
    """

    def __init__(self):
        self._graphs = []
        self._graph_names = []
        self._graph_classes = []
        self._num_graphs_without_shuffled_copies = 0
        self._strings_to_internal_node_ids = []
        self._internal_node_ids_to_strings = []
        self._edit_cost = None
        self._node_costs = None
        self._edge_costs = None
        self._node_labels = []
        self._edge_labels = []
        self._init_type = Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
        self._delete_edit_cost = True
        self._max_num_nodes = 0
        self._max_num_edges = 0

    def num_graphs(self):
        """Return the number of graphs stored in this instance."""
        return len(self._graphs)

    def shuffled_graph_copies_available(self):
        """Return True if the initialization type is one of the "with shuffled copies" types."""
        return (self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES
                or self._init_type == Options.InitType.LAZY_WITH_SHUFFLED_COPIES)

    def node_cost(self, label1, label2):
        """Return the node relabeling, insertion, or deletion cost.

        :param label1: First node label.
        :param label2: Second node label.
        :return: 0 if both labels are equal, the insertion cost if
            ``label1`` is the dummy label, the deletion cost if ``label2``
            is the dummy label, and the relabeling cost otherwise. With
            eager initialization the value is read from the pre-computed
            node cost matrix when one is available.
        """
        # The eager cost matrix is only usable once it has been built; fall
        # back to the cost functions while it is still None.
        if self._eager_init() and self._node_costs is not None:  # @todo: check if correct
            return self._node_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:  # @todo: check dummy
            return self._edit_cost.node_del_cost_fun(label1)
        return self._edit_cost.node_rel_cost_fun(label1, label2)

    def edge_cost(self, label1, label2):
        """Return the edge relabeling, insertion, or deletion cost.

        :param label1: First edge label.
        :param label2: Second edge label.
        :return: 0 if both labels are equal, the insertion cost if
            ``label1`` is the dummy label, the deletion cost if ``label2``
            is the dummy label, and the relabeling cost otherwise. With
            eager initialization the value is read from the pre-computed
            edge cost matrix when one is available.
        """
        # Read the *edge* cost matrix here (the node matrix was used before),
        # and only when it has actually been built.
        if self._eager_init() and self._edge_costs is not None:  # @todo: check if correct
            return self._edge_costs[label1, label2]
        if label1 == label2:
            return 0
        if label1 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_ins_cost_fun(label2)
        if label2 == SpecialLabel.DUMMY:
            return self._edit_cost.edge_del_cost_fun(label1)
        return self._edit_cost.edge_rel_cost_fun(label1, label2)

    def compute_induced_cost(self, g, h, node_map):
        """Compute the edit cost between two graphs induced by a node map.

        The result is stored in ``node_map`` through ``set_induced_cost``;
        nothing is returned.

        :param g: Input graph.
        :param h: Input graph.
        :param node_map: Node map whose induced edit cost is to be computed.
        """
        cost = 0

        # Node costs: substitutions and deletions (nodes of g) ...
        for node in g.nodes():
            image = node_map.image(node)
            label2 = (SpecialLabel.DUMMY if image == dummy_node() else h.nodes[image]['label'])
            cost += self.node_cost(g.nodes[node]['label'], label2)
        # ... and insertions (nodes of h without pre-image).
        for node in h.nodes():
            pre_image = node_map.pre_image(node)
            if pre_image == dummy_node():
                cost += self.node_cost(SpecialLabel.DUMMY, h.nodes[node]['label'])

        # Edge costs: substitutions and deletions (edges of g) ...
        for (n1, n2) in g.edges():
            image1 = node_map.image(n1)
            image2 = node_map.image(n2)
            label2 = (h.edges[(image2, image1)]['label'] if h.has_edge(image2, image1) else SpecialLabel.DUMMY)
            cost += self.edge_cost(g.edges[(n1, n2)]['label'], label2)
        # ... and insertions (edges of h whose pre-image is not an edge of g).
        for (n1, n2) in h.edges():
            if not g.has_edge(node_map.pre_image(n2), node_map.pre_image(n1)):
                cost += self.edge_cost(SpecialLabel.DUMMY, h.edges[(n1, n2)]['label'])

        node_map.set_induced_cost(cost)

    @staticmethod
    def _check_num_constants(edit_cost_constants, expected, name):
        """Raise if the number of constants is neither ``expected`` nor 0."""
        actual = len(edit_cost_constants)
        if actual != expected and actual != 0:
            raise Exception('Wrong number of constants for selected edit costs Options::EditCosts::'
                            + name + '. Expected: ' + str(expected) + ' or 0; actual: ' + str(actual) + '.')

    def _set_edit_cost(self, edit_cost, edit_cost_constants):
        """Select the edit cost model.

        :param edit_cost: An ``Options.EditCosts`` member or its name.
        :param edit_cost_constants: Constants passed to the constructor of
            the selected edit cost class; an empty sequence selects the
            class defaults.
        :raises Exception: If the number of constants does not fit the
            selected edit costs.
        """
        if self._delete_edit_cost:
            self._edit_cost = None

        if isinstance(edit_cost, str):
            edit_cost = OptionsStringMap.EditCosts[edit_cost]

        # The constant count is validated first, so unpacking passes either
        # all constants or none (class defaults). The cost classes are looked
        # up lazily, branch by branch, exactly as before.
        if edit_cost == Options.EditCosts.CHEM_1:
            self._check_num_constants(edit_cost_constants, 4, 'CHEM_1')
            self._edit_cost = CHEM1(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.LETTER:
            self._check_num_constants(edit_cost_constants, 3, 'LETTER')
            self._edit_cost = Letter(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.LETTER2:
            self._check_num_constants(edit_cost_constants, 5, 'LETTER2')
            self._edit_cost = Letter2(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.NON_SYMBOLIC:
            self._check_num_constants(edit_cost_constants, 6, 'NON_SYMBOLIC')
            self._edit_cost = NonSymbolic(*edit_cost_constants)
        elif edit_cost == Options.EditCosts.CONSTANT:
            self._check_num_constants(edit_cost_constants, 6, 'CONSTANT')
            self._edit_cost = Constant(*edit_cost_constants)

        self._delete_edit_cost = True

    def _eager_init(self):
        """Return True if the initialization type is one of the eager types."""
        return (self._init_type == Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES
                or self._init_type == Options.InitType.EAGER_WITH_SHUFFLED_COPIES)

+ 369
- 0
gklearn/ged/env/ged_env.py View File

@@ -0,0 +1,369 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 17 12:02:36 2020

@author: ljia
"""
import numpy as np
import networkx as nx
from gklearn.ged.env import Options, OptionsStringMap
from gklearn.ged.env import GEDData


class GEDEnv(object):

def __init__(self):
    """Create an empty, uninitialized GED environment."""
    # Shared data container and the currently selected GED method (none yet).
    self.__ged_data = GEDData()
    self.__ged_method = None
    # Lifecycle bookkeeping.
    self.__initialized = False
    self.__new_graph_ids = []
    # Per-graph translation tables between user node IDs and internal node IDs.
    self.__original_to_internal_node_ids = []
    self.__internal_to_original_node_ids = []
    # Results of run_method(), keyed by (g_id, h_id).
    self.__lower_bounds = {}
    self.__upper_bounds = {}
    self.__runtimes = {}
    self.__node_maps = {}
def set_edit_cost(self, edit_cost, edit_cost_constants=[]):
"""
/*!
* @brief Sets the edit costs to one of the predefined edit costs.
* @param[in] edit_costs Select one of the predefined edit costs.
* @param[in] edit_cost_constants Constants passed to the constructor of the edit cost class selected by @p edit_costs.
*/
"""
self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants)
def add_graph(self, graph_name='', graph_class=''):
    """Add a new, empty graph to the environment. Call init() afterwards.

    :param graph_name: The name of the added graph. Empty if not specified.
    :param graph_class: The class of the added graph. Empty if not specified.
    :return: The ID of the newly added graph.
    """
    # @todo: graphs are not uninitialized.
    data = self.__ged_data
    self.__initialized = False

    # The new ID is the current number of (non-shuffled) graphs.
    new_id = data._num_graphs_without_shuffled_copies
    data._num_graphs_without_shuffled_copies = new_id + 1
    self.__new_graph_ids.append(new_id)

    # Register the graph itself and its metadata.
    data._graphs.append(nx.Graph())
    data._graph_names.append(graph_name)
    data._graph_classes.append(graph_class)

    # One empty node-ID translation table per graph.
    self.__original_to_internal_node_ids.append({})
    self.__internal_to_original_node_ids.append({})
    data._strings_to_internal_node_ids.append({})
    data._internal_node_ids_to_strings.append({})
    return new_id
def add_node(self, graph_id, node_id, node_label):
    """Add a labeled node to a graph of the environment.

    :param graph_id: ID of a graph that has been added to the environment.
    :param node_id: The user-specific ID of the node to add.
    :param node_label: The label of the node to add.
    """
    # @todo: check ids.
    self.__initialized = False
    data = self.__ged_data
    graph = data._graphs[graph_id]
    # Internal node IDs are consecutive: the next one is the current node count.
    internal_id = nx.number_of_nodes(graph)
    graph.add_node(internal_id, label=node_label)
    # Keep both directions of the user-ID <-> internal-ID translation ...
    self.__original_to_internal_node_ids[graph_id][node_id] = internal_id
    self.__internal_to_original_node_ids[graph_id][internal_id] = node_id
    # ... and the same translation keyed by the string form of the user ID.
    node_name = str(node_id)
    data._strings_to_internal_node_ids[graph_id][node_name] = internal_id
    data._internal_node_ids_to_strings[graph_id][internal_id] = node_name
    # @todo: node_label_to_id_
def add_edge(self, graph_id, nd_from, nd_to, edge_label, ignore_duplicates=True):
"""
/*!
* @brief Adds a labeled edge.
* @param[in] graph_id ID of graph that has been added to the environment.
* @param[in] tail The user-specific ID of the tail of the edge that has to be added.
* @param[in] head The user-specific ID of the head of the edge that has to be added.
* @param[in] edge_label The label of the vertex that has to be added. Set to ged::NoLabel() if template parameter @p UserEdgeLabel equals ged::NoLabel.
* @param[in] ignore_duplicates If @p true, duplicate edges are ignores. Otherwise, an exception is thrown if an existing edge is added to the graph.
*/
"""
# @todo: check everything.
self.__initialized = False
# @todo: check ignore_duplicates.
self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label)
# @todo: edge_id and label_id, edge_label_to_id_.
def add_nx_graph(self, g, classe, ignore_duplicates=True) :
    """
    Add a networkx graph to the environment.

    Node and edge labels are built from the attribute dictionaries of the graph: each label is the tuple of (key, value) pairs sorted by key.

    :param g: The graph to add (networkx graph)
    :param classe: The class of the graph
    :param ignore_duplicates: Forwarded to add_edge() for every edge. True by default
    :type g: networkx.graph
    :type ignore_duplicates: bool
    :return: The ID of the newly added graph
    :rtype: size_t
    """
    def _as_label(attrs):
        # Hashable, order-independent representation of an attribute dict.
        return tuple(sorted(attrs.items(), key=lambda kv: kv[0]))

    graph_id = self.add_graph(g.name, classe) # check if the graph name already exists.
    # @todo: if the keys of labels include int and str at the same time.
    for node in g.nodes:
        self.add_node(graph_id, node, _as_label(g.nodes[node]))
    for edge in g.edges:
        tail, head = edge[0], edge[1]
        self.add_edge(graph_id, tail, head, _as_label(g.edges[(tail, head)]), ignore_duplicates)
    return graph_id
def init(self, init_type=Options.InitType.EAGER_WITHOUT_SHUFFLED_COPIES, print_to_stdout=False):
    """Initialize the environment after graphs and edit costs have been set.

    :param init_type: Initialization type (``Options.InitType`` member or its name).
    :param print_to_stdout: Currently unused.
    :raises Exception: If no edit costs have been selected.
    """
    if isinstance(init_type, str):
        init_type = OptionsStringMap.InitType[init_type]

    # Throw an exception if no edit costs have been selected.
    if self.__ged_data._edit_cost is None:
        raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')

    # Return if the environment is initialized.
    if self.__initialized:
        return

    # Set initialization type.
    self.__ged_data._init_type = init_type

    # @todo: Construct shuffled graph copies if necessary.

    # Re-initialize adjacency matrices (also previously initialized graphs must be re-initialized because of possible re-allocation).
    # @todo: setup_adjacency_matrix, don't know if neccessary.
    # np.max() raises on an empty sequence, so an environment without graphs
    # gets maxima of 0 (the values GEDData starts with).
    graphs = self.__ged_data._graphs
    self.__ged_data._max_num_nodes = np.max([nx.number_of_nodes(g) for g in graphs]) if graphs else 0
    self.__ged_data._max_num_edges = np.max([nx.number_of_edges(g) for g in graphs]) if graphs else 0

    # Initialize cost matrices if necessary.
    if self.__ged_data._eager_init():
        pass # @todo: init_cost_matrices_: 1. Update node cost matrix if new node labels have been added to the environment; 2. Update edge cost matrix if new edge labels have been added to the environment.

    # Mark environment as initialized.
    self.__initialized = True
    self.__new_graph_ids.clear()
def set_method(self, method, options=''):
    """Set the GED method to be used by run_method().

    :param method: The method to use (``Options.GEDMethod`` member or its name).
    :param options: Options passed to the selected method through ``set_options()``.
    :raises Exception: If ``method`` does not select any known GED method.
    """
    # Reset instead of deleting the attribute: if constructing the new method
    # fails, the "is None" checks of run_method()/init_method() keep working.
    self.__ged_method = None
    if isinstance(method, str):
        method = OptionsStringMap.GEDMethod[method]

    # The method classes are resolved lazily, one branch at a time; only
    # Bipartite is imported here.
    if method == Options.GEDMethod.BRANCH:
        self.__ged_method = Branch(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_FAST:
        self.__ged_method = BranchFast(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_TIGHT:
        self.__ged_method = BranchTight(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_UNIFORM:
        self.__ged_method = BranchUniform(self.__ged_data)
    elif method == Options.GEDMethod.BRANCH_COMPACT:
        self.__ged_method = BranchCompact(self.__ged_data)
    elif method == Options.GEDMethod.PARTITION:
        self.__ged_method = Partition(self.__ged_data)
    elif method == Options.GEDMethod.HYBRID:
        self.__ged_method = Hybrid(self.__ged_data)
    elif method == Options.GEDMethod.RING:
        self.__ged_method = Ring(self.__ged_data)
    elif method == Options.GEDMethod.ANCHOR_AWARE_GED:
        self.__ged_method = AnchorAwareGED(self.__ged_data)
    elif method == Options.GEDMethod.WALKS:
        self.__ged_method = Walks(self.__ged_data)
    elif method == Options.GEDMethod.IPFP:
        self.__ged_method = IPFP(self.__ged_data)
    elif method == Options.GEDMethod.BIPARTITE:
        from gklearn.ged.methods import Bipartite
        self.__ged_method = Bipartite(self.__ged_data)
    elif method == Options.GEDMethod.SUBGRAPH:
        self.__ged_method = Subgraph(self.__ged_data)
    elif method == Options.GEDMethod.NODE:
        self.__ged_method = Node(self.__ged_data)
    elif method == Options.GEDMethod.RING_ML:
        self.__ged_method = RingML(self.__ged_data)
    elif method == Options.GEDMethod.BIPARTITE_ML:
        self.__ged_method = BipartiteML(self.__ged_data)
    elif method == Options.GEDMethod.REFINE:
        self.__ged_method = Refine(self.__ged_data)
    elif method == Options.GEDMethod.BP_BEAM:
        self.__ged_method = BPBeam(self.__ged_data)
    elif method == Options.GEDMethod.SIMULATED_ANNEALING:
        self.__ged_method = SimulatedAnnealing(self.__ged_data)
    elif method == Options.GEDMethod.HED:
        self.__ged_method = HED(self.__ged_data)
    elif method == Options.GEDMethod.STAR:
        self.__ged_method = STAR(self.__ged_data)
    # #ifdef GUROBI
    elif method == Options.GEDMethod.F1:
        self.__ged_method = F1(self.__ged_data)
    elif method == Options.GEDMethod.F2:
        self.__ged_method = F2(self.__ged_data)
    elif method == Options.GEDMethod.COMPACT_MIP:
        self.__ged_method = CompactMIP(self.__ged_data)
    elif method == Options.GEDMethod.BLP_NO_EDGE_LABELS:
        self.__ged_method = BLPNoEdgeLabels(self.__ged_data)

    if self.__ged_method is None:
        raise Exception('Invalid GED method: ' + str(method) + '.')
    self.__ged_method.set_options(options)
def run_method(self, g_id, h_id):
"""
/*!
* @brief Runs the GED method specified by call to set_method() between the graphs with IDs @p g_id and @p h_id.
* @param[in] g_id ID of an input graph that has been added to the environment.
* @param[in] h_id ID of an input graph that has been added to the environment.
*/
"""
if g_id >= self.__ged_data.num_graphs():
raise Exception('The graph with ID', str(g_id), 'has not been added to the environment.')
if h_id >= self.__ged_data.num_graphs():
raise Exception('The graph with ID', str(h_id), 'has not been added to the environment.')
if not self.__initialized:
raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
if self.__ged_method is None:
raise Exception('No method has been set. Call set_method() before calling run().')
# Call selected GEDMethod and store results.
if self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id):
self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id)) # @todo: why shuffle?
else:
self.__ged_method.run(g_id, h_id)
self.__lower_bounds[(g_id, h_id)] = self.__ged_method.get_lower_bound()
self.__upper_bounds[(g_id, h_id)] = self.__ged_method.get_upper_bound()
self.__runtimes[(g_id, h_id)] = self.__ged_method.get_runtime()
self.__node_maps[(g_id, h_id)] = self.__ged_method.get_node_map()
def init_method(self):
"""Initializes the method specified by call to set_method().
"""
if not self.__initialized:
raise Exception('The environment is uninitialized. Call init() before calling init_method().')
if self.__ged_method is None:
raise Exception('No method has been set. Call set_method() before calling init_method().')
self.__ged_method.init()
def get_upper_bound(self, g_id, h_id):
"""
/*!
* @brief Returns upper bound for edit distance between the input graphs.
* @param[in] g_id ID of an input graph that has been added to the environment.
* @param[in] h_id ID of an input graph that has been added to the environment.
* @return Upper bound computed by the last call to run_method() with arguments @p g_id and @p h_id.
*/
"""
if (g_id, h_id) not in self.__upper_bounds:
raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_upper_bound(' + str(g_id) + ',' + str(h_id) + ').')
return self.__upper_bounds[(g_id, h_id)]
def get_lower_bound(self, g_id, h_id):
"""
/*!
* @brief Returns lower bound for edit distance between the input graphs.
* @param[in] g_id ID of an input graph that has been added to the environment.
* @param[in] h_id ID of an input graph that has been added to the environment.
* @return Lower bound computed by the last call to run_method() with arguments @p g_id and @p h_id.
*/
"""
if (g_id, h_id) not in self.__lower_bounds:
raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_lower_bound(' + str(g_id) + ',' + str(h_id) + ').')
return self.__lower_bounds[(g_id, h_id)]
def get_runtime(self, g_id, h_id):
    """Return the runtime stored for the ordered graph pair (g_id, h_id).

    :param g_id: ID of the first graph of the pair.
    :param h_id: ID of the second graph of the pair.
    :return: The value recorded in the runtime table for ``(g_id, h_id)``.
    :raises Exception: If nothing is stored for this ordered pair, i.e.
        run_method() has not been called with these arguments.
    """
    key = (g_id, h_id)
    if key not in self.__runtimes:
        # The public entry point is run_method(); name it so the hint is actionable.
        raise Exception(f'Call run_method({g_id},{h_id}) before calling get_runtime({g_id},{h_id}).')
    return self.__runtimes[key]

def get_init_time(self):
    """Return the initialization time reported by the current GED method.

    :return: Whatever ``get_init_time()`` of the method object returns.
    """
    method = self.__ged_method
    return method.get_init_time()


def get_node_map(self, g_id, h_id):
    """Return the node map stored for the ordered graph pair (g_id, h_id).

    :param g_id: ID of the first graph of the pair.
    :param h_id: ID of the second graph of the pair.
    :return: The object recorded in the node-map table for ``(g_id, h_id)``.
    :raises Exception: If nothing is stored for this ordered pair, i.e.
        run_method() has not been called with these arguments.
    """
    key = (g_id, h_id)
    if key not in self.__node_maps:
        # The public entry point is run_method(); name it so the hint is actionable.
        raise Exception(f'Call run_method({g_id},{h_id}) before calling get_node_map({g_id},{h_id}).')
    return self.__node_maps[key]

def get_forward_map(self, g_id, h_id):
    """Return the forward map of the node map stored for (g_id, h_id).

    :param g_id: ID of the first compared graph.
    :param h_id: ID of the second compared graph.
    :return: The ``forward_map`` attribute of the node map returned by
        ``get_node_map(g_id, h_id)``.

    .. seealso:: get_backward_map(), get_node_map()
    .. warning:: get_node_map() raises if no result is stored for this pair,
        so the method must have been run on the same two graphs first.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.forward_map
def get_backward_map(self, g_id, h_id):
    """Return the backward map of the node map stored for (g_id, h_id).

    :param g_id: ID of the first compared graph.
    :param h_id: ID of the second compared graph.
    :return: The ``backward_map`` attribute of the node map returned by
        ``get_node_map(g_id, h_id)``.

    .. seealso:: get_forward_map(), get_node_map()
    .. warning:: get_node_map() raises if no result is stored for this pair,
        so the method must have been run on the same two graphs first.
    """
    node_map = self.get_node_map(g_id, h_id)
    return node_map.backward_map
def get_all_graph_ids(self):
    """Return the IDs ``0 .. n-1``, where ``n`` is the number of graphs without shuffled copies.

    :return: A new list of consecutive integer IDs.
    """
    num_graphs = self.__ged_data._num_graphs_without_shuffled_copies
    return list(range(num_graphs))

+ 20
- 8
gklearn/ged/env/node_map.py View File

@@ -6,15 +6,27 @@ Created on Wed Apr 22 11:31:26 2020
@author: ljia @author: ljia
""" """
import numpy as np import numpy as np
from gklearn.utils import dummy_node, undefined_node



class NodeMap(object): class NodeMap(object):
def __init__(self, num_nodes_g, num_nodes_h): def __init__(self, num_nodes_g, num_nodes_h):
self.__forward_map = [np.inf] * num_nodes_g
self.__backward_map = [np.inf] * num_nodes_h
self.__forward_map = [undefined_node()] * num_nodes_g
self.__backward_map = [undefined_node()] * num_nodes_h
self.__induced_cost = np.inf self.__induced_cost = np.inf
def clear(self):
"""
/*!
* @brief Clears the node map.
*/
"""
self.__forward_map = [undefined_node() for i in range(len(self.__forward_map))]
self.__backward_map = [undefined_node() for i in range(len(self.__backward_map))]
def num_source_nodes(self): def num_source_nodes(self):
return len(self.__forward_map) return len(self.__forward_map)
@@ -28,7 +40,7 @@ class NodeMap(object):
return self.__forward_map[node] return self.__forward_map[node]
else: else:
raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
return np.inf
return undefined_node()
def pre_image(self, node): def pre_image(self, node):
@@ -36,28 +48,28 @@ class NodeMap(object):
return self.__backward_map[node] return self.__backward_map[node]
else: else:
raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.') raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.')
return np.inf
return undefined_node()
def as_relation(self, relation): def as_relation(self, relation):
relation.clear() relation.clear()
for i in range(0, len(self.__forward_map)): for i in range(0, len(self.__forward_map)):
k = self.__forward_map[i] k = self.__forward_map[i]
if k != np.inf:
if k != undefined_node():
relation.append(tuple((i, k))) relation.append(tuple((i, k)))
for k in range(0, len(self.__backward_map)): for k in range(0, len(self.__backward_map)):
i = self.__backward_map[k] i = self.__backward_map[k]
if i == np.inf:
if i == dummy_node():
relation.append(tuple((i, k))) relation.append(tuple((i, k)))
def add_assignment(self, i, k): def add_assignment(self, i, k):
if i != np.inf:
if i != dummy_node():
if i < len(self.__forward_map): if i < len(self.__forward_map):
self.__forward_map[i] = k self.__forward_map[i] = k
else: else:
raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.') raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.')
if k != np.inf:
if k != dummy_node():
if k < len(self.__backward_map): if k < len(self.__backward_map):
self.__backward_map[k] = i self.__backward_map[k] = i
else: else:


+ 3
- 0
gklearn/ged/methods/__init__.py View File

@@ -0,0 +1,3 @@
from gklearn.ged.methods.ged_method import GEDMethod
from gklearn.ged.methods.lsape_based_method import LSAPEBasedMethod
from gklearn.ged.methods.bipartite import Bipartite

+ 117
- 0
gklearn/ged/methods/bipartite.py View File

@@ -0,0 +1,117 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 16:09:29 2020

@author: ljia
"""
import numpy as np
import networkx as nx
from gklearn.ged.methods import LSAPEBasedMethod
from gklearn.ged.util import LSAPESolver
from gklearn.utils import SpecialLabel


class Bipartite(LSAPEBasedMethod):
    """LSAPE-based GED method whose cell costs combine a node edit with its incident edges.

    Every cell of the master problem is the cost of one node operation
    (substitution, deletion or insertion) plus the cost of editing the edges
    incident to the node(s) involved.
    """

    def __init__(self, ged_data):
        """Create the method on top of ``ged_data`` and switch off the lower bound."""
        super().__init__(ged_data)
        self._compute_lower_bound = False

    ###########################################################################
    # Inherited member functions from LSAPEBasedMethod.
    ###########################################################################

    def _lsape_populate_instance(self, g, h, master_problem):
        """Fill ``master_problem`` in place with substitution, deletion and insertion costs.

        With ``n`` and ``m`` the node counts of ``g`` and ``h``, cell ``[u, v]``
        (``u < n``, ``v < m``) receives the substitution cost, cell ``[u, m + u]``
        the deletion cost of ``u`` and cell ``[n + v, v]`` the insertion cost of
        ``v``. Nodes are addressed by the integers ``0 .. n-1`` / ``0 .. m-1``.

        :param g: First input graph.
        :param h: Second input graph.
        :param master_problem: Matrix indexed as ``master_problem[row, col]``;
            all other cells are left untouched.
        """
        num_g = nx.number_of_nodes(g)
        num_h = nx.number_of_nodes(h)

        # Substitutions: top-left n x m block.
        for u in range(num_g):
            for v in range(num_h):
                master_problem[u, v] = self._compute_substitution_cost(g, h, u, v)

        # Deletions: one dedicated column per node of g.
        for u in range(num_g):
            master_problem[u, num_h + u] = self._compute_deletion_cost(g, u)

        # Insertions: one dedicated row per node of h.
        for v in range(num_h):
            master_problem[num_g + v, v] = self._compute_insertion_cost(h, v)

    ###########################################################################
    # Helper member functions.
    ###########################################################################

    def _compute_substitution_cost(self, g, h, u, v):
        """Return the cost of substituting node ``u`` of ``g`` by node ``v`` of ``h``.

        The result is the node substitution cost plus the minimal cost of an
        assignment between the edges incident to ``u`` and those incident to
        ``v``, obtained by solving a small LSAPE sub-problem.

        :param g: First input graph.
        :param h: Second input graph.
        :param u: Node of ``g``.
        :param v: Node of ``h``.
        :return: Node cost plus the minimal cost reported by the sub-solver.
        """
        ged_data = self._ged_data
        node_part = ged_data.node_cost(g.nodes[u]['label'], h.nodes[v]['label'])

        # Sub-problem: forbidden cells are +inf, the dummy/dummy block is free.
        deg_u, deg_v = g.degree[u], h.degree[v]
        size = deg_u + deg_v
        edge_problem = np.full((size, size), np.inf)
        edge_problem[deg_u:, deg_v:] = 0

        # Attribute dicts of the edges incident to u and to v.
        # @todo: should directed graphs be considered?
        edges_u = list(g[u].values())
        edges_v = list(h[v].values())

        # Edge deletions.
        for i, attrs in enumerate(edges_u):
            edge_problem[i, deg_v + i] = ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)

        # Edge insertions.
        for j, attrs in enumerate(edges_v):
            edge_problem[deg_u + j, j] = ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])

        # Edge relabellings.
        for i, attrs_u in enumerate(edges_u):
            for j, attrs_v in enumerate(edges_v):
                edge_problem[i, j] = ged_data.edge_cost(attrs_u['label'], attrs_v['label'])

        solver = LSAPESolver(edge_problem)
        solver.set_model(self._lsape_model)
        solver.solve()
        return node_part + solver.minimal_cost()

    def _compute_deletion_cost(self, g, v):
        """Return the cost of deleting node ``v`` of ``g`` together with its incident edges."""
        total = self._ged_data.node_cost(g.nodes[v]['label'], SpecialLabel.DUMMY)
        for attrs in g[v].values():
            total = total + self._ged_data.edge_cost(attrs['label'], SpecialLabel.DUMMY)
        return total

    def _compute_insertion_cost(self, g, v):
        """Return the cost of inserting node ``v`` of ``g`` together with its incident edges."""
        total = self._ged_data.node_cost(SpecialLabel.DUMMY, g.nodes[v]['label'])
        for attrs in g[v].values():
            total = total + self._ged_data.edge_cost(SpecialLabel.DUMMY, attrs['label'])
        return total

+ 195
- 0
gklearn/ged/methods/ged_method.py View File

@@ -0,0 +1,195 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 15:52:35 2020

@author: ljia
"""
import numpy as np
import time
import networkx as nx


class GEDMethod(object):
    """Base class of methods that compute bounds on the graph edit distance.

    The public methods handle timing and result bookkeeping; subclasses
    customise the behaviour by overriding the ``_ged_*`` hooks.
    """

    def __init__(self, ged_data):
        """Store the data object and reset all results.

        :param ged_data: Data object; run() reads its ``_graphs`` container.
        """
        self._initialized = False
        self._ged_data = ged_data
        self._options = None
        self._lower_bound = 0
        self._upper_bound = np.inf
        self._node_map = [0, 0]  # @todo: placeholder until a run yields a node map.
        self._runtime = None
        self._init_time = None

    def init(self):
        """Initialize the method with the options specified by set_options().

        The duration of ``_ged_init()`` is recorded and returned by
        get_init_time().
        """
        # perf_counter() is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start = time.perf_counter()
        self._ged_init()
        end = time.perf_counter()
        self._init_time = end - start
        self._initialized = True

    def set_options(self, options):
        """Reset all options to their defaults, then apply ``options``.

        :param options: Mapping from option name to option argument.
        :raises Exception: If ``_ged_parse_option()`` rejects one of the options.
        """
        self._ged_set_default_options()
        for key, val in options.items():
            if not self._ged_parse_option(key, val):
                # Build ONE message string; passing the pieces as separate
                # arguments would make the exception render as a tuple.
                # @todo: _ged_valid_options_string() is not implemented yet.
                raise Exception('Invalid option "' + str(key) + '". Usage: options = "' + self._ged_valid_options_string() + '".')
        self._initialized = False

    def run(self, g_id, h_id):
        """Run the method on two stored graphs and record the results.

        :param g_id: ID of the first input graph (index into ``ged_data._graphs``).
        :param h_id: ID of the second input graph (index into ``ged_data._graphs``).
        """
        start = time.perf_counter()
        result = self.run_as_util(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id])
        end = time.perf_counter()
        self._lower_bound = result['lower_bound']
        self._upper_bound = result['upper_bound']
        # Keep the previous node map if this run produced none.
        if len(result['node_maps']) > 0:
            self._node_map = result['node_maps'][0]
        self._runtime = end - start

    def run_as_util(self, g, h):
        """Run the method on two graph objects and return the result.

        :param g: First input graph.
        :param h: Second input graph.
        :return: The result of ``_ged_run(g, h)``; run() expects a dict with the
            keys ``'lower_bound'``, ``'upper_bound'`` and ``'node_maps'``.
        """
        # @todo: compute the optimal solution directly if at least one of the
        # two graphs is empty. For now only a notice is printed and the method
        # is run anyway.
        if nx.number_of_nodes(g) == 0 or nx.number_of_nodes(h) == 0:
            print('This is not implemented.')
        # Run the method.
        return self._ged_run(g, h)

    def get_upper_bound(self):
        """Return the upper bound recorded by the last call to run() (``inf`` before any run)."""
        return self._upper_bound

    def get_lower_bound(self):
        """Return the lower bound recorded by the last call to run() (``0`` before any run)."""
        return self._lower_bound

    def get_runtime(self):
        """Return the runtime of the last call to run() in seconds (``None`` before any run)."""
        return self._runtime

    def get_init_time(self):
        """Return the runtime of the last call to init() in seconds (``None`` before any init)."""
        return self._init_time

    def get_node_map(self):
        """Return the node map recorded by the last run() that produced one."""
        return self._node_map

    def _ged_init(self):
        """Initialize the method.

        Does nothing here; must be overridden by derived classes that require
        initialization.
        """
        pass

    def _ged_parse_option(self, option, arg):
        """Parse one option.

        Must be overridden by derived classes that have options.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the method,
            ``False`` otherwise. This base implementation accepts nothing.
        """
        return False

    def _ged_run(self, g, h):
        """Run the method on two graphs.

        Must be overridden by derived classes; this base implementation
        returns an empty dict.

        :param g: First input graph.
        :param h: Second input graph.
        :return: Result of the run.
        """
        return {}

    def _ged_valid_options_string(self):
        """Return a string describing all valid options.

        Must be overridden by derived classes that have options; this base
        implementation returns the empty string.
        """
        return ''

    def _ged_set_default_options(self):
        """Set all options to their default values.

        Does nothing here; must be overridden by derived classes that have
        options.
        """
        pass

+ 254
- 0
gklearn/ged/methods/lsape_based_method.py View File

@@ -0,0 +1,254 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 18 16:01:24 2020

@author: ljia
"""
import numpy as np
import networkx as nx
from gklearn.ged.methods import GEDMethod
from gklearn.ged.util import LSAPESolver, misc
from gklearn.ged.env import NodeMap

class LSAPEBasedMethod(GEDMethod):
    """Base class of GED methods that reduce the problem to an LSAPE instance.

    Derived classes fill the cost matrix in ``_lsape_populate_instance()``;
    this class builds the matrix, solves it with ``LSAPESolver`` and turns the
    solutions into node maps and bounds.
    """

    def __init__(self, ged_data):
        """Create the method on top of ``ged_data`` with default option values."""
        super().__init__(ged_data)
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._compute_lower_bound = True
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._centralities = {}
        self._max_num_solutions = 1

    def populate_instance_and_run_as_util(self, g, h):
        """Build and solve the LSAPE instance for ``g`` and ``h``.

        :param g: First input graph.
        :param h: Second input graph.
        :return: Dict with the keys ``'node_maps'`` (list of node maps, one per
            solver solution), ``'lower_bound'`` and ``'upper_bound'``.
        """
        result = {'node_maps': [], 'lower_bound': 0, 'upper_bound': np.inf}

        # Populate the LSAPE instance and set up the solver. The instance is a
        # square (n + m) x (n + m) matrix; cells that are never written keep
        # the cost +inf.
        nb1, nb2 = nx.number_of_nodes(g), nx.number_of_nodes(h)
        lsape_instance = np.ones((nb1 + nb2, nb1 + nb2)) * np.inf
        self.populate_instance(g, h, lsape_instance)
        lsape_solver = LSAPESolver(lsape_instance)

        # Solve the LSAPE instance.
        if self._solve_optimally:
            lsape_solver.set_model(self._lsape_model)
        else:
            lsape_solver.set_greedy_method(self._greedy_method)
        lsape_solver.solve(self._max_num_solutions)

        # Compute and store lower and upper bound.
        if self._compute_lower_bound and self._solve_optimally:
            result['lower_bound'] = lsape_solver.minimal_cost() * self._lsape_lower_bound_scaling_factor(g, h)  # @todo: test

        for solution_id in range(0, lsape_solver.num_solutions()):
            result['node_maps'].append(NodeMap(nx.number_of_nodes(g), nx.number_of_nodes(h)))
            misc.construct_node_map_from_solver(lsape_solver, result['node_maps'][-1], solution_id)
            self._ged_data.compute_induced_cost(g, h, result['node_maps'][-1])

        # Add centralities and reoptimize.
        if self._centrality_weight > 0 and self._centrality_method != 'NODE':
            print('This is not implemented.')  # @todo

        # Sort the node maps and set the upper bound.
        if len(result['node_maps']) > 1 or len(result['node_maps']) > self._max_num_solutions:
            print('This is not implemented.')  # @todo
        if len(result['node_maps']) == 0:
            result['upper_bound'] = np.inf
        else:
            result['upper_bound'] = result['node_maps'][0].induced_cost()

        return result

    def populate_instance(self, g, h, lsape_instance):
        """Populate ``lsape_instance`` in place.

        :param g: First input graph.
        :param h: Second input graph.
        :param lsape_instance: Cost matrix to fill; the block from row ``n`` and
            column ``m`` onwards (``n``, ``m`` being the node counts of ``g``
            and ``h``) is set to zero after the derived class filled the rest.
        """
        if not self._initialized:
            pass  # @todo: per-graph runtime initialization is not implemented.
        self._lsape_populate_instance(g, h, lsape_instance)
        lsape_instance[nx.number_of_nodes(g):, nx.number_of_nodes(h):] = 0

    ###########################################################################
    # Member functions inherited from GEDMethod.
    ###########################################################################

    def _ged_init(self):
        """Run the pre-graph hook, initialize every stored graph, then run the post hook."""
        self._lsape_pre_graph_init(False)
        for graph in self._ged_data._graphs:
            self._init_graph(graph)
        self._lsape_init()

    def _ged_run(self, g, h):
        """Run the method on ``g`` and ``h``; see populate_instance_and_run_as_util()."""
        result = self.populate_instance_and_run_as_util(g, h)
        return result

    def _ged_parse_option(self, option, arg):
        """Store the argument of a shared option or delegate to the derived class.

        :param option: The name of the option.
        :param arg: The argument of the option; stored as given, without
            validation or conversion.
        :return: Currently always ``True`` (see the note at the end of the body).
        """
        is_valid_option = False
        if option == 'threads':  # @todo: try.. catch...
            self._num_threads = arg
            is_valid_option = True
        elif option == 'lsape_model':
            self._lsape_model = arg  # @todo
            is_valid_option = True
        elif option == 'greedy_method':
            self._greedy_method = arg  # @todo
            is_valid_option = True
        elif option == 'optimal':
            self._solve_optimally = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_method':
            self._centrality_method = arg  # @todo
            is_valid_option = True
        elif option == 'centrality_weight':
            self._centrality_weight = arg  # @todo
            is_valid_option = True
        elif option == 'max_num_solutions':
            if arg == 'ALL':
                self._max_num_solutions = -1
            else:
                self._max_num_solutions = arg  # @todo
            is_valid_option = True
        is_valid_option = is_valid_option or self._lsape_parse_option(option, arg)
        # Unknown options are deliberately accepted instead of rejected.
        is_valid_option = True  # @todo: this is not in the C++ code.
        return is_valid_option

    def _ged_set_default_options(self):
        """Reset the shared options, then let the derived class reset its own."""
        self._lsape_model = None  # @todo: LSAPESolver::ECBP
        self._greedy_method = None  # @todo: LSAPESolver::BASIC
        self._solve_optimally = True
        self._num_threads = 1
        self._centrality_method = 'NODE'  # @todo
        self._centrality_weight = 0.7
        self._max_num_solutions = 1
        # Without this call the hook below would never be invoked, so options
        # of derived classes would never be reset to their defaults.
        self._lsape_set_default_options()

    ###########################################################################
    # Private helper member functions.
    ###########################################################################

    def _init_graph(self, graph):
        """Initialize the per-graph state for ``graph``."""
        if self._centrality_method != 'NODE':
            self._init_centralities(graph)  # @todo: not defined in this class.
        self._lsape_init_graph(graph)

    ###########################################################################
    # Virtual member functions to be overridden by derived classes.
    ###########################################################################

    def _lsape_init(self):
        """Initialize the method after the per-graph initialization.

        Does nothing here; must be overridden by derived classes that require
        custom initialization.
        """
        pass

    def _lsape_parse_option(self, option, arg):
        """Parse one option that is not among the ones shared by all derived classes.

        :param option: The name of the option.
        :param arg: The argument of the option.
        :return: ``True`` if ``option`` is a valid option name for the method,
            ``False`` otherwise. This base implementation accepts nothing.
        """
        return False

    def _lsape_set_default_options(self):
        """Set the options specific to the derived class to their default values.

        Does nothing here; must be overridden by derived classes that have
        options of their own.
        """
        pass

    def _lsape_populate_instance(self, g, h, lsape_instance):
        """Populate the LSAPE instance.

        Does nothing here; must be overridden by derived classes.

        :param g: First input graph.
        :param h: Second input graph.
        :param lsape_instance: Cost matrix to fill in place.
        """
        pass

    def _lsape_init_graph(self, graph):
        """Initialize the state of the derived class for one graph.

        Does nothing here; must be overridden by derived classes that keep
        per-graph state.

        :param graph: Graph for which the state has to be initialized.
        """
        pass

    def _lsape_pre_graph_init(self, called_at_runtime):
        """Initialize the method before the per-graph initialization.

        Does nothing here; must be overridden by derived classes that require it.

        :param called_at_runtime: ``True`` if called at runtime, ``False`` if
            called during initialization.
        """
        pass

    def _lsape_lower_bound_scaling_factor(self, g, h):
        """Return the factor by which the LSAPE optimum is scaled to obtain the lower bound.

        Override in derived classes whose LSAPE optimum is not itself a lower
        bound on the graph edit distance.

        :param g: First input graph.
        :param h: Second input graph.
        :return: ``1.0``, i.e. the LSAPE optimum is used unchanged.
        """
        return 1.0

+ 2
- 0
gklearn/ged/util/__init__.py View File

@@ -1 +1,3 @@
from gklearn.ged.util.lsape_solver import LSAPESolver
from gklearn.ged.util.util import compute_geds, ged_options_to_string from gklearn.ged.util.util import compute_geds, ged_options_to_string
from gklearn.ged.util.util import compute_geds_cml

+ 121
- 0
gklearn/ged/util/lsape_solver.py View File

@@ -0,0 +1,121 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 22 15:37:36 2020

@author: ljia
"""
import numpy as np
from scipy.optimize import linear_sum_assignment

class LSAPESolver(object):
    """Solver for a linear sum assignment problem given as a cost matrix.

    Only optimal solving through ``scipy.optimize.linear_sum_assignment`` is
    implemented; the configured model name is stored but not used to select
    an algorithm.
    """

    def __init__(self, cost_matrix=None):
        """Construct a solver for one problem instance.

        :param cost_matrix: The cost matrix of the instance to be solved.
        """
        self.__cost_matrix = cost_matrix
        self.__model = 'ECBP'
        self.__greedy_method = 'BASIC'
        self.__solve_optimally = True
        self.__minimal_cost = 0
        self.__row_to_col_assignments = []
        self.__col_to_row_assignments = []
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def clear_solution(self):
        """Clear a previously computed solution.

        Afterwards both assignment lists hold exactly one empty solution slot.
        """
        self.__minimal_cost = 0
        self.__row_to_col_assignments.clear()
        self.__col_to_row_assignments.clear()
        self.__row_to_col_assignments.append([])  # @todo
        self.__col_to_row_assignments.append([])
        self.__dual_var_rows = []  # @todo
        self.__dual_var_cols = []  # @todo

    def set_model(self, model):
        """Make the solver solve optimally with the given model.

        :param model: The model that should be used.
        """
        self.__solve_optimally = True
        self.__model = model

    def set_greedy_method(self, greedy_method):
        """Make the solver use a greedy method instead of solving optimally.

        Greedy solving is not implemented yet, so a subsequent solve() raises
        ``NotImplementedError`` until set_model() is called again.

        :param greedy_method: The greedy method that should be used.
        """
        self.__solve_optimally = False
        self.__greedy_method = greedy_method

    def solve(self, num_solutions=1):
        """Solve the problem instance.

        :param num_solutions: The maximal number of solutions that should be
            computed. Currently a single solution is computed regardless.
        :raises NotImplementedError: If a greedy method has been selected.
        """
        self.clear_solution()
        if not self.__solve_optimally:
            # @todo: greedy. Fail loudly rather than leave an empty solution
            # slot that breaks the first assignment lookup.
            raise NotImplementedError('Greedy LSAPE solving is not implemented. Call set_model() to solve optimally.')
        # @todo: only hungarianLSAPE ('ECBP') can be used.
        row_id, col_id = linear_sum_assignment(self.__cost_matrix)
        self.__row_to_col_assignments[0] = col_id
        # For a square matrix col_id is a permutation and argsort inverts it.
        self.__col_to_row_assignments[0] = np.argsort(col_id)  # @todo: might be slow, can use row_id
        self.__compute_cost_from_assignments()
        if num_solutions > 1:
            pass  # @todo: enumerating further solutions is not implemented.

    def minimal_cost(self):
        """Return the cost of the computed solution (``0`` before solve())."""
        return self.__minimal_cost

    def get_assigned_col(self, row, solution_id=0):
        """Return the column assigned to ``row``.

        :param row: Row whose assigned column should be returned.
        :param solution_id: ID of the solution in which the assignment is looked up.
        :return: The entry at position ``row`` of the row-to-column assignment
            of solution ``solution_id``.
        """
        return self.__row_to_col_assignments[solution_id][row]

    def get_assigned_row(self, col, solution_id=0):
        """Return the row assigned to ``col``.

        :param col: Column whose assigned row should be returned.
        :param solution_id: ID of the solution in which the assignment is looked up.
        :return: The entry at position ``col`` of the column-to-row assignment
            of solution ``solution_id``.
        """
        return self.__col_to_row_assignments[solution_id][col]

    def num_solutions(self):
        """Return the number of solution slots currently stored."""
        return len(self.__row_to_col_assignments)

    def __compute_cost_from_assignments(self):  # @todo
        """Sum the costs of the cells selected by the first row-to-column assignment."""
        cols = self.__row_to_col_assignments[0]
        self.__minimal_cost = np.sum(self.__cost_matrix[np.arange(len(cols)), cols])

+ 21
- 0
gklearn/ged/util/misc.py View File

@@ -5,6 +5,27 @@ Created on Thu Mar 19 18:13:56 2020


@author: ljia @author: ljia
""" """
from gklearn.utils import dummy_node


def construct_node_map_from_solver(solver, node_map, solution_id):
    """Rebuild ``node_map`` from one solution of an LSAPE solver.

    The node map is cleared first. A row assigned to a column index at or
    beyond the number of target nodes becomes a deletion; a column assigned to
    a row index at or beyond the number of source nodes becomes an insertion.

    :param solver: Solver providing ``get_assigned_col()`` and ``get_assigned_row()``.
    :param node_map: Node map that is cleared and filled in place.
    :param solution_id: ID of the solver solution to read.
    """
    node_map.clear()
    n_source = node_map.num_source_nodes()
    n_target = node_map.num_target_nodes()

    # Deletions and substitutions: one assignment per source node.
    for source in range(n_source):
        target = solver.get_assigned_col(source, solution_id)
        node_map.add_assignment(source, dummy_node() if target >= n_target else target)

    # Insertions: target nodes whose assigned row is not a real source node.
    for target in range(n_target):
        assigned_row = solver.get_assigned_row(target, solution_id)
        if assigned_row >= n_source:
            node_map.add_assignment(dummy_node(), target)


def options_string_to_options_map(options_string): def options_string_to_options_map(options_string):
"""Transforms an options string into an options map. """Transforms an options string into an options map.


+ 78
- 1
gklearn/ged/util/util.py View File

@@ -13,6 +13,7 @@ from functools import partial
import sys import sys
from tqdm import tqdm from tqdm import tqdm
import networkx as nx import networkx as nx
from gklearn.ged.env import GEDEnv
from gklearn.gedlib import librariesImport, gedlibpy from gklearn.gedlib import librariesImport, gedlibpy




@@ -22,7 +23,7 @@ def compute_ged(g1, g2, options):
ged_env.add_nx_graph(g1, '') ged_env.add_nx_graph(g1, '')
ged_env.add_nx_graph(g2, '') ged_env.add_nx_graph(g2, '')
listID = ged_env.get_all_graph_ids() listID = ged_env.get_all_graph_ids()
ged_env.init()
ged_env.init(init_type=options['init_option'])
ged_env.set_method(options['method'], ged_options_to_string(options)) ged_env.set_method(options['method'], ged_options_to_string(options))
ged_env.init_method() ged_env.init_method()


@@ -46,6 +47,82 @@ def compute_ged(g1, g2, options):
return dis, pi_forward, pi_backward return dis, pi_forward, pi_backward




def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True):
    """Compute pairwise graph edit distances with the Python ``GEDEnv``.

    :param graphs: Sequence of graphs; every unordered pair is compared.
    :param options: Mapping read for the keys ``'edit_cost'``,
        ``'edit_cost_constants'``, ``'init_option'``, ``'method'``,
        ``'node_labels'``, ``'edge_labels'``, ``'node_attrs'`` and
        ``'edge_attrs'``. It is copied, never modified.
    :param sort: If true, each pair is evaluated with the graph that has fewer
        nodes first (sequential branch; forwarded to the worker in the
        parallel branch).
    :param parallel: If true, distribute the pairs over a process pool.
    :param verbose: If true, show a ``tqdm`` progress bar.
    :return: Tuple ``(ged_vec, ged_mat, n_edit_operations)``: the distances per
        pair in ``combinations`` order, the symmetric distance matrix, and the
        edit-operation counts per pair.
    """
    # Work on a private copy: the parallel branch overrides 'threads', and
    # neither the caller's dict nor the shared default may be modified.
    options = dict(options)
    num_graphs = len(graphs)

    # initialize ged env.
    ged_env = GEDEnv()
    ged_env.set_edit_cost(options['edit_cost'], edit_cost_constants=options['edit_cost_constants'])
    for g in graphs:
        ged_env.add_nx_graph(g, '')
    listID = ged_env.get_all_graph_ids()
    ged_env.init(init_type=options['init_option'])
    if parallel:
        options['threads'] = 1
    ged_env.set_method(options['method'], options)
    ged_env.init_method()

    # compute ged.
    neo_options = {'edit_cost': options['edit_cost'],
                   'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
                   'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
    ged_mat = np.zeros((num_graphs, num_graphs))
    if parallel:
        len_itr = num_graphs * (num_graphs - 1) // 2
        ged_vec = [0 for i in range(len_itr)]
        n_edit_operations = [0 for i in range(len_itr)]
        itr = combinations(range(0, num_graphs), 2)
        n_jobs = multiprocessing.cpu_count()
        if len_itr < 100 * n_jobs:
            chunksize = len_itr // n_jobs + 1
        else:
            chunksize = 100

        def init_worker(graphs_toshare, ged_env_toshare, listID_toshare):
            # Expose the shared inputs to the worker function through globals.
            global G_graphs, G_ged_env, G_listID
            G_graphs = graphs_toshare
            G_ged_env = ged_env_toshare
            G_listID = listID_toshare

        do_partial = partial(_wrapper_compute_ged_parallel, neo_options, sort)
        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
        if verbose:
            # imap_unordered has no length, so pass the total explicitly.
            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
                            desc='computing GEDs', file=sys.stdout, total=len_itr)
        else:
            iterator = pool.imap_unordered(do_partial, itr, chunksize)
        for i, j, dis, n_eo_tmp in iterator:
            # Position of the pair (i, j), i < j, in combinations order.
            idx_itr = num_graphs * i + j - (i + 1) * (i + 2) // 2
            ged_vec[idx_itr] = dis
            ged_mat[i][j] = dis
            ged_mat[j][i] = dis
            n_edit_operations[idx_itr] = n_eo_tmp
        pool.close()
        pool.join()
    else:
        ged_vec = []
        n_edit_operations = []
        if verbose:
            iterator = tqdm(range(num_graphs), desc='computing GEDs', file=sys.stdout)
        else:
            iterator = range(num_graphs)
        for i in iterator:
            for j in range(i + 1, num_graphs):
                if nx.number_of_nodes(graphs[i]) <= nx.number_of_nodes(graphs[j]) or not sort:
                    dis, pi_forward, pi_backward = _compute_ged(ged_env, listID[i], listID[j], graphs[i], graphs[j])
                else:
                    # Swapped order: the maps come back swapped as well.
                    dis, pi_backward, pi_forward = _compute_ged(ged_env, listID[j], listID[i], graphs[j], graphs[i])
                ged_vec.append(dis)
                ged_mat[i][j] = dis
                ged_mat[j][i] = dis
                n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
                n_edit_operations.append(n_eo_tmp)

    return ged_vec, ged_mat, n_edit_operations



def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True): def compute_geds(graphs, options={}, sort=True, parallel=False, verbose=True):
# initialize ged env. # initialize ged env.
ged_env = gedlibpy.GEDEnv() ged_env = gedlibpy.GEDEnv()


+ 1
- 0
gklearn/preimage/__init__.py View File

@@ -13,5 +13,6 @@ __date__ = "March 2020"
from gklearn.preimage.preimage_generator import PreimageGenerator from gklearn.preimage.preimage_generator import PreimageGenerator
from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator from gklearn.preimage.median_preimage_generator import MedianPreimageGenerator
from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML
from gklearn.preimage.kernel_knn_cv import kernel_knn_cv from gklearn.preimage.kernel_knn_cv import kernel_knn_cv
from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class from gklearn.preimage.generate_random_preimages_by_class import generate_random_preimages_by_class

+ 57
- 0
gklearn/tests/test_ged_env.py View File

@@ -0,0 +1,57 @@
"""Tests of GEDEnv.
"""


def test_GEDEnv():
    """Test GEDEnv on the first two graphs of the predefined "MUTAG" dataset.

    A graph edit distance is computed with the 'BIPARTITE' method and
    'CONSTANT' edit costs. The test fails if any step of the GEDEnv workflow
    raises, or if the forward / backward node maps do not have exactly one
    entry per node of the first / second graph respectively.
    """
    # **1. Get dataset.**
    from gklearn.utils import Dataset

    # Predefined dataset name, use dataset "MUTAG".
    ds_name = 'MUTAG'
    # Initialize a Dataset.
    dataset = Dataset()
    # Load predefined dataset "MUTAG".
    dataset.load_predefined_dataset(ds_name)
    graph1 = dataset.graphs[0]
    graph2 = dataset.graphs[1]

    # **2. Compute graph edit distance.**
    try:
        from gklearn.ged.env import GEDEnv
        ged_env = GEDEnv()  # initialize GED environment.
        ged_env.set_edit_cost(
            'CONSTANT',  # GED cost type.
            edit_cost_constants=[3, 3, 1, 3, 3, 1]  # edit costs.
        )
        ged_env.add_nx_graph(graph1, '')  # add graph1
        ged_env.add_nx_graph(graph2, '')  # add graph2
        listID = ged_env.get_all_graph_ids()  # get list IDs of graphs
        ged_env.init(init_type='LAZY_WITHOUT_SHUFFLED_COPIES')  # initialize GED environment.
        options = {
            'initialization_method': 'RANDOM',  # or 'NODE', etc.
            'threads': 1  # parallel threads.
        }
        ged_env.set_method(
            'BIPARTITE',  # GED method.
            options  # options for GED method.
        )
        ged_env.init_method()  # initialize GED method.
        ged_env.run_method(listID[0], listID[1])  # run.
        pi_forward = ged_env.get_forward_map(listID[0], listID[1])  # forward map.
        pi_backward = ged_env.get_backward_map(listID[0], listID[1])  # backward map.
        # GED between two graphs; the value is not inspected, the call only
        # has to succeed.
        ged_env.get_upper_bound(listID[0], listID[1])
    except Exception as exception:
        assert False, exception

    # The checks live outside the try block so that a failing check is
    # reported directly instead of being re-wrapped by the handler above.
    # They are two separate statements on purpose: in ``assert a, b`` the
    # expression ``b`` is only the failure message and is never verified.
    import networkx as nx
    assert len(pi_forward) == nx.number_of_nodes(graph1), \
        'forward map size differs from the number of nodes of graph1'
    assert len(pi_backward) == nx.number_of_nodes(graph2), \
        'backward map size differs from the number of nodes of graph2'


if __name__ == "__main__":
test_GEDEnv()

+ 4
- 1
gklearn/tests/test_median_preimage_generator.py View File

@@ -68,4 +68,7 @@ def test_median_preimage_generator():
print('\n-------------------------------------') print('\n-------------------------------------')
print('fit method:', fit_method, '\n') print('fit method:', fit_method, '\n')
mpg_options['fit_method'] = fit_method mpg_options['fit_method'] = fit_method
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4))
try:
generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged_options, mge_options, save_results=save_results, save_medians=True, plot_medians=True, load_gm='auto', dir_save=dir_save, irrelevant_labels=irrelevant_labels, edge_required=edge_required, cut_range=range(0, 4))
except Exception as exception:
assert False, exception

+ 1
- 1
gklearn/utils/__init__.py View File

@@ -20,7 +20,7 @@ from gklearn.utils.graph_files import load_dataset, save_dataset
from gklearn.utils.timer import Timer from gklearn.utils.timer import Timer
from gklearn.utils.utils import get_graph_kernel_by_name from gklearn.utils.utils import get_graph_kernel_by_name
from gklearn.utils.utils import compute_gram_matrices_by_class from gklearn.utils.utils import compute_gram_matrices_by_class
from gklearn.utils.utils import SpecialLabel
from gklearn.utils.utils import SpecialLabel, dummy_node, undefined_node, dummy_edge
from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix from gklearn.utils.utils import normalize_gram_matrix, compute_distance_matrix
from gklearn.utils.trie import Trie from gklearn.utils.trie import Trie
from gklearn.utils.knn import knn_cv, knn_classification from gklearn.utils.knn import knn_cv, knn_classification

+ 41
- 9
gklearn/utils/utils.py View File

@@ -472,14 +472,6 @@ def get_mlti_dim_edge_attrs(G, attr_names):
for ed, attrs in G.edges(data=True): for ed, attrs in G.edges(data=True):
attributes.append(tuple(attrs[aname] for aname in attr_names)) attributes.append(tuple(attrs[aname] for aname in attr_names))
return attributes return attributes


@unique
class SpecialLabel(Enum):
"""can be used to define special labels.
"""
DUMMY = 1 # The dummy label.
# DUMMY = auto # enum.auto does not exist in Python 3.5.
def normalize_gram_matrix(gram_matrix): def normalize_gram_matrix(gram_matrix):
@@ -506,4 +498,44 @@ def compute_distance_matrix(gram_matrix):
dis_max = np.max(np.max(dis_mat)) dis_max = np.max(np.max(dis_mat))
dis_min = np.min(np.min(dis_mat[dis_mat != 0])) dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
dis_mean = np.mean(np.mean(dis_mat)) dis_mean = np.mean(np.mean(dis_mat))
return dis_mat, dis_max, dis_min, dis_mean
return dis_mat, dis_max, dis_min, dis_mean


def dummy_node():
    """Return the ID that stands for a dummy node.

    Returns
    -------
    float
        ``np.inf``.
    """
    # @todo: in GEDLIB, this is the max - 1 rather than max; the reason for
    # that difference is not known.
    node_id = np.inf
    return node_id


def undefined_node():
    """Return the ID that stands for an undefined node.

    Returns
    -------
    float
        ``np.inf``.
    """
    node_id = np.inf
    return node_id


def dummy_edge():
    """Return the ID that stands for a dummy edge.

    Returns
    -------
    float
        ``np.inf``.
    """
    edge_id = np.inf
    return edge_id


@unique
class SpecialLabel(Enum):
    """Enumeration of special labels.

    ``@unique`` rejects members that share a value, so every special label
    keeps a distinct value.
    """
    # The dummy label. The value is written out explicitly because
    # ``enum.auto`` does not exist in Python 3.5.
    DUMMY = 1

Loading…
Cancel
Save