
Merge pull request #34 from jajupmochi/v0.2.x

V0.2.x
Branch: master
Author: linlin (GitHub), 4 years ago
Parent commit: 2c36da08ab
34 changed files with 2017 additions and 1858 deletions
  1. .gitignore (+6 -0)
  2. gklearn/experiments/papers/PRL_2020/utils.py (+8 -5)
  3. gklearn/ged/edit_costs/constant.py (+12 -12)
  4. gklearn/ged/env/ged_env.py (+127 -127)
  5. gklearn/ged/env/node_map.py (+25 -25)
  6. gklearn/ged/learning/cost_matrices_learner.py (+5 -5)
  7. gklearn/ged/learning/costs_learner.py (+2 -2)
  8. gklearn/ged/util/lsape_solver.py (+30 -30)
  9. gklearn/ged/util/util.py (+10 -4)
  10. gklearn/kernels/common_walk.py (+51 -51)
  11. gklearn/kernels/conjugate_gradient.py (+10 -10)
  12. gklearn/kernels/fixed_point.py (+11 -11)
  13. gklearn/kernels/graph_kernel.py (+7 -7)
  14. gklearn/kernels/marginalized.py (+45 -45)
  15. gklearn/kernels/path_up_to_h.py (+80 -80)
  16. gklearn/kernels/random_walk_meta.py (+4 -4)
  17. gklearn/kernels/shortest_path.py (+33 -33)
  18. gklearn/kernels/spectral_decomposition.py (+7 -7)
  19. gklearn/kernels/structural_sp.py (+56 -56)
  20. gklearn/kernels/treelet.py (+85 -85)
  21. gklearn/kernels/weisfeiler_lehman.py (+41 -41)
  22. gklearn/preimage/generate_random_preimages_by_class.py (+2 -2)
  23. gklearn/preimage/kernel_knn_cv.py (+25 -25)
  24. gklearn/preimage/median_preimage_generator.py (+284 -283)
  25. gklearn/preimage/median_preimage_generator_cml.py (+221 -221)
  26. gklearn/preimage/median_preimage_generator_py.py (+283 -283)
  27. gklearn/preimage/random_preimage_generator.py (+109 -109)
  28. gklearn/preimage/remove_best_graph.py (+10 -10)
  29. gklearn/preimage/utils.py (+2 -2)
  30. gklearn/tests/test_graph_kernels.py (+171 -28)
  31. gklearn/utils/dataset.py (+244 -244)
  32. gklearn/utils/graph_files.py (+4 -4)
  33. gklearn/utils/knn.py (+2 -2)
  34. gklearn/utils/timer.py (+5 -5)

.gitignore (+6 -0)

@@ -73,3 +73,9 @@ gklearn/kernels/.tags

 # docker travis debug.
 ci.sh
+
+# outputs.
+outputs/
+
+# pyCharm.
+.idea/

gklearn/experiments/papers/PRL_2020/utils.py (+8 -5)

@@ -158,7 +158,7 @@ def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name=
         sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
         param_grid_precomputed = {'compute_method': ['fp'],
                                   'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
-                                  'weight': np.logspace(-3, -10, num=8, base=10)}
+                                  'weight': np.logspace(-4, -10, num=7, base=10)}
     elif kernel_name == 'SpectralDecomposition':
         from gklearn.kernels.randomWalkKernel import randomwalkkernel
@@ -196,14 +196,17 @@ def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name=
     elif kernel_name == 'Treelet':
         from gklearn.kernels.treeletKernel import treeletkernel
         estimator = treeletkernel
-        from gklearn.utils.kernels import polynomialkernel
+        from gklearn.utils.kernels import gaussiankernel, polynomialkernel
         import functools
         gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
                     # for ga in np.linspace(1, 10, 10)]
                     for ga in np.logspace(0, 10, num=11, base=10)]
         pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11)
                     for c in np.logspace(0, 10, num=11, base=10)]
+        # pkernels = [functools.partial(polynomialkernel, d=1, c=1)]

         param_grid_precomputed = {'sub_kernel': pkernels + gkernels}
+                                  # 'parallel': [None]}
     elif kernel_name == 'WLSubtree':
         from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
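For reference, the tuned grids are plain NumPy log-spaced sequences; a quick sketch of what the new values expand to:

    import numpy as np

    # fixed-point kernel: 'weight' grid now runs 1e-4 down to 1e-10
    # (7 points, instead of the previous 8 starting at 1e-3)
    print(np.logspace(-4, -10, num=7, base=10))
    # [1.e-04 1.e-05 1.e-06 1.e-07 1.e-08 1.e-09 1.e-10]

    # treelet kernel: gamma and c grids span 1 to 1e10 (11 points each)
    print(np.logspace(0, 10, num=11, base=10))
    # [1.e+00 1.e+01 ... 1.e+10]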


gklearn/ged/edit_costs/constant.py (+12 -12)

@@ -14,37 +14,37 @@ class Constant(EditCost):

     def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1, edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
-        self.__node_ins_cost = node_ins_cost
-        self.__node_del_cost = node_del_cost
-        self.__node_rel_cost = node_rel_cost
-        self.__edge_ins_cost = edge_ins_cost
-        self.__edge_del_cost = edge_del_cost
-        self.__edge_rel_cost = edge_rel_cost
+        self._node_ins_cost = node_ins_cost
+        self._node_del_cost = node_del_cost
+        self._node_rel_cost = node_rel_cost
+        self._edge_ins_cost = edge_ins_cost
+        self._edge_del_cost = edge_del_cost
+        self._edge_rel_cost = edge_rel_cost

     def node_ins_cost_fun(self, node_label):
-        return self.__node_ins_cost
+        return self._node_ins_cost

     def node_del_cost_fun(self, node_label):
-        return self.__node_del_cost
+        return self._node_del_cost

     def node_rel_cost_fun(self, node_label_1, node_label_2):
         if node_label_1 != node_label_2:
-            return self.__node_rel_cost
+            return self._node_rel_cost
         return 0

     def edge_ins_cost_fun(self, edge_label):
-        return self.__edge_ins_cost
+        return self._edge_ins_cost

     def edge_del_cost_fun(self, edge_label):
-        return self.__edge_del_cost
+        return self._edge_del_cost

     def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
         if edge_label_1 != edge_label_2:
-            return self.__edge_rel_cost
+            return self._edge_rel_cost
         return 0
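Aside from the new conditional in gklearn/ged/util/util.py and the extended tests, nearly every hunk in this PR is the same mechanical rename: double-underscore attributes become single-underscore ones. A minimal sketch of why that matters (hypothetical Base/Child classes, not from gklearn): Python name-mangles __attr to _ClassName__attr, which hides the attribute from subclasses, while _attr is private only by convention and is inherited as-is.

    class Base:
        def __init__(self):
            self.__secret = 1   # stored as _Base__secret (name-mangled)
            self._shared = 2    # single underscore: no mangling

    class Child(Base):
        def read(self):
            # self.__secret here would be mangled to _Child__secret -> AttributeError;
            # the single-underscore attribute is accessible directly.
            return self._shared

    c = Child()
    print(c.read())          # -> 2
    print(c._Base__secret)   # the mangled name is only reachable explicitly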

gklearn/ged/env/ged_env.py (+127 -127)

@@ -15,17 +15,17 @@ class GEDEnv(object):

     def __init__(self):
-        self.__initialized = False
-        self.__new_graph_ids = []
-        self.__ged_data = GEDData()
+        self._initialized = False
+        self._new_graph_ids = []
+        self._ged_data = GEDData()
         # Variables needed for approximating ged_instance_.
-        self.__lower_bounds = {}
-        self.__upper_bounds = {}
-        self.__runtimes = {}
-        self.__node_maps = {}
-        self.__original_to_internal_node_ids = []
-        self.__internal_to_original_node_ids = []
-        self.__ged_method = None
+        self._lower_bounds = {}
+        self._upper_bounds = {}
+        self._runtimes = {}
+        self._node_maps = {}
+        self._original_to_internal_node_ids = []
+        self._internal_to_original_node_ids = []
+        self._ged_method = None

     def set_edit_cost(self, edit_cost, edit_cost_constants=[]):
@@ -36,7 +36,7 @@ class GEDEnv(object):
      * @param[in] edit_cost_constants Constants passed to the constructor of the edit cost class selected by @p edit_costs.
      */
     """
-        self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants)
+        self._ged_data._set_edit_cost(edit_cost, edit_cost_constants)

     def add_graph(self, graph_name='', graph_class=''):
@@ -49,17 +49,17 @@ class GEDEnv(object):
      */
     """
     # @todo: graphs are not uninitialized.
-        self.__initialized = False
-        graph_id = self.__ged_data._num_graphs_without_shuffled_copies
-        self.__ged_data._num_graphs_without_shuffled_copies += 1
-        self.__new_graph_ids.append(graph_id)
-        self.__ged_data._graphs.append(nx.Graph())
-        self.__ged_data._graph_names.append(graph_name)
-        self.__ged_data._graph_classes.append(graph_class)
-        self.__original_to_internal_node_ids.append({})
-        self.__internal_to_original_node_ids.append({})
-        self.__ged_data._strings_to_internal_node_ids.append({})
-        self.__ged_data._internal_node_ids_to_strings.append({})
+        self._initialized = False
+        graph_id = self._ged_data._num_graphs_without_shuffled_copies
+        self._ged_data._num_graphs_without_shuffled_copies += 1
+        self._new_graph_ids.append(graph_id)
+        self._ged_data._graphs.append(nx.Graph())
+        self._ged_data._graph_names.append(graph_name)
+        self._ged_data._graph_classes.append(graph_class)
+        self._original_to_internal_node_ids.append({})
+        self._internal_to_original_node_ids.append({})
+        self._ged_data._strings_to_internal_node_ids.append({})
+        self._ged_data._internal_node_ids_to_strings.append({})
         return graph_id
@@ -70,14 +70,14 @@ class GEDEnv(object):
      * @param[in] graph_id ID of graph that has to be cleared.
      */
     """
-        if graph_id > self.__ged_data.num_graphs_without_shuffled_copies():
+        if graph_id > self._ged_data.num_graphs_without_shuffled_copies():
             raise Exception('The graph', self.get_graph_name(graph_id), 'has not been added to the environment.')
-        self.__ged_data._graphs[graph_id].clear()
-        self.__original_to_internal_node_ids[graph_id].clear()
-        self.__internal_to_original_node_ids[graph_id].clear()
-        self.__ged_data._strings_to_internal_node_ids[graph_id].clear()
-        self.__ged_data._internal_node_ids_to_strings[graph_id].clear()
-        self.__initialized = False
+        self._ged_data._graphs[graph_id].clear()
+        self._original_to_internal_node_ids[graph_id].clear()
+        self._internal_to_original_node_ids[graph_id].clear()
+        self._ged_data._strings_to_internal_node_ids[graph_id].clear()
+        self._ged_data._internal_node_ids_to_strings[graph_id].clear()
+        self._initialized = False

     def add_node(self, graph_id, node_id, node_label):
@@ -90,15 +90,15 @@ class GEDEnv(object):
      */
     """
     # @todo: check ids.
-        self.__initialized = False
-        internal_node_id = nx.number_of_nodes(self.__ged_data._graphs[graph_id])
-        self.__ged_data._graphs[graph_id].add_node(internal_node_id, label=node_label)
-        self.__original_to_internal_node_ids[graph_id][node_id] = internal_node_id
-        self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id
-        self.__ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id
-        self.__ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id)
-        self.__ged_data._node_label_to_id(node_label)
-        label_id = self.__ged_data._node_label_to_id(node_label)
+        self._initialized = False
+        internal_node_id = nx.number_of_nodes(self._ged_data._graphs[graph_id])
+        self._ged_data._graphs[graph_id].add_node(internal_node_id, label=node_label)
+        self._original_to_internal_node_ids[graph_id][node_id] = internal_node_id
+        self._internal_to_original_node_ids[graph_id][internal_node_id] = node_id
+        self._ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id
+        self._ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id)
+        self._ged_data._node_label_to_id(node_label)
+        label_id = self._ged_data._node_label_to_id(node_label)
     # @todo: ged_data_.graphs_[graph_id].set_label
@@ -114,10 +114,10 @@ class GEDEnv(object):
      */
     """
     # @todo: check everything.
-        self.__initialized = False
+        self._initialized = False
     # @todo: check ignore_duplicates.
-        self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label)
-        label_id = self.__ged_data._edge_label_to_id(edge_label)
+        self._ged_data._graphs[graph_id].add_edge(self._original_to_internal_node_ids[graph_id][nd_from], self._original_to_internal_node_ids[graph_id][nd_to], label=edge_label)
+        label_id = self._ged_data._edge_label_to_id(edge_label)
     # @todo: ged_data_.graphs_[graph_id].set_label
@@ -182,30 +182,30 @@ class GEDEnv(object):
             init_type = OptionsStringMap.InitType[init_type]
         # Throw an exception if no edit costs have been selected.
-        if self.__ged_data._edit_cost is None:
+        if self._ged_data._edit_cost is None:
             raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')
         # Return if the environment is initialized.
-        if self.__initialized:
+        if self._initialized:
             return
         # Set initialization type.
-        self.__ged_data._init_type = init_type
+        self._ged_data._init_type = init_type
         # @todo: Construct shuffled graph copies if necessary.
         # Re-initialize adjacency matrices (also previously initialized graphs must be re-initialized because of possible re-allocation).
         # @todo: setup_adjacency_matrix, don't know if neccessary.
-        self.__ged_data._max_num_nodes = np.max([nx.number_of_nodes(g) for g in self.__ged_data._graphs])
-        self.__ged_data._max_num_edges = np.max([nx.number_of_edges(g) for g in self.__ged_data._graphs])
+        self._ged_data._max_num_nodes = np.max([nx.number_of_nodes(g) for g in self._ged_data._graphs])
+        self._ged_data._max_num_edges = np.max([nx.number_of_edges(g) for g in self._ged_data._graphs])
         # Initialize cost matrices if necessary.
-        if self.__ged_data._eager_init():
+        if self._ged_data._eager_init():
             pass # @todo: init_cost_matrices_: 1. Update node cost matrix if new node labels have been added to the environment; 2. Update edge cost matrix if new edge labels have been added to the environment.
         # Mark environment as initialized.
-        self.__initialized = True
-        self.__new_graph_ids.clear()
+        self._initialized = True
+        self._new_graph_ids.clear()

     def is_initialized(self):
@@ -215,7 +215,7 @@ class GEDEnv(object):
      * @return True if the environment is initialized.
      */
     """
-        return self.__initialized
+        return self._initialized

     def get_init_type(self):
@@ -225,16 +225,16 @@ class GEDEnv(object):
      * @return Initialization type.
      */
     """
-        return self.__ged_data._init_type
+        return self._ged_data._init_type

     def set_label_costs(self, node_label_costs=None, edge_label_costs=None):
         """Set the costs between labels.
         """
         if node_label_costs is not None:
-            self.__ged_data._node_label_costs = node_label_costs
+            self._ged_data._node_label_costs = node_label_costs
         if edge_label_costs is not None:
-            self.__ged_data._edge_label_costs = edge_label_costs
+            self._ged_data._edge_label_costs = edge_label_costs

     def set_method(self, method, options=''):
@@ -245,67 +245,67 @@ class GEDEnv(object):
      * @param[in] options An options string of the form @"[--@<option@> @<arg@>] [...]@" passed to the selected method.
      */
     """
-        del self.__ged_method
+        del self._ged_method
         if isinstance(method, str):
             method = OptionsStringMap.GEDMethod[method]

         if method == Options.GEDMethod.BRANCH:
-            self.__ged_method = Branch(self.__ged_data)
+            self._ged_method = Branch(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_FAST:
-            self.__ged_method = BranchFast(self.__ged_data)
+            self._ged_method = BranchFast(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_FAST:
-            self.__ged_method = BranchFast(self.__ged_data)
+            self._ged_method = BranchFast(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_TIGHT:
-            self.__ged_method = BranchTight(self.__ged_data)
+            self._ged_method = BranchTight(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_UNIFORM:
-            self.__ged_method = BranchUniform(self.__ged_data)
+            self._ged_method = BranchUniform(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_COMPACT:
-            self.__ged_method = BranchCompact(self.__ged_data)
+            self._ged_method = BranchCompact(self._ged_data)
         elif method == Options.GEDMethod.PARTITION:
-            self.__ged_method = Partition(self.__ged_data)
+            self._ged_method = Partition(self._ged_data)
        elif method == Options.GEDMethod.HYBRID:
-            self.__ged_method = Hybrid(self.__ged_data)
+            self._ged_method = Hybrid(self._ged_data)
         elif method == Options.GEDMethod.RING:
-            self.__ged_method = Ring(self.__ged_data)
+            self._ged_method = Ring(self._ged_data)
         elif method == Options.GEDMethod.ANCHOR_AWARE_GED:
-            self.__ged_method = AnchorAwareGED(self.__ged_data)
+            self._ged_method = AnchorAwareGED(self._ged_data)
         elif method == Options.GEDMethod.WALKS:
-            self.__ged_method = Walks(self.__ged_data)
+            self._ged_method = Walks(self._ged_data)
         elif method == Options.GEDMethod.IPFP:
-            self.__ged_method = IPFP(self.__ged_data)
+            self._ged_method = IPFP(self._ged_data)
         elif method == Options.GEDMethod.BIPARTITE:
             from gklearn.ged.methods import Bipartite
-            self.__ged_method = Bipartite(self.__ged_data)
+            self._ged_method = Bipartite(self._ged_data)
         elif method == Options.GEDMethod.SUBGRAPH:
-            self.__ged_method = Subgraph(self.__ged_data)
+            self._ged_method = Subgraph(self._ged_data)
         elif method == Options.GEDMethod.NODE:
-            self.__ged_method = Node(self.__ged_data)
+            self._ged_method = Node(self._ged_data)
         elif method == Options.GEDMethod.RING_ML:
-            self.__ged_method = RingML(self.__ged_data)
+            self._ged_method = RingML(self._ged_data)
         elif method == Options.GEDMethod.BIPARTITE_ML:
-            self.__ged_method = BipartiteML(self.__ged_data)
+            self._ged_method = BipartiteML(self._ged_data)
         elif method == Options.GEDMethod.REFINE:
-            self.__ged_method = Refine(self.__ged_data)
+            self._ged_method = Refine(self._ged_data)
         elif method == Options.GEDMethod.BP_BEAM:
-            self.__ged_method = BPBeam(self.__ged_data)
+            self._ged_method = BPBeam(self._ged_data)
         elif method == Options.GEDMethod.SIMULATED_ANNEALING:
-            self.__ged_method = SimulatedAnnealing(self.__ged_data)
+            self._ged_method = SimulatedAnnealing(self._ged_data)
         elif method == Options.GEDMethod.HED:
-            self.__ged_method = HED(self.__ged_data)
+            self._ged_method = HED(self._ged_data)
         elif method == Options.GEDMethod.STAR:
-            self.__ged_method = STAR(self.__ged_data)
+            self._ged_method = STAR(self._ged_data)
         # #ifdef GUROBI
         elif method == Options.GEDMethod.F1:
-            self.__ged_method = F1(self.__ged_data)
+            self._ged_method = F1(self._ged_data)
         elif method == Options.GEDMethod.F2:
-            self.__ged_method = F2(self.__ged_data)
+            self._ged_method = F2(self._ged_data)
         elif method == Options.GEDMethod.COMPACT_MIP:
-            self.__ged_method = CompactMIP(self.__ged_data)
+            self._ged_method = CompactMIP(self._ged_data)
         elif method == Options.GEDMethod.BLP_NO_EDGE_LABELS:
-            self.__ged_method = BLPNoEdgeLabels(self.__ged_data)
+            self._ged_method = BLPNoEdgeLabels(self._ged_data)

-        self.__ged_method.set_options(options)
+        self._ged_method.set_options(options)

     def run_method(self, g_id, h_id):
@@ -316,34 +316,34 @@ class GEDEnv(object):
      * @param[in] h_id ID of an input graph that has been added to the environment.
      */
     """
-        if g_id >= self.__ged_data.num_graphs():
+        if g_id >= self._ged_data.num_graphs():
             raise Exception('The graph with ID', str(g_id), 'has not been added to the environment.')
-        if h_id >= self.__ged_data.num_graphs():
+        if h_id >= self._ged_data.num_graphs():
             raise Exception('The graph with ID', str(h_id), 'has not been added to the environment.')
-        if not self.__initialized:
+        if not self._initialized:
             raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
-        if self.__ged_method is None:
+        if self._ged_method is None:
             raise Exception('No method has been set. Call set_method() before calling run().')
         # Call selected GEDMethod and store results.
-        if self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id):
-            self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id)) # @todo: why shuffle?
+        if self._ged_data.shuffled_graph_copies_available() and (g_id == h_id):
+            self._ged_method.run(g_id, self._ged_data.id_shuffled_graph_copy(h_id)) # @todo: why shuffle?
         else:
-            self.__ged_method.run(g_id, h_id)
-        self.__lower_bounds[(g_id, h_id)] = self.__ged_method.get_lower_bound()
-        self.__upper_bounds[(g_id, h_id)] = self.__ged_method.get_upper_bound()
-        self.__runtimes[(g_id, h_id)] = self.__ged_method.get_runtime()
-        self.__node_maps[(g_id, h_id)] = self.__ged_method.get_node_map()
+            self._ged_method.run(g_id, h_id)
+        self._lower_bounds[(g_id, h_id)] = self._ged_method.get_lower_bound()
+        self._upper_bounds[(g_id, h_id)] = self._ged_method.get_upper_bound()
+        self._runtimes[(g_id, h_id)] = self._ged_method.get_runtime()
+        self._node_maps[(g_id, h_id)] = self._ged_method.get_node_map()

     def init_method(self):
         """Initializes the method specified by call to set_method().
         """
-        if not self.__initialized:
+        if not self._initialized:
             raise Exception('The environment is uninitialized. Call init() before calling init_method().')
-        if self.__ged_method is None:
+        if self._ged_method is None:
             raise Exception('No method has been set. Call set_method() before calling init_method().')
-        self.__ged_method.init()
+        self._ged_method.init()

     def get_num_node_labels(self):
@@ -354,7 +354,7 @@ class GEDEnv(object):
      * @note If @p 1 is returned, the nodes are unlabeled.
      */
     """
-        return len(self.__ged_data._node_labels)
+        return len(self._ged_data._node_labels)

     def get_all_node_labels(self):
@@ -365,7 +365,7 @@ class GEDEnv(object):
      * @note If @p 1 is returned, the nodes are unlabeled.
      */
     """
-        return self.__ged_data._node_labels
+        return self._ged_data._node_labels

     def get_node_label(self, label_id, to_dict=True):
@@ -379,8 +379,8 @@ class GEDEnv(object):
         if label_id < 1 or label_id > self.get_num_node_labels():
             raise Exception('The environment does not contain a node label with ID', str(label_id), '.')
         if to_dict:
-            return dict(self.__ged_data._node_labels[label_id - 1])
-        return self.__ged_data._node_labels[label_id - 1]
+            return dict(self._ged_data._node_labels[label_id - 1])
+        return self._ged_data._node_labels[label_id - 1]

     def get_num_edge_labels(self):
@@ -391,7 +391,7 @@ class GEDEnv(object):
      * @note If @p 1 is returned, the edges are unlabeled.
      */
     """
-        return len(self.__ged_data._edge_labels)
+        return len(self._ged_data._edge_labels)

     def get_all_edge_labels(self):
@@ -402,7 +402,7 @@ class GEDEnv(object):
      * @note If @p 1 is returned, the edges are unlabeled.
      */
     """
-        return self.__ged_data._edge_labels
+        return self._ged_data._edge_labels

     def get_edge_label(self, label_id, to_dict=True):
@@ -416,8 +416,8 @@ class GEDEnv(object):
         if label_id < 1 or label_id > self.get_num_edge_labels():
             raise Exception('The environment does not contain an edge label with ID', str(label_id), '.')
         if to_dict:
-            return dict(self.__ged_data._edge_labels[label_id - 1])
-        return self.__ged_data._edge_labels[label_id - 1]
+            return dict(self._ged_data._edge_labels[label_id - 1])
+        return self._ged_data._edge_labels[label_id - 1]

     def get_upper_bound(self, g_id, h_id):
@@ -429,9 +429,9 @@ class GEDEnv(object):
      * @return Upper bound computed by the last call to run_method() with arguments @p g_id and @p h_id.
      */
     """
-        if (g_id, h_id) not in self.__upper_bounds:
+        if (g_id, h_id) not in self._upper_bounds:
             raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_upper_bound(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__upper_bounds[(g_id, h_id)]
+        return self._upper_bounds[(g_id, h_id)]

     def get_lower_bound(self, g_id, h_id):
@@ -443,9 +443,9 @@ class GEDEnv(object):
      * @return Lower bound computed by the last call to run_method() with arguments @p g_id and @p h_id.
      */
     """
-        if (g_id, h_id) not in self.__lower_bounds:
+        if (g_id, h_id) not in self._lower_bounds:
             raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_lower_bound(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__lower_bounds[(g_id, h_id)]
+        return self._lower_bounds[(g_id, h_id)]

     def get_runtime(self, g_id, h_id):
@@ -457,9 +457,9 @@ class GEDEnv(object):
      * @return Runtime of last call to run_method() with arguments @p g_id and @p h_id.
      */
     """
-        if (g_id, h_id) not in self.__runtimes:
+        if (g_id, h_id) not in self._runtimes:
             raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_runtime(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__runtimes[(g_id, h_id)]
+        return self._runtimes[(g_id, h_id)]

     def get_init_time(self):
@@ -469,7 +469,7 @@ class GEDEnv(object):
      * @return Runtime of the last call to init_method().
      */
     """
-        return self.__ged_method.get_init_time()
+        return self._ged_method.get_init_time()

     def get_node_map(self, g_id, h_id):
@@ -481,9 +481,9 @@ class GEDEnv(object):
      * @return Node map computed by the last call to run_method() with arguments @p g_id and @p h_id.
      */
     """
-        if (g_id, h_id) not in self.__node_maps:
+        if (g_id, h_id) not in self._node_maps:
             raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_node_map(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__node_maps[(g_id, h_id)]
+        return self._node_maps[(g_id, h_id)]

     def get_forward_map(self, g_id, h_id) :
@@ -531,7 +531,7 @@ class GEDEnv(object):
      * @param[in,out] node_map Node map whose induced edit cost is to be computed.
      */
     """
-        self.__ged_data.compute_induced_cost(self.__ged_data._graphs[g_id], self.__ged_data._graphs[h_id], node_map)
+        self._ged_data.compute_induced_cost(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id], node_map)

     def get_nx_graph(self, graph_id):
@@ -569,7 +569,7 @@ class GEDEnv(object):
         .. seealso:: get_graph_internal_id(), get_graph_num_nodes(), get_graph_num_edges(), get_original_node_ids(), get_graph_edges(), get_graph_adjacence_matrix()
         .. note:: These functions allow to collect all the graph's informations.
         """
-        graph = self.__ged_data.graph(graph_id)
+        graph = self._ged_data.graph(graph_id)
         node_labels = []
         for n in graph.nodes():
             node_labels.append(graph.nodes[n]['label'])
@@ -590,7 +590,7 @@ class GEDEnv(object):
         .. seealso:: get_graph_internal_id(), get_graph_num_nodes(), get_graph_num_edges(), get_original_node_ids(), get_graph_node_labels(), get_graph_adjacence_matrix()
         .. note:: These functions allow to collect all the graph's informations.
         """
-        graph = self.__ged_data.graph(graph_id)
+        graph = self._ged_data.graph(graph_id)
         if to_dict:
             edges = {}
             for n1, n2, attr in graph.edges(data=True):
@@ -608,7 +608,7 @@ class GEDEnv(object):
      * @return Name of the input graph.
      */
     """
-        return self.__ged_data._graph_names[graph_id]
+        return self._ged_data._graph_names[graph_id]

     def get_graph_num_nodes(self, graph_id):
@@ -619,7 +619,7 @@ class GEDEnv(object):
      * @return Number of nodes in the graph.
      */
     """
-        return nx.number_of_nodes(self.__ged_data.graph(graph_id))
+        return nx.number_of_nodes(self._ged_data.graph(graph_id))

     def get_original_node_ids(self, graph_id):
@@ -634,11 +634,11 @@ class GEDEnv(object):
         .. seealso:: get_graph_internal_id(), get_graph_num_nodes(), get_graph_num_edges(), get_graph_node_labels(), get_graph_edges(), get_graph_adjacence_matrix()
         .. note:: These functions allow to collect all the graph's informations.
         """
-        return [i for i in self.__internal_to_original_node_ids[graph_id].values()]
+        return [i for i in self._internal_to_original_node_ids[graph_id].values()]

     def get_node_cost(self, node_label_1, node_label_2):
-        return self.__ged_data.node_cost(node_label_1, node_label_2)
+        return self._ged_data.node_cost(node_label_1, node_label_2)

     def get_node_rel_cost(self, node_label_1, node_label_2):
@@ -654,7 +654,7 @@ class GEDEnv(object):
         node_label_1 = tuple(sorted(node_label_1.items(), key=lambda kv: kv[0]))
         if isinstance(node_label_2, dict):
             node_label_2 = tuple(sorted(node_label_2.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2) # @todo: may need to use node_cost() instead (or change node_cost() and modify ged_method for pre-defined cost matrices.)
+        return self._ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2) # @todo: may need to use node_cost() instead (or change node_cost() and modify ged_method for pre-defined cost matrices.)

     def get_node_del_cost(self, node_label):
@@ -667,7 +667,7 @@ class GEDEnv(object):
         """
         if isinstance(node_label, dict):
             node_label = tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.node_del_cost_fun(node_label)
+        return self._ged_data._edit_cost.node_del_cost_fun(node_label)

     def get_node_ins_cost(self, node_label):
@@ -680,11 +680,11 @@ class GEDEnv(object):
         """
         if isinstance(node_label, dict):
             node_label = tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.node_ins_cost_fun(node_label)
+        return self._ged_data._edit_cost.node_ins_cost_fun(node_label)

     def get_edge_cost(self, edge_label_1, edge_label_2):
-        return self.__ged_data.edge_cost(edge_label_1, edge_label_2)
+        return self._ged_data.edge_cost(edge_label_1, edge_label_2)

     def get_edge_rel_cost(self, edge_label_1, edge_label_2):
@@ -700,7 +700,7 @@ class GEDEnv(object):
         edge_label_1 = tuple(sorted(edge_label_1.items(), key=lambda kv: kv[0]))
         if isinstance(edge_label_2, dict):
             edge_label_2 = tuple(sorted(edge_label_2.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.edge_rel_cost_fun(edge_label_1, edge_label_2)
+        return self._ged_data._edit_cost.edge_rel_cost_fun(edge_label_1, edge_label_2)

     def get_edge_del_cost(self, edge_label):
@@ -713,7 +713,7 @@ class GEDEnv(object):
         """
         if isinstance(edge_label, dict):
             edge_label = tuple(sorted(edge_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.edge_del_cost_fun(edge_label)
+        return self._ged_data._edit_cost.edge_del_cost_fun(edge_label)

     def get_edge_ins_cost(self, edge_label):
@@ -726,8 +726,8 @@ class GEDEnv(object):
         """
         if isinstance(edge_label, dict):
             edge_label = tuple(sorted(edge_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.edge_ins_cost_fun(edge_label)
+        return self._ged_data._edit_cost.edge_ins_cost_fun(edge_label)

     def get_all_graph_ids(self):
-        return [i for i in range(0, self.__ged_data._num_graphs_without_shuffled_copies)]
+        return [i for i in range(0, self._ged_data._num_graphs_without_shuffled_copies)]
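For orientation, a minimal sketch of how this environment is driven end to end. The call order follows the checks visible above (set_edit_cost before init, init before run_method, set_method before run); the import path, the 'CONSTANT'/'BIPARTITE' string keys, and the dict label format are assumptions, not taken from this diff.

    from gklearn.ged.env import GEDEnv  # import path assumed

    env = GEDEnv()
    env.set_edit_cost('CONSTANT', edit_cost_constants=[1, 1, 1, 1, 1, 1])

    gid = env.add_graph('g')
    env.add_node(gid, 0, {'atom': 'C'})  # hypothetical dict label
    hid = env.add_graph('h')
    env.add_node(hid, 0, {'atom': 'O'})

    env.init()                   # must precede run_method(), as enforced above
    env.set_method('BIPARTITE')  # dispatched through OptionsStringMap.GEDMethod
    env.init_method()
    env.run_method(gid, hid)
    print(env.get_upper_bound(gid, hid))
    print(env.get_node_map(gid, hid).forward_map)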

gklearn/ged/env/node_map.py (+25 -25)

@@ -12,9 +12,9 @@ from gklearn.utils import dummy_node, undefined_node
 class NodeMap(object):

     def __init__(self, num_nodes_g, num_nodes_h):
-        self.__forward_map = [undefined_node()] * num_nodes_g
-        self.__backward_map = [undefined_node()] * num_nodes_h
-        self.__induced_cost = np.inf
+        self._forward_map = [undefined_node()] * num_nodes_g
+        self._backward_map = [undefined_node()] * num_nodes_h
+        self._induced_cost = np.inf

     def clear(self):
@@ -23,29 +23,29 @@ class NodeMap(object):
      * @brief Clears the node map.
      */
     """
-        self.__forward_map = [undefined_node() for i in range(len(self.__forward_map))]
-        self.__backward_map = [undefined_node() for i in range(len(self.__backward_map))]
+        self._forward_map = [undefined_node() for i in range(len(self._forward_map))]
+        self._backward_map = [undefined_node() for i in range(len(self._backward_map))]

     def num_source_nodes(self):
-        return len(self.__forward_map)
+        return len(self._forward_map)

     def num_target_nodes(self):
-        return len(self.__backward_map)
+        return len(self._backward_map)

     def image(self, node):
-        if node < len(self.__forward_map):
-            return self.__forward_map[node]
+        if node < len(self._forward_map):
+            return self._forward_map[node]
         else:
             raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
         return undefined_node()

     def pre_image(self, node):
-        if node < len(self.__backward_map):
-            return self.__backward_map[node]
+        if node < len(self._backward_map):
+            return self._backward_map[node]
         else:
             raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.')
         return undefined_node()
@@ -53,50 +53,50 @@ class NodeMap(object):
     def as_relation(self, relation):
         relation.clear()
-        for i in range(0, len(self.__forward_map)):
-            k = self.__forward_map[i]
+        for i in range(0, len(self._forward_map)):
+            k = self._forward_map[i]
             if k != undefined_node():
                 relation.append(tuple((i, k)))
-        for k in range(0, len(self.__backward_map)):
-            i = self.__backward_map[k]
+        for k in range(0, len(self._backward_map)):
+            i = self._backward_map[k]
             if i == dummy_node():
                 relation.append(tuple((i, k)))

     def add_assignment(self, i, k):
         if i != dummy_node():
-            if i < len(self.__forward_map):
-                self.__forward_map[i] = k
+            if i < len(self._forward_map):
+                self._forward_map[i] = k
             else:
                 raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.')
         if k != dummy_node():
-            if k < len(self.__backward_map):
-                self.__backward_map[k] = i
+            if k < len(self._backward_map):
+                self._backward_map[k] = i
             else:
                 raise Exception('The node with ID ', str(k), ' is not contained in the target nodes of the node map.')

     def set_induced_cost(self, induced_cost):
-        self.__induced_cost = induced_cost
+        self._induced_cost = induced_cost

     def induced_cost(self):
-        return self.__induced_cost
+        return self._induced_cost

     @property
     def forward_map(self):
-        return self.__forward_map
+        return self._forward_map

     @forward_map.setter
     def forward_map(self, value):
-        self.__forward_map = value
+        self._forward_map = value

     @property
     def backward_map(self):
-        return self.__backward_map
+        return self._backward_map

     @backward_map.setter
     def backward_map(self, value):
-        self.__backward_map = value
+        self._backward_map = value
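A small usage sketch of the NodeMap API touched above. The dummy_node()/undefined_node() sentinels come from gklearn.utils, as in the file's own import; the import path for NodeMap itself is assumed.

    from gklearn.ged.env.node_map import NodeMap  # import path assumed
    from gklearn.utils import dummy_node

    nm = NodeMap(3, 2)                  # 3 source nodes, 2 target nodes
    nm.add_assignment(0, 1)             # node 0 of g -> node 1 of h
    nm.add_assignment(1, 0)
    nm.add_assignment(2, dummy_node())  # node 2 of g is deleted

    print(nm.image(0))        # -> 1
    print(nm.pre_image(1))    # -> 0
    nm.set_induced_cost(4.0)
    print(nm.induced_cost())  # -> 4.0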

gklearn/ged/learning/cost_matrices_learner.py (+5 -5)

@@ -49,7 +49,7 @@ class CostMatricesLearner(CostsLearner):
                                np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                                np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                self.__execute_cvx(prob)
+                self._execute_cvx(prob)
                 edit_costs_new = x.value
                 residual = np.sqrt(prob.value)
             elif not self._triangle_rule and not self._allow_zeros: # @todo
@@ -57,7 +57,7 @@ class CostMatricesLearner(CostsLearner):
                 cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
                 constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                self.__execute_cvx(prob)
+                self._execute_cvx(prob)
                 edit_costs_new = x.value
                 residual = np.sqrt(prob.value)
             elif self._triangle_rule and not self._allow_zeros: # @todo
@@ -67,7 +67,7 @@ class CostMatricesLearner(CostsLearner):
                                np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                                np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                 prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                self.__execute_cvx(prob)
+                self._execute_cvx(prob)
                 edit_costs_new = x.value
                 residual = np.sqrt(prob.value)
             else:
@@ -113,7 +113,7 @@ class CostMatricesLearner(CostsLearner):
                 elif abs(cost - self._cost_list[-2][i]) / cost > self._epsilon_ec:
                     self._ec_changed = True
                     break
-                # if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
+                # if abs(cost - edit_cost_list[-2][i]) > self._epsilon_ec:
                 #     ec_changed = True
                 #     break
             self._residual_changed = False
@@ -135,7 +135,7 @@ class CostMatricesLearner(CostsLearner):
         print('-------------------------------------------------------------------------')
         print('States of iteration', self._itrs + 1)
         print('-------------------------------------------------------------------------')
-        # print('Time spend:', self.__runtime_optimize_ec)
+        # print('Time spend:', self._runtime_optimize_ec)
         print('Total number of iterations for optimizing:', self._itrs + 1)
         print('Total number of updating edit costs:', self._num_updates_ecs)
         print('Was optimization of edit costs converged:', self._converged)
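The learner fits the six edit-cost constants by constrained least squares with CVXPY. A self-contained sketch of the same optimization shape on random data; the two constraint rows mirror the triangle-rule constraints in the hunks above (node and edge insertion plus deletion must cost at least as much as relabeling):

    import cvxpy as cp
    import numpy as np

    rng = np.random.default_rng(0)
    nb_cost_mat = rng.random((50, 6))  # rows: graph pairs, columns: edit-operation counts
    dis_k_vec = rng.random(50)         # target distances to fit

    x = cp.Variable(nb_cost_mat.shape[1])
    cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
    constraints = [x >= 0.01,  # keep all edit costs strictly positive
                   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]) @ x >= 0.0,
                   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]) @ x >= 0.0]
    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
    prob.solve()
    print('edit costs:', x.value)
    print('residual:', np.sqrt(prob.value))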


gklearn/ged/learning/costs_learner.py (+2 -2)

@@ -126,8 +126,8 @@ class CostsLearner(object):

     def termination_criterion_met(self, converged, timer, itr, itrs_without_update):
         if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
-            # if self.__state == AlgorithmState.TERMINATED:
-            #     self.__state = AlgorithmState.INITIALIZED
+            # if self._state == AlgorithmState.TERMINATED:
+            #     self._state = AlgorithmState.INITIALIZED
             return True
         return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)

gklearn/ged/util/lsape_solver.py (+30 -30)

@@ -19,27 +19,27 @@ class LSAPESolver(object):
      * @param[in] cost_matrix Pointer to the LSAPE problem instance that should be solved.
      */
     """
-        self.__cost_matrix = cost_matrix
-        self.__model = 'ECBP'
-        self.__greedy_method = 'BASIC'
-        self.__solve_optimally = True
-        self.__minimal_cost = 0
-        self.__row_to_col_assignments = []
-        self.__col_to_row_assignments = []
-        self.__dual_var_rows = [] # @todo
-        self.__dual_var_cols = [] # @todo
+        self._cost_matrix = cost_matrix
+        self._model = 'ECBP'
+        self._greedy_method = 'BASIC'
+        self._solve_optimally = True
+        self._minimal_cost = 0
+        self._row_to_col_assignments = []
+        self._col_to_row_assignments = []
+        self._dual_var_rows = [] # @todo
+        self._dual_var_cols = [] # @todo

     def clear_solution(self):
         """Clears a previously computed solution.
         """
-        self.__minimal_cost = 0
-        self.__row_to_col_assignments.clear()
-        self.__col_to_row_assignments.clear()
-        self.__row_to_col_assignments.append([]) # @todo
-        self.__col_to_row_assignments.append([])
-        self.__dual_var_rows = [] # @todo
-        self.__dual_var_cols = [] # @todo
+        self._minimal_cost = 0
+        self._row_to_col_assignments.clear()
+        self._col_to_row_assignments.clear()
+        self._row_to_col_assignments.append([]) # @todo
+        self._col_to_row_assignments.append([])
+        self._dual_var_rows = [] # @todo
+        self._dual_var_cols = [] # @todo

     def set_model(self, model):
@@ -49,8 +49,8 @@ class LSAPESolver(object):
      * @param[in] model The model that should be used.
      */
     """
-        self.__solve_optimally = True
-        self.__model = model
+        self._solve_optimally = True
+        self._model = model

     def solve(self, num_solutions=1):
@@ -61,17 +61,17 @@ class LSAPESolver(object):
      */
     """
         self.clear_solution()
-        if self.__solve_optimally:
-            row_ind, col_ind = linear_sum_assignment(self.__cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used.
-            self.__row_to_col_assignments[0] = col_ind
-            self.__col_to_row_assignments[0] = np.argsort(col_ind) # @todo: might be slow, can use row_ind
-            self.__compute_cost_from_assignments()
+        if self._solve_optimally:
+            row_ind, col_ind = linear_sum_assignment(self._cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used.
+            self._row_to_col_assignments[0] = col_ind
+            self._col_to_row_assignments[0] = np.argsort(col_ind) # @todo: might be slow, can use row_ind
+            self._compute_cost_from_assignments()
             if num_solutions > 1:
                 pass # @todo:
         else:
             print('here is non op.')
             pass # @todo: greedy.
-            # self.__
+            # self._

     def minimal_cost(self):
@@ -81,7 +81,7 @@ class LSAPESolver(object):
      * @return Cost of computed solutions.
      */
     """
-        return self.__minimal_cost
+        return self._minimal_cost

     def get_assigned_col(self, row, solution_id=0):
@@ -93,7 +93,7 @@ class LSAPESolver(object):
      * @returns Column to which @p row is assigned to in solution with ID @p solution_id or ged::undefined() if @p row is not assigned to any column.
      */
     """
-        return self.__row_to_col_assignments[solution_id][row]
+        return self._row_to_col_assignments[solution_id][row]

     def get_assigned_row(self, col, solution_id=0):
@@ -105,7 +105,7 @@ class LSAPESolver(object):
      * @returns Row to which @p col is assigned to in solution with ID @p solution_id or ged::undefined() if @p col is not assigned to any row.
      */
     """
-        return self.__col_to_row_assignments[solution_id][col]
+        return self._col_to_row_assignments[solution_id][col]

     def num_solutions(self):
@@ -115,8 +115,8 @@ class LSAPESolver(object):
      * @returns Actual number of solutions computed by solve(). Might be smaller than @p num_solutions.
      */
     """
-        return len(self.__row_to_col_assignments)
+        return len(self._row_to_col_assignments)

-    def __compute_cost_from_assignments(self): # @todo
-        self.__minimal_cost = np.sum(self.__cost_matrix[range(0, len(self.__row_to_col_assignments[0])), self.__row_to_col_assignments[0]])
+    def _compute_cost_from_assignments(self): # @todo
+        self._minimal_cost = np.sum(self._cost_matrix[range(0, len(self._row_to_col_assignments[0])), self._row_to_col_assignments[0]])
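In the optimal branch, solve() delegates to SciPy's linear_sum_assignment and then reconstructs the total cost exactly as _compute_cost_from_assignments does. A standalone sketch on a toy 3x3 cost matrix:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    cost = np.array([[4.0, 1.0, 3.0],
                     [2.0, 0.0, 5.0],
                     [3.0, 2.0, 2.0]])

    row_ind, col_ind = linear_sum_assignment(cost)
    print(col_ind)                       # column assigned to each row: [1 0 2]
    print(np.argsort(col_ind))           # inverse map, as used for col_to_row
    print(cost[row_ind, col_ind].sum())  # minimal total cost: 5.0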

gklearn/ged/util/util.py (+10 -4)

@@ -70,10 +70,16 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True

     # compute ged.
     # options used to compute numbers of edit operations.
-    neo_options = {'edit_cost': options['edit_cost'],
-                   'is_cml': True,
-                   'node_labels': node_labels,
-                   'edge_labels': edge_labels}
+    if node_label_costs is None and edge_label_costs is None:
+        neo_options = {'edit_cost': options['edit_cost'],
+                       'is_cml': False,
+                       'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
+                       'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
+    else:
+        neo_options = {'edit_cost': options['edit_cost'],
+                       'is_cml': True,
+                       'node_labels': node_labels,
+                       'edge_labels': edge_labels}
     ged_mat = np.zeros((len(graphs), len(graphs)))
     if parallel:
         len_itr = int(len(graphs) * (len(graphs) - 1) / 2)

gklearn/kernels/common_walk.py (+51 -51)

@@ -26,18 +26,18 @@ class CommonWalk(GraphKernel):

     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__weight = kwargs.get('weight', 1)
-        self.__compute_method = kwargs.get('compute_method', None)
-        self.__ds_infos = kwargs.get('ds_infos', {})
-        self.__compute_method = self.__compute_method.lower()
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._weight = kwargs.get('weight', 1)
+        self._compute_method = kwargs.get('compute_method', None)
+        self._ds_infos = kwargs.get('ds_infos', {})
+        self._compute_method = self._compute_method.lower()

     def _compute_gm_series(self):
-        self.__check_graphs(self._graphs)
-        self.__add_dummy_labels(self._graphs)
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs(self._graphs)
+        self._add_dummy_labels(self._graphs)
+        if not self._ds_infos['directed']: # convert
             self._graphs = [G.to_directed() for G in self._graphs]

         # compute Gram matrix.
@@ -51,15 +51,15 @@ class CommonWalk(GraphKernel):
             iterator = itr

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             for i, j in iterator:
-                kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
+                kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             for i, j in iterator:
-                kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
+                kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
@@ -67,9 +67,9 @@ class CommonWalk(GraphKernel):

     def _compute_gm_imap_unordered(self):
-        self.__check_graphs(self._graphs)
-        self.__add_dummy_labels(self._graphs)
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs(self._graphs)
+        self._add_dummy_labels(self._graphs)
+        if not self._ds_infos['directed']: # convert
             self._graphs = [G.to_directed() for G in self._graphs]

         # compute Gram matrix.
@@ -80,10 +80,10 @@ class CommonWalk(GraphKernel):
         #     G_gn = gn_toshare

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             do_fun = self._wrapper_kernel_do_exp
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             do_fun = self._wrapper_kernel_do_geo

         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
@@ -93,9 +93,9 @@ class CommonWalk(GraphKernel):

     def _compute_kernel_list_series(self, g1, g_list):
-        self.__check_graphs(g_list + [g1])
-        self.__add_dummy_labels(g_list + [g1])
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])
+        if not self._ds_infos['directed']: # convert
             g1 = g1.to_directed()
             g_list = [G.to_directed() for G in g_list]

@@ -107,23 +107,23 @@ class CommonWalk(GraphKernel):
             iterator = range(len(g_list))

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             for i in iterator:
-                kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
+                kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
                 kernel_list[i] = kernel
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             for i in iterator:
-                kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
+                kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
                 kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__check_graphs(g_list + [g1])
-        self.__add_dummy_labels(g_list + [g1])
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])
+        if not self._ds_infos['directed']: # convert
             g1 = g1.to_directed()
             g_list = [G.to_directed() for G in g_list]

@@ -136,10 +136,10 @@ class CommonWalk(GraphKernel):
         #     G_g_list = g_list_toshare

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             do_fun = self._wrapper_kernel_list_do_exp
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             do_fun = self._wrapper_kernel_list_do_geo

         def func_assign(result, var_to_assign):
@@ -154,31 +154,31 @@ class CommonWalk(GraphKernel):

     def _wrapper_kernel_list_do_exp(self, itr):
-        return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)
+        return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)

     def _wrapper_kernel_list_do_geo(self, itr):
-        return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
+        return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)

     def _compute_single_kernel_series(self, g1, g2):
-        self.__check_graphs([g1] + [g2])
-        self.__add_dummy_labels([g1] + [g2])
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs([g1] + [g2])
+        self._add_dummy_labels([g1] + [g2])
+        if not self._ds_infos['directed']: # convert
             g1 = g1.to_directed()
             g2 = g2.to_directed()

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
-            kernel = self.__kernel_do_exp(g1, g2, self.__weight)
+        if self._compute_method == 'exp':
+            kernel = self._kernel_do_exp(g1, g2, self._weight)
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
-            kernel = self.__kernel_do_geo(g1, g2, self.__weight)
+        elif self._compute_method == 'geo':
+            kernel = self._kernel_do_geo(g1, g2, self._weight)

         return kernel

-    def __kernel_do_exp(self, g1, g2, beta):
+    def _kernel_do_exp(self, g1, g2, beta):
         """Compute common walk graph kernel between 2 graphs using exponential
         series.
@@ -195,7 +195,7 @@ class CommonWalk(GraphKernel):
             The common walk Kernel between 2 graphs.
         """
         # get tensor product / direct product
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
# return 0 if the direct product graph have no more than 1 node. # return 0 if the direct product graph have no more than 1 node.
if nx.number_of_nodes(gp) < 2: if nx.number_of_nodes(gp) < 2:
return 0 return 0
@@ -227,10 +227,10 @@ class CommonWalk(GraphKernel):
def _wrapper_kernel_do_exp(self, itr): def _wrapper_kernel_do_exp(self, itr):
i = itr[0] i = itr[0]
j = itr[1] j = itr[1]
return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)
def __kernel_do_geo(self, g1, g2, gamma):
def _kernel_do_geo(self, g1, g2, gamma):
"""Compute common walk graph kernel between 2 graphs using geometric """Compute common walk graph kernel between 2 graphs using geometric
series. series.
@@ -247,7 +247,7 @@ class CommonWalk(GraphKernel):
The common walk Kernel between 2 graphs. The common walk Kernel between 2 graphs.
""" """
# get tensor product / direct product # get tensor product / direct product
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
# return 0 if the direct product graph have no more than 1 node. # return 0 if the direct product graph have no more than 1 node.
if nx.number_of_nodes(gp) < 2: if nx.number_of_nodes(gp) < 2:
return 0 return 0
@@ -262,24 +262,24 @@ class CommonWalk(GraphKernel):
def _wrapper_kernel_do_geo(self, itr): def _wrapper_kernel_do_geo(self, itr):
i = itr[0] i = itr[0]
j = itr[1] j = itr[1]
return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)
def __check_graphs(self, Gn):
def _check_graphs(self, Gn):
for g in Gn: for g in Gn:
if nx.number_of_nodes(g) == 1: if nx.number_of_nodes(g) == 1:
raise Exception('Graphs must contain more than 1 nodes to construct adjacency matrices.') raise Exception('Graphs must contain more than 1 nodes to construct adjacency matrices.')
def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)): for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)): for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]
def _init_worker_gm(gn_toshare): def _init_worker_gm(gn_toshare):
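Note: nearly every hunk in this commit renames double-underscore attributes and methods (`self.__weight`, `self.__kernel_do_exp`, ...) to single-underscore ones. A leading double underscore inside a class body triggers Python name mangling: `self.__weight` in `CommonWalk` is stored as `_CommonWalk__weight`, so subclasses and shared helper code cannot reach it under the written name. A minimal sketch of the failure mode the rename avoids (class names here are illustrative, not from gklearn):

    class Base:
        def __init__(self):
            self.__weight = 0.5          # name-mangled to _Base__weight

    class Child(Base):
        def get_weight(self):
            return self.__weight         # looks up _Child__weight, which does not exist

    try:
        Child().get_weight()
    except AttributeError as e:
        print(e)                         # 'Child' object has no attribute '_Child__weight'

With a single underscore (`self._weight`) there is no mangling, so inherited code sees the attribute directly.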


+10 -10  gklearn/kernels/conjugate_gradient.py

@@ -60,7 +60,7 @@ class ConjugateGradient(RandomWalkMeta):
                 iterator = itr
             for i, j in iterator:
-                kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda)
+                kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
@@ -127,7 +127,7 @@ class ConjugateGradient(RandomWalkMeta):
                 iterator = range(len(g_list))
             for i in iterator:
-                kernel = self.__kernel_do(g1, g_list[i], lmda)
+                kernel = self._kernel_do(g1, g_list[i], lmda)
                 kernel_list[i] = kernel

         else:  # @todo
@@ -190,7 +190,7 @@ class ConjugateGradient(RandomWalkMeta):
         g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
-            kernel = self.__kernel_do(g1, g2, lmda)
+            kernel = self._kernel_do(g1, g2, lmda)

         else:  # @todo
             pass
@@ -198,7 +198,7 @@ class ConjugateGradient(RandomWalkMeta):
         return kernel

-    def __kernel_do(self, g1, g2, lmda):
+    def _kernel_do(self, g1, g2, lmda):
         # Frist, compute kernels between all pairs of nodes using the method borrowed
         # from FCSP. It is faster than directly computing all edge kernels
@@ -222,7 +222,7 @@ class ConjugateGradient(RandomWalkMeta):
     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight)
+        return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)

 def _func_fp(x, p_times, lmda, w_times):
@@ -246,19 +246,19 @@ class ConjugateGradient(RandomWalkMeta):
         # Define edge kernels.
         def compute_ek_11(e1, e2, ke):
             e1_labels = [e1[2][el] for el in self._edge_labels]
-            e2_labels = [e2[2][el] for el in self.__edge_labels]
+            e2_labels = [e2[2][el] for el in self._edge_labels]
             e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
             e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
         def compute_ek_10(e1, e2, ke):
-            e1_labels = [e1[2][el] for el in self.__edge_labels]
-            e2_labels = [e2[2][el] for el in self.__edge_labels]
+            e1_labels = [e1[2][el] for el in self._edge_labels]
+            e2_labels = [e2[2][el] for el in self._edge_labels]
             return ke(e1_labels, e2_labels)
         def compute_ek_01(e1, e2, ke):
-            e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-            e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+            e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+            e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_attrs, e2_attrs)
         def compute_ek_00(e1, e2, ke):
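Note: the `compute_ek_*` closures pick an edge-kernel call signature from whichever of `_edge_labels` (symbolic) and `_edge_attrs` (numeric) are present; the hunk above makes them read the single-underscore attributes consistently, where the old mix of `_edge_labels` and `__edge_labels` would have failed under name mangling. A rough, hypothetical sketch of the same dispatch idea outside the class:

    def make_edge_kernel(edge_labels, edge_attrs, ke):
        # Choose the comparison matching the available edge information.
        # e1, e2 are networkx edge triples (u, v, data_dict).
        if edge_labels and edge_attrs:
            return lambda e1, e2: ke([e1[2][l] for l in edge_labels],
                                     [e2[2][l] for l in edge_labels],
                                     [e1[2][a] for a in edge_attrs],
                                     [e2[2][a] for a in edge_attrs])
        if edge_labels:
            return lambda e1, e2: ke([e1[2][l] for l in edge_labels],
                                     [e2[2][l] for l in edge_labels])
        if edge_attrs:
            return lambda e1, e2: ke([e1[2][a] for a in edge_attrs],
                                     [e2[2][a] for a in edge_attrs])
        return lambda e1, e2: 1  # unlabeled edges: constant kernel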


+11 -11  gklearn/kernels/fixed_point.py

@@ -60,7 +60,7 @@ class FixedPoint(RandomWalkMeta):
                 iterator = itr
             for i, j in iterator:
-                kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda)
+                kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
@@ -127,7 +127,7 @@ class FixedPoint(RandomWalkMeta):
                 iterator = range(len(g_list))
             for i in iterator:
-                kernel = self.__kernel_do(g1, g_list[i], lmda)
+                kernel = self._kernel_do(g1, g_list[i], lmda)
                 kernel_list[i] = kernel

         else:  # @todo
@@ -190,7 +190,7 @@ class FixedPoint(RandomWalkMeta):
         g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal')
         if self._p is None and self._q is None:  # p and q are uniform distributions as default.
-            kernel = self.__kernel_do(g1, g2, lmda)
+            kernel = self._kernel_do(g1, g2, lmda)

         else:  # @todo
             pass
@@ -198,7 +198,7 @@ class FixedPoint(RandomWalkMeta):
         return kernel

-    def __kernel_do(self, g1, g2, lmda):
+    def _kernel_do(self, g1, g2, lmda):
         # Frist, compute kernels between all pairs of nodes using the method borrowed
         # from FCSP. It is faster than directly computing all edge kernels
@@ -221,10 +221,10 @@ class FixedPoint(RandomWalkMeta):
     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight)
+        return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight)

-    def _func_fp(x, p_times, lmda, w_times):
+    def _func_fp(self, x, p_times, lmda, w_times):
         haha = w_times * x
         haha = lmda * haha
         haha = p_times + haha
@@ -245,19 +245,19 @@ class FixedPoint(RandomWalkMeta):
         # Define edge kernels.
         def compute_ek_11(e1, e2, ke):
             e1_labels = [e1[2][el] for el in self._edge_labels]
-            e2_labels = [e2[2][el] for el in self.__edge_labels]
+            e2_labels = [e2[2][el] for el in self._edge_labels]
             e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
             e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
         def compute_ek_10(e1, e2, ke):
-            e1_labels = [e1[2][el] for el in self.__edge_labels]
-            e2_labels = [e2[2][el] for el in self.__edge_labels]
+            e1_labels = [e1[2][el] for el in self._edge_labels]
+            e2_labels = [e2[2][el] for el in self._edge_labels]
             return ke(e1_labels, e2_labels)
         def compute_ek_01(e1, e2, ke):
-            e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-            e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+            e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+            e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
             return ke(e1_attrs, e2_attrs)
         def compute_ek_00(e1, e2, ke):
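Note: besides the renames, this file gains the missing `self` parameter on `_func_fp`. A function defined in a class body receives the instance as its first positional argument, so without `self` the instance silently lands in `x`. A minimal sketch (names illustrative):

    class Solver:
        def bad(x, w):           # 'x' silently receives the Solver instance
            return x * w

        def good(self, x, w):
            return x * w

    s = Solver()
    # s.bad(2.0, 3.0)  -> TypeError: bad() takes 2 positional arguments but 3 were given
    print(s.good(2.0, 3.0))      # 6.0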


+7 -7  gklearn/kernels/graph_kernel.py

@@ -37,7 +37,7 @@ class GraphKernel(object):
                 raise Exception('The graph list given is empty. No computation was performed.')
             else:
                 self._graphs = [g.copy() for g in graphs[0]]
-                self._gram_matrix = self.__compute_gram_matrix()
+                self._gram_matrix = self._compute_gram_matrix()
                 self._gram_matrix_unnorm = np.copy(self._gram_matrix)
                 if self._normalize:
                     self._gram_matrix = self.normalize_gm(self._gram_matrix)
@@ -45,17 +45,17 @@ class GraphKernel(object):
         elif len(graphs) == 2:
             if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
-                kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy())
+                kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy())
                 return kernel, self._run_time
             elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
                 g1 = graphs[0].copy()
                 g_list = [g.copy() for g in graphs[1]]
-                kernel_list = self.__compute_kernel_list(g1, g_list)
+                kernel_list = self._compute_kernel_list(g1, g_list)
                 return kernel_list, self._run_time
             elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
                 g1 = graphs[1].copy()
                 g_list = [g.copy() for g in graphs[0]]
-                kernel_list = self.__compute_kernel_list(g1, g_list)
+                kernel_list = self._compute_kernel_list(g1, g_list)
                 return kernel_list, self._run_time
             else:
                 raise Exception('Cannot detect graphs.')
@@ -99,7 +99,7 @@ class GraphKernel(object):
         return dis_mat, dis_max, dis_min, dis_mean

-    def __compute_gram_matrix(self):
+    def _compute_gram_matrix(self):
         start_time = time.time()

         if self._parallel == 'imap_unordered':
@@ -125,7 +125,7 @@ class GraphKernel(object):
             pass

-    def __compute_kernel_list(self, g1, g_list):
+    def _compute_kernel_list(self, g1, g_list):
         start_time = time.time()

         if self._parallel == 'imap_unordered':
@@ -151,7 +151,7 @@ class GraphKernel(object):
             pass

-    def __compute_single_kernel(self, g1, g2):
+    def _compute_single_kernel(self, g1, g2):
         start_time = time.time()

         kernel = self._compute_single_kernel_series(g1, g2)
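Note: the three renamed helpers sit behind the public entry point, which dispatches on its inputs: a single list of graphs yields a Gram matrix, one graph plus a list yields a kernel list, and two graphs yield a single kernel value, each returned with the run time. A hypothetical usage sketch, assuming the gklearn 0.2.x entry point `compute(*graphs)` and the `Marginalized` kernel from the next file (exact constructor arguments vary per kernel):

    import networkx as nx
    from gklearn.kernels.marginalized import Marginalized

    graphs = [nx.path_graph(4), nx.cycle_graph(5), nx.star_graph(3)]
    kernel = Marginalized(p_quit=0.5, n_iteration=10,
                          ds_infos={'directed': False})

    gram, t1 = kernel.compute(graphs)                   # Gram matrix over a list
    k_list, t2 = kernel.compute(graphs[0], graphs[1:])  # one graph vs. a list
    k, t3 = kernel.compute(graphs[0], graphs[1])        # a single pair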


+45 -45  gklearn/kernels/marginalized.py

@@ -33,25 +33,25 @@ class Marginalized(GraphKernel):
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__p_quit = kwargs.get('p_quit', 0.5)
-        self.__n_iteration = kwargs.get('n_iteration', 10)
-        self.__remove_totters = kwargs.get('remove_totters', False)
-        self.__ds_infos = kwargs.get('ds_infos', {})
-        self.__n_iteration = int(self.__n_iteration)
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._p_quit = kwargs.get('p_quit', 0.5)
+        self._n_iteration = kwargs.get('n_iteration', 10)
+        self._remove_totters = kwargs.get('remove_totters', False)
+        self._ds_infos = kwargs.get('ds_infos', {})
+        self._n_iteration = int(self._n_iteration)

     def _compute_gm_series(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

-        if self.__remove_totters:
+        if self._remove_totters:
             if self._verbose >= 2:
                 iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
             else:
                 iterator = self._graphs
             # @todo: this may not work.
-            self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+            self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -63,7 +63,7 @@ class Marginalized(GraphKernel):
         else:
             iterator = itr
         for i, j in iterator:
-            kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
+            kernel = self._kernel_do(self._graphs[i], self._graphs[j])
             gram_matrix[i][j] = kernel
             gram_matrix[j][i] = kernel  # @todo: no directed graph considered?
@@ -71,9 +71,9 @@ class Marginalized(GraphKernel):
     def _compute_gm_imap_unordered(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

-        if self.__remove_totters:
+        if self._remove_totters:
             pool = Pool(self._n_jobs)
             itr = range(0, len(self._graphs))
             if len(self._graphs) < 100 * self._n_jobs:
@@ -105,16 +105,16 @@ class Marginalized(GraphKernel):
     def _compute_kernel_list_series(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

-        if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+        if self._remove_totters:
+            g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
             if self._verbose >= 2:
                 iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
             else:
                 iterator = g_list
             # @todo: this may not work.
-            g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+            g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -123,17 +123,17 @@ class Marginalized(GraphKernel):
         else:
             iterator = range(len(g_list))
         for i in iterator:
-            kernel = self.__kernel_do(g1, g_list[i])
+            kernel = self._kernel_do(g1, g_list[i])
             kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

-        if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+        if self._remove_totters:
+            g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
             pool = Pool(self._n_jobs)
             itr = range(0, len(g_list))
             if len(g_list) < 100 * self._n_jobs:
@@ -171,19 +171,19 @@ class Marginalized(GraphKernel):
     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__kernel_do(G_g1, G_g_list[itr])
+        return itr, self._kernel_do(G_g1, G_g_list[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        self.__add_dummy_labels([g1] + [g2])
-        if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
-            g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
-        kernel = self.__kernel_do(g1, g2)
+        self._add_dummy_labels([g1] + [g2])
+        if self._remove_totters:
+            g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
+            g2 = untotterTransformation(g2, self._node_labels, self._edge_labels)
+        kernel = self._kernel_do(g1, g2)

         return kernel

-    def __kernel_do(self, g1, g2):
+    def _kernel_do(self, g1, g2):
         """Compute marginalized graph kernel between 2 graphs.

         Parameters
@@ -205,7 +205,7 @@ class Marginalized(GraphKernel):
         p_init_G1 = 1 / num_nodes_G1
         p_init_G2 = 1 / num_nodes_G2

-        q = self.__p_quit * self.__p_quit
+        q = self._p_quit * self._p_quit
         r1 = q

 #        # initial R_inf
@@ -260,36 +260,36 @@ class Marginalized(GraphKernel):
                 if len(g2[node2]) > 0:
                     R_inf[(node1, node2)] = r1
                 else:
-                    R_inf[(node1, node2)] = self.__p_quit
+                    R_inf[(node1, node2)] = self._p_quit
             else:
                 if len(g2[node2]) > 0:
-                    R_inf[(node1, node2)] = self.__p_quit
+                    R_inf[(node1, node2)] = self._p_quit
                 else:
                     R_inf[(node1, node2)] = 1

         # compute all transition probability first.
         t_dict = {}
-        if self.__n_iteration > 1:
+        if self._n_iteration > 1:
             for node1 in g1.nodes():
                 neighbor_n1 = g1[node1]
                 # the transition probability distribution in the random walks
                 # generating step (uniform distribution over the vertices adjacent
                 # to the current vertex)
                 if len(neighbor_n1) > 0:
-                    p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
+                    p_trans_n1 = (1 - self._p_quit) / len(neighbor_n1)
                     for node2 in g2.nodes():
                         neighbor_n2 = g2[node2]
                         if len(neighbor_n2) > 0:
-                            p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
+                            p_trans_n2 = (1 - self._p_quit) / len(neighbor_n2)
                             for neighbor1 in neighbor_n1:
                                 for neighbor2 in neighbor_n2:
                                     t_dict[(node1, node2, neighbor1, neighbor2)] = \
                                         p_trans_n1 * p_trans_n2 * \
-                                        deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
-                                        deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
+                                        deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \
+                                        deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels))

         # Compute R_inf with a simple interative method
-        for i in range(2, self.__n_iteration + 1):
+        for i in range(2, self._n_iteration + 1):
             R_inf_old = R_inf.copy()

             # Compute R_inf for each pair of nodes
@@ -311,7 +311,7 @@ class Marginalized(GraphKernel):
         # add elements of R_inf up and compute kernel.
         for (n1, n2), value in R_inf.items():
-            s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
+            s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels))
             kernel += s * value  # ref [1] equation (6)

         return kernel
@@ -320,19 +320,19 @@ class Marginalized(GraphKernel):
     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do(G_gn[i], G_gn[j])
+        return i, j, self._kernel_do(G_gn[i], G_gn[j])

     def _wrapper_untotter(self, i):
-        return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+        return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels)  # @todo: this may not work.

-    def __add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_labels(self, Gn):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]
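Note: in `_kernel_do`, each of the two synchronized random walks stops with probability `_p_quit` at every step and otherwise moves to a uniformly chosen neighbour, so `q = p_quit**2` is the weight of both walks stopping at once, and a joint transition carries weight `(1 - p_quit)/deg(n1) * (1 - p_quit)/deg(n2)`, gated by delta kernels on node and edge labels. A toy check of one entry of `t_dict` (values illustrative):

    p_quit = 0.5
    deg_n1, deg_n2 = 2, 3      # neighbour counts of node1 in g1 and node2 in g2
    labels_match = 1           # deltakernel(...) on node and edge labels

    t = (1 - p_quit) / deg_n1 * (1 - p_quit) / deg_n2 * labels_match
    print(t)                   # 0.25 * 0.1666... = 0.041666...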

+80 -80  gklearn/kernels/path_up_to_h.py

@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__depth = int(kwargs.get('depth', 10))
-        self.__k_func = kwargs.get('k_func', 'MinMax')
-        self.__compute_method = kwargs.get('compute_method', 'trie')
-        self.__ds_infos = kwargs.get('ds_infos', {})
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._depth = int(kwargs.get('depth', 10))
+        self._k_func = kwargs.get('k_func', 'MinMax')
+        self._compute_method = kwargs.get('compute_method', 'trie')
+        self._ds_infos = kwargs.get('ds_infos', {})

     def _compute_gm_series(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

         from itertools import combinations_with_replacement
         itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-        if self.__compute_method == 'trie':
-            all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
+        if self._compute_method == 'trie':
+            all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
             for i, j in iterator_kernel:
-                kernel = self.__kernel_do_trie(all_paths[i], all_paths[j])
+                kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
         else:
-            all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
+            all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
             for i, j in iterator_kernel:
-                kernel = self.__kernel_do_naive(all_paths[i], all_paths[j])
+                kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _compute_gm_imap_unordered(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

         # get all paths of all graphs before computing kernels to save time,
         # but this may cost a lot of memory for large datasets.
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         else:
             chunksize = 100
         all_paths = [[] for _ in range(len(self._graphs))]
-        if self.__compute_method == 'trie' and self.__k_func is not None:
+        if self._compute_method == 'trie' and self._k_func is not None:
             get_ps_fun = self._wrapper_find_all_path_as_trie
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
+        elif self._compute_method != 'trie' and self._k_func is not None:
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
         else:
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-        if self.__compute_method == 'trie' and self.__k_func is not None:
+        if self._compute_method == 'trie' and self._k_func is not None:
             def init_worker(trie_toshare):
                 global G_trie
                 G_trie = trie_toshare
             do_fun = self._wrapper_kernel_do_trie
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
+        elif self._compute_method != 'trie' and self._k_func is not None:
             def init_worker(plist_toshare):
                 global G_plist
                 G_plist = plist_toshare
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
             def init_worker(plist_toshare):
                 global G_plist
                 G_plist = plist_toshare
-            do_fun = self.__wrapper_kernel_do_kernelless  # @todo: what is this?
+            do_fun = self._wrapper_kernel_do_kernelless  # @todo: what is this?
         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                     glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)
@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _compute_kernel_list_series(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

         if self._verbose >= 2:
             iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         kernel_list = [None] * len(g_list)

-        if self.__compute_method == 'trie':
-            paths_g1 = self.__find_all_path_as_trie(g1)
-            paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps]
+        if self._compute_method == 'trie':
+            paths_g1 = self._find_all_path_as_trie(g1)
+            paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
             for i in iterator_kernel:
-                kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i])
+                kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
                 kernel_list[i] = kernel
         else:
-            paths_g1 = self.__find_all_paths_until_length(g1)
-            paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps]
+            paths_g1 = self._find_all_paths_until_length(g1)
+            paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
             for i in iterator_kernel:
-                kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i])
+                kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
                 kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

         # get all paths of all graphs before computing kernels to save time,
         # but this may cost a lot of memory for large datasets.
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         else:
             chunksize = 100
         paths_g_list = [[] for _ in range(len(g_list))]
-        if self.__compute_method == 'trie' and self.__k_func is not None:
-            paths_g1 = self.__find_all_path_as_trie(g1)
+        if self._compute_method == 'trie' and self._k_func is not None:
+            paths_g1 = self._find_all_path_as_trie(g1)
             get_ps_fun = self._wrapper_find_all_path_as_trie
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
-            paths_g1 = self.__find_all_paths_until_length(g1)
+        elif self._compute_method != 'trie' and self._k_func is not None:
+            paths_g1 = self._find_all_paths_until_length(g1)
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
         else:
-            paths_g1 = self.__find_all_paths_until_length(g1)
+            paths_g1 = self._find_all_paths_until_length(g1)
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
         if self._verbose >= 2:
             iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_kernel_list_do(self, itr):
-        if self.__compute_method == 'trie' and self.__k_func is not None:
-            return itr, self.__kernel_do_trie(G_p1, G_plist[itr])
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
-            return itr, self.__kernel_do_naive(G_p1, G_plist[itr])
+        if self._compute_method == 'trie' and self._k_func is not None:
+            return itr, self._kernel_do_trie(G_p1, G_plist[itr])
+        elif self._compute_method != 'trie' and self._k_func is not None:
+            return itr, self._kernel_do_naive(G_p1, G_plist[itr])
         else:
-            return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr])
+            return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        self.__add_dummy_labels([g1] + [g2])
-        if self.__compute_method == 'trie':
-            paths_g1 = self.__find_all_path_as_trie(g1)
-            paths_g2 = self.__find_all_path_as_trie(g2)
-            kernel = self.__kernel_do_trie(paths_g1, paths_g2)
+        self._add_dummy_labels([g1] + [g2])
+        if self._compute_method == 'trie':
+            paths_g1 = self._find_all_path_as_trie(g1)
+            paths_g2 = self._find_all_path_as_trie(g2)
+            kernel = self._kernel_do_trie(paths_g1, paths_g2)
         else:
-            paths_g1 = self.__find_all_paths_until_length(g1)
-            paths_g2 = self.__find_all_paths_until_length(g2)
-            kernel = self.__kernel_do_naive(paths_g1, paths_g2)
+            paths_g1 = self._find_all_paths_until_length(g1)
+            paths_g2 = self._find_all_paths_until_length(g2)
+            kernel = self._kernel_do_naive(paths_g1, paths_g2)
         return kernel

-    def __kernel_do_trie(self, trie1, trie2):
+    def _kernel_do_trie(self, trie1, trie2):
         """Compute path graph kernels up to depth d between 2 graphs using trie.

         Parameters
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         kernel : float
             Path kernel up to h between 2 graphs.
         """
-        if self.__k_func == 'tanimoto':
+        if self._k_func == 'tanimoto':
             # traverse all paths in graph1 and search them in graph2. Deep-first
             # search is applied.
             def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 #            print(setlist)
             kernel = setlist[0] / setlist[1]
-        elif self.__k_func == 'MinMax':  # MinMax kernel
+        elif self._k_func == 'MinMax':  # MinMax kernel
             # traverse all paths in graph1 and search them in graph2. Deep-first
             # search is applied.
             def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_kernel_do_trie(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j])
+        return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])

-    def __kernel_do_naive(self, paths1, paths2):
+    def _kernel_do_naive(self, paths1, paths2):
         """Compute path graph kernels up to depth d between 2 graphs naively.

         Parameters
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         """
         all_paths = list(set(paths1 + paths2))

-        if self.__k_func == 'tanimoto':
+        if self._k_func == 'tanimoto':
             length_union = len(set(paths1 + paths2))
             kernel = (len(set(paths1)) + len(set(paths2)) -
                       length_union) / length_union
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 #            kernel_uv = np.dot(vector1, vector2)
 #            kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)

-        elif self.__k_func == 'MinMax':  # MinMax kernel
+        elif self._k_func == 'MinMax':  # MinMax kernel
             path_count1 = Counter(paths1)
             path_count2 = Counter(paths2)
             vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
             kernel = np.sum(np.minimum(vector1, vector2)) / \
                      np.sum(np.maximum(vector1, vector2))
-        elif self.__k_func is None:  # no sub-kernel used; compare paths directly.
+        elif self._k_func is None:  # no sub-kernel used; compare paths directly.
             path_count1 = Counter(paths1)
             path_count2 = Counter(paths2)
             vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_kernel_do_naive(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j])
+        return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])

-    def __find_all_path_as_trie(self, G):
+    def _find_all_path_as_trie(self, G):
 #        all_path = find_all_paths_until_length(G, length, ds_attrs,
 #                                               node_label=node_label,
 #                                               edge_label=edge_label)
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         # them. Deep-first search is applied. Notice the reverse of each path is
         # also stored to the trie.
         def traverseGraph(root, ptrie, G, pcurrent=[]):
-            if len(pcurrent) < self.__depth + 1:
+            if len(pcurrent) < self._depth + 1:
                 for neighbor in G[root]:
                     if neighbor not in pcurrent:
                         pcurrent.append(neighbor)
-                        plstr = self.__paths2labelseqs([pcurrent], G)
+                        plstr = self._paths2labelseqs([pcurrent], G)
                         ptrie.insertWord(plstr[0])
                         traverseGraph(neighbor, ptrie, G, pcurrent)
                 del pcurrent[-1]
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         ptrie = Trie()
         path_l = [[n] for n in G.nodes]  # paths of length l
-        path_l_str = self.__paths2labelseqs(path_l, G)
+        path_l_str = self._paths2labelseqs(path_l, G)
         for p in path_l_str:
             ptrie.insertWord(p)
         for n in G.nodes:
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_find_all_path_as_trie(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, self.__find_all_path_as_trie(g)
+        return i, self._find_all_path_as_trie(g)

     # @todo: (can be removed maybe) this method find paths repetively, it could be faster.
-    def __find_all_paths_until_length(self, G, tolabelseqs=True):
+    def _find_all_paths_until_length(self, G, tolabelseqs=True):
         """Find all paths no longer than a certain maximum length in a graph. A
         recursive depth first search is applied.
@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         """
 #        path_l = [tuple([n]) for n in G.nodes]  # paths of length l
 #        all_paths = path_l[:]
-#        for l in range(1, self.__depth + 1):
+#        for l in range(1, self._depth + 1):
 #            path_l_new = []
 #            for path in path_l:
 #                for neighbor in G[path[-1]]:
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         path_l = [[n] for n in G.nodes]  # paths of length l
         all_paths = [p.copy() for p in path_l]
-        for l in range(1, self.__depth + 1):
+        for l in range(1, self._depth + 1):
             path_lplus1 = []
             for path in path_l:
                 for neighbor in G[path[-1]]:
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
             all_paths += path_lplus1
             path_l = [p.copy() for p in path_lplus1]

-#        for i in range(0, self.__depth + 1):
+#        for i in range(0, self._depth + 1):
 #            new_paths = find_all_paths(G, i)
 #            if new_paths == []:
 #                break
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         # consider labels
 #        print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
 #        print()
-        return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
+        return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)

     def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
+        return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)

-    def __paths2labelseqs(self, plist, G):
-        if len(self.__node_labels) > 0:
-            if len(self.__edge_labels) > 0:
+    def _paths2labelseqs(self, plist, G):
+        if len(self._node_labels) > 0:
+            if len(self._edge_labels) > 0:
                 path_strs = []
                 for path in plist:
                     pths_tmp = []
                     for idx, node in enumerate(path[:-1]):
-                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
-                        pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
-                    pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels))
+                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
+                        pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
+                    pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
                     path_strs.append(tuple(pths_tmp))
             else:
                 path_strs = []
                 for path in plist:
                     pths_tmp = []
                     for node in path:
-                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
+                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
                     path_strs.append(tuple(pths_tmp))
             return path_strs
         else:
-            if len(self.__edge_labels) > 0:
+            if len(self._edge_labels) > 0:
                 path_strs = []
                 for path in plist:
                     if len(path) == 1:
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
                     else:
                         pths_tmp = []
                         for idx, node in enumerate(path[:-1]):
-                            pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
+                            pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
                         path_strs.append(tuple(pths_tmp))
                 return path_strs
             else:
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 #        return [tuple([len(path)]) for path in all_paths]

-    def __add_dummy_labels(self, Gn):
-        if self.__k_func is not None:
-            if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_labels(self, Gn):
+        if self._k_func is not None:
+            if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
                 for i in range(len(Gn)):
                     nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-                self.__node_labels = [SpecialLabel.DUMMY]
-            if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+                self._node_labels = [SpecialLabel.DUMMY]
+            if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
                 for i in range(len(Gn)):
                     nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-                self.__edge_labels = [SpecialLabel.DUMMY]
+                self._edge_labels = [SpecialLabel.DUMMY]
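Note: `_kernel_do_naive` treats each graph as a multiset of path label sequences. Tanimoto is the Jaccard index of the two path sets, written above as `(|P1| + |P2| - |P1 union P2|) / |P1 union P2|`; MinMax compares the path counts via sum(min)/sum(max). A small sketch with hand-made path lists:

    from collections import Counter

    paths1 = ['ab', 'ab', 'bc']
    paths2 = ['ab', 'bc', 'cd']

    # Tanimoto: |intersection| / |union| over the path *sets*.
    s1, s2 = set(paths1), set(paths2)
    tanimoto = len(s1 & s2) / len(s1 | s2)              # 2 / 3

    # MinMax: compare multiplicities over the union of paths.
    c1, c2 = Counter(paths1), Counter(paths2)
    minmax = sum(min(c1[k], c2[k]) for k in s1 | s2) / \
             sum(max(c1[k], c2[k]) for k in s1 | s2)    # 2 / 4
    print(tanimoto, minmax)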

+4 -4  gklearn/kernels/random_walk_meta.py

@@ -76,11 +76,11 @@ class RandomWalkMeta(GraphKernel):
     def _add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]
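Note: `_add_dummy_labels` (this base-class version and the per-kernel copies above) gives every node and edge one constant label under the `SpecialLabel.DUMMY` key, so unlabeled graphs pass through label-based delta kernels unchanged. A minimal sketch with a plain string standing in for `SpecialLabel.DUMMY`:

    import networkx as nx

    DUMMY = 'dummy'                        # stand-in for SpecialLabel.DUMMY
    g = nx.path_graph(3)                   # no node or edge labels yet

    nx.set_node_attributes(g, '0', DUMMY)  # every node gets the same label '0'
    nx.set_edge_attributes(g, '0', DUMMY)

    print(g.nodes[0][DUMMY], g.edges[0, 1][DUMMY])   # 0 0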

+33 -33  gklearn/kernels/shortest_path.py

@@ -26,11 +26,11 @@ class ShortestPath(GraphKernel):
def __init__(self, **kwargs): def __init__(self, **kwargs):
GraphKernel.__init__(self) GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__node_attrs = kwargs.get('node_attrs', [])
self.__edge_weight = kwargs.get('edge_weight', None)
self.__node_kernels = kwargs.get('node_kernels', None)
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._node_attrs = kwargs.get('node_attrs', [])
self._edge_weight = kwargs.get('edge_weight', None)
self._node_kernels = kwargs.get('node_kernels', None)
self._ds_infos = kwargs.get('ds_infos', {})




def _compute_gm_series(self): def _compute_gm_series(self):
@@ -39,7 +39,7 @@ class ShortestPath(GraphKernel):
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout) iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
else: else:
iterator = self._graphs iterator = self._graphs
self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -51,7 +51,7 @@ class ShortestPath(GraphKernel):
else: else:
iterator = itr iterator = itr
for i, j in iterator: for i, j in iterator:
kernel = self.__sp_do(self._graphs[i], self._graphs[j])
kernel = self._sp_do(self._graphs[i], self._graphs[j])
gram_matrix[i][j] = kernel gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel gram_matrix[j][i] = kernel
@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
# get shortest path graphs of g1 and each graph in g_list. # get shortest path graphs of g1 and each graph in g_list.
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
else:
iterator = g_list
g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -106,7 +106,7 @@ class ShortestPath(GraphKernel):
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__sp_do(g1, g_list[i])
kernel = self._sp_do(g1, g_list[i])
kernel_list[i] = kernel
return kernel_list
@@ -114,7 +114,7 @@ class ShortestPath(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list):
# get shortest path graphs of g1 and each graph in g_list.
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
pool = Pool(self._n_jobs)
get_sp_graphs_fun = self._wrapper_get_sp_graphs
itr = zip(g_list, range(0, len(g_list)))
@@ -151,55 +151,55 @@ class ShortestPath(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__sp_do(G_g1, G_gl[itr])
return itr, self._sp_do(G_g1, G_gl[itr])
def _compute_single_kernel_series(self, g1, g2):
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
kernel = self.__sp_do(g1, g2)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
g2 = getSPGraph(g2, edge_weight=self._edge_weight)
kernel = self._sp_do(g1, g2)
return kernel
def _wrapper_get_sp_graphs(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, getSPGraph(g, edge_weight=self.__edge_weight)
return i, getSPGraph(g, edge_weight=self._edge_weight)
def __sp_do(self, g1, g2):
def _sp_do(self, g1, g2):
kernel = 0
# compute shortest path matrices first, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if len(self.__node_labels) > 0:
if len(self._node_labels) > 0:
# node symb and non-symb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['mix']
if len(self._node_attrs) > 0:
kn = self._node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
kn = self.__node_kernels['symb']
kn = self._node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
else:
# node non-symb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['nsymb']
if len(self._node_attrs) > 0:
kn = self._node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
# node unlabeled
else:
@@ -210,7 +210,7 @@ class ShortestPath(GraphKernel):
return kernel
# compute graph kernels
if self.__ds_infos['directed']:
if self._ds_infos['directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
@@ -261,4 +261,4 @@ class ShortestPath(GraphKernel):
def _wrapper_sp_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__sp_do(G_gs[i], G_gs[j])
return i, j, self._sp_do(G_gs[i], G_gs[j])
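Nearly every hunk in this file (and in the rest of the PR) is the same mechanical rename: name-mangled double-underscore members (self.__x) become single-underscore ones (self._x). A plausible motivation, shown as a minimal sketch with invented class names (not gklearn code): Python rewrites __x inside a class body to _ClassName__x, so a subclass, e.g. WLSubtree(WeisfeilerLehman) further down, cannot reach its parent's attributes under the written name, while _x is inherited as-is.

```python
class Base:
    def __init__(self):
        self.__weight = 1.0  # mangled: actually stored as self._Base__weight
        self._height = 2.0   # single underscore: stored exactly as written


class Derived(Base):
    def read(self):
        print(self._height)   # fine: plain attribute, inherited as-is
        print(self.__weight)  # mangles to self._Derived__weight -> AttributeError


Derived().read()
```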

+ 7
- 7
gklearn/kernels/spectral_decomposition.py View File

@@ -66,7 +66,7 @@ class SpectralDecomposition(RandomWalkMeta):
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
@@ -162,7 +162,7 @@ class SpectralDecomposition(RandomWalkMeta):
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
kernel_list[i] = kernel
else: # @todo
@@ -190,9 +190,9 @@ class SpectralDecomposition(RandomWalkMeta):
P_list = []
D_list = []
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='spectral decompose', file=sys.stdout)
iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
else:
iterator = range(len(g_list))
iterator = g_list
for G in iterator:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A actually is the transpose of the adjacency matrix.
@@ -252,7 +252,7 @@ class SpectralDecomposition(RandomWalkMeta):
if self._p is None: # p is uniform distribution as default.
q_T1 = 1 / nx.number_of_nodes(g1)
q_T2 = 1 / nx.number_of_nodes(g2)
kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
kernel = self._kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
else: # @todo
pass
else: # @todo
@@ -261,7 +261,7 @@ class SpectralDecomposition(RandomWalkMeta):
return kernel
def __kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
def _kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
# use uniform distribution if there is no prior knowledge.
kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
# @todo: this is not needed when p = q (kr = kl.T) for undirected graphs.
@@ -280,4 +280,4 @@ class SpectralDecomposition(RandomWalkMeta):
def _wrapper_kernel_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel)
return i, j, self._kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel)
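Besides the renames, this file switches the 'spectral decompose' progress iterator from index ranges to the graphs themselves. The renamed _kernel_do works in the eigenbasis of the Kronecker product graph: kron(np.dot(q_T1, P1), np.dot(q_T2, P2)) projects the start distributions onto the product eigenvectors, and the sub-kernel reweights the paired eigenvalues. A self-contained sketch of that quantity for two undirected graphs with uniform p = q (the function name and the dense NumPy formulation are mine, not the library's):

```python
import numpy as np

def spectral_rw_kernel(A1, A2, weight=1e-3, sub_kernel='geo'):
    # Eigendecompose the symmetric adjacency matrices: A = U diag(D) U^T.
    D1, U1 = np.linalg.eigh(A1)
    D2, U2 = np.linalg.eigh(A2)
    # Uniform start/stop distributions q (= p).
    q1 = np.full(A1.shape[0], 1.0 / A1.shape[0])
    q2 = np.full(A2.shape[0], 1.0 / A2.shape[0])
    # Project q1 (x) q2 onto the eigenvectors of A1 (x) A2.
    left = np.kron(q1 @ U1, q2 @ U2)
    lam = np.kron(D1, D2)  # eigenvalues of the product graph
    if sub_kernel == 'exp':
        middle = np.exp(weight * lam)
    else:  # geometric series; converges when weight * max|lam| < 1
        middle = 1.0 / (1.0 - weight * lam)
    return float(left @ (middle * left))

# e.g. a 2-node path versus a 3-node path:
k = spectral_rw_kernel(np.array([[0., 1.], [1., 0.]]),
                       np.array([[0., 1., 0.], [1., 0., 1.], [0., 1., 0.]]))
```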

+ 56
- 56
gklearn/kernels/structural_sp.py View File

@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__node_attrs = kwargs.get('node_attrs', [])
self.__edge_attrs = kwargs.get('edge_attrs', [])
self.__edge_weight = kwargs.get('edge_weight', None)
self.__node_kernels = kwargs.get('node_kernels', None)
self.__edge_kernels = kwargs.get('edge_kernels', None)
self.__compute_method = kwargs.get('compute_method', 'naive')
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._node_attrs = kwargs.get('node_attrs', [])
self._edge_attrs = kwargs.get('edge_attrs', [])
self._edge_weight = kwargs.get('edge_weight', None)
self._node_kernels = kwargs.get('node_kernels', None)
self._edge_kernels = kwargs.get('edge_kernels', None)
self._compute_method = kwargs.get('compute_method', 'naive')
self._ds_infos = kwargs.get('ds_infos', {})




def _compute_gm_series(self):
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel):
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
else:
iterator = self._graphs
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for g in iterator:
splist.append(self.__get_sps_as_trie(g))
splist.append(self._get_sps_as_trie(g))
else:
for g in iterator:
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -60,14 +60,14 @@ class StructuralSP(GraphKernel):
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for i, j in iterator:
kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
else:
for i, j in iterator:
kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
# if(kernel > 1):
# print("error here ")
gram_matrix[i][j] = kernel
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel):
else:
chunksize = 100
# get shortest path graphs of self._graphs
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
get_sps_fun = self._wrapper_get_sps_trie
else:
get_sps_fun = self._wrapper_get_sps_naive
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel):
global G_spl, G_gs
G_spl = spl_toshare
G_gs = gs_toshare
if self.__compute_method == 'trie':
do_fun = self.__wrapper_ssp_do_trie
if self._compute_method == 'trie':
do_fun = self._wrapper_ssp_do_trie
else:
do_fun = self._wrapper_ssp_do_naive
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
@@ -119,18 +119,18 @@ class StructuralSP(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
# get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = []
if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
else:
iterator = g_list
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for g in iterator:
splist.append(self.__get_sps_as_trie(g))
splist.append(self._get_sps_as_trie(g))
else:
for g in iterator:
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -138,13 +138,13 @@ class StructuralSP(GraphKernel):
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for i in iterator:
kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i])
kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
kernel_list[i] = kernel
else:
for i in iterator:
kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i])
kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
kernel_list[i] = kernel
return kernel_list
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list):
# get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = [None] * len(g_list)
pool = Pool(self._n_jobs)
itr = zip(g_list, range(0, len(g_list)))
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel):
else:
chunksize = 100
# get shortest path graphs of g_list
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
get_sps_fun = self._wrapper_get_sps_trie
else:
get_sps_fun = self._wrapper_get_sps_naive
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel):
G_spl = spl_toshare
G_g1 = g1_toshare
G_gl = gl_toshare
if self.__compute_method == 'trie':
do_fun = self.__wrapper_ssp_do_trie
if self._compute_method == 'trie':
do_fun = self._wrapper_ssp_do_trie
else:
do_fun = self._wrapper_kernel_list_do
def func_assign(result, var_to_assign):
@@ -199,36 +199,36 @@ class StructuralSP(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])


def _compute_single_kernel_series(self, g1, g2):
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed'])
if self.__compute_method == 'trie':
kernel = self.__ssp_do_trie(g1, g2, sp1, sp2)
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
if self._compute_method == 'trie':
kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
else:
kernel = self.__ssp_do_naive(g1, g2, sp1, sp2)
kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
return kernel
def _wrapper_get_sps_naive(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])
return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])
def __ssp_do_naive(self, g1, g2, spl1, spl2):
def _ssp_do_naive(self, g1, g2, spl1, spl2):
kernel = 0
# First, compute shortest path matrices, method borrowed from FCSP.
vk_dict = self.__get_all_node_kernels(g1, g2)
vk_dict = self._get_all_node_kernels(g1, g2)
# Then, compute kernels between all pairs of edges, which extends the
# idea of FCSP. It suits sparse graphs, which are the most common case
# we encountered. For dense graphs, this would be slow.
ek_dict = self.__get_all_edge_kernels(g1, g2)
ek_dict = self._get_all_edge_kernels(g1, g2)
# compute graph kernels
if vk_dict:
@@ -314,27 +314,27 @@ class StructuralSP(GraphKernel):
def _wrapper_ssp_do_naive(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
def __get_all_node_kernels(self, g1, g2):
def _get_all_node_kernels(self, g1, g2):
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
def __get_all_edge_kernels(self, g1, g2):
def _get_all_edge_kernels(self, g1, g2):
# compute kernels between all pairs of edges, which extends the
# idea of FCSP. It suits sparse graphs, which are the most common case
# we encountered. For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels
if len(self.__edge_labels) > 0:
if len(self._edge_labels) > 0:
# edge symb and non-symb labeled
if len(self.__edge_attrs) > 0:
ke = self.__edge_kernels['mix']
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['mix']
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
e1_labels = [e1[2][el] for el in self.__edge_labels]
e2_labels = [e2[2][el] for el in self.__edge_labels]
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -342,11 +342,11 @@ class StructuralSP(GraphKernel):
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge symb labeled
else:
ke = self.__edge_kernels['symb']
ke = self._edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
e1_labels = [e1[2][el] for el in self.__edge_labels]
e2_labels = [e2[2][el] for el in self.__edge_labels]
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
ek_temp = ke(e1_labels, e2_labels)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -354,12 +354,12 @@ class StructuralSP(GraphKernel):
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
else:
# edge non-symb labeled
if len(self.__edge_attrs) > 0:
ke = self.__edge_kernels['nsymb']
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
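The FCSP trick the comments refer to is to tabulate the node kernel once per node pair so the later loops over shortest-path pairs reduce to dictionary lookups; the four-way ek_dict insertions above do the same for undirected edges, storing every endpoint ordering so lookups never need normalising. A minimal sketch of the tabulation step (a simplified stand-in, not the library's compute_vertex_kernels):

```python
from itertools import product

def vertex_kernel_table(g1, g2, kn, node_labels):
    # Evaluate kn once for every node pair of the two networkx graphs;
    # the path-comparison loop afterwards only does dictionary lookups.
    vk = {}
    for (u, du), (v, dv) in product(g1.nodes(data=True), g2.nodes(data=True)):
        vk[(u, v)] = kn([du[l] for l in node_labels],
                        [dv[l] for l in node_labels])
    return vk

# e.g. with a delta kernel on symbolic labels:
delta = lambda ls1, ls2: 1.0 if ls1 == ls2 else 0.0
```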


+ 85
- 85
gklearn/kernels/treelet.py View File

@@ -28,16 +28,16 @@ class Treelet(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__sub_kernel = kwargs.get('sub_kernel', None)
self.__ds_infos = kwargs.get('ds_infos', {})
if self.__sub_kernel is None:
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._sub_kernel = kwargs.get('sub_kernel', None)
self._ds_infos = kwargs.get('ds_infos', {})
if self._sub_kernel is None:
raise Exception('Sub kernel not set.')




def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
@@ -47,7 +47,7 @@ class Treelet(GraphKernel):
else:
iterator = self._graphs
for g in iterator:
canonkeys.append(self.__get_canonkeys(g))
canonkeys.append(self._get_canonkeys(g))
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -59,7 +59,7 @@ class Treelet(GraphKernel):
else:
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(canonkeys[i], canonkeys[j])
kernel = self._kernel_do(canonkeys[i], canonkeys[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel # @todo: no directed graph considered?
@@ -67,7 +67,7 @@ class Treelet(GraphKernel):
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
@@ -103,18 +103,18 @@ class Treelet(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_1 = self._get_canonkeys(g1)
canonkeys_list = []
if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout)
else:
iterator = g_list
for g in iterator:
canonkeys_list.append(self.__get_canonkeys(g))
canonkeys_list.append(self._get_canonkeys(g))
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -123,18 +123,18 @@ class Treelet(GraphKernel):
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i])
kernel = self._kernel_do(canonkeys_1, canonkeys_list[i])
kernel_list[i] = kernel
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_1 = self._get_canonkeys(g1)
canonkeys_list = [[] for _ in range(len(g_list))]
pool = Pool(self._n_jobs)
itr = zip(g_list, range(0, len(g_list)))
@@ -173,18 +173,18 @@ class Treelet(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__kernel_do(G_ck_1, G_ck_list[itr])
return itr, self._kernel_do(G_ck_1, G_ck_list[itr])
def _compute_single_kernel_series(self, g1, g2):
self.__add_dummy_labels([g1] + [g2])
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_2 = self.__get_canonkeys(g2)
kernel = self.__kernel_do(canonkeys_1, canonkeys_2)
self._add_dummy_labels([g1] + [g2])
canonkeys_1 = self._get_canonkeys(g1)
canonkeys_2 = self._get_canonkeys(g2)
kernel = self._kernel_do(canonkeys_1, canonkeys_2)
return kernel
def __kernel_do(self, canonkey1, canonkey2):
def _kernel_do(self, canonkey1, canonkey2):
"""Compute treelet graph kernel between 2 graphs.
Parameters
@@ -200,17 +200,17 @@ class Treelet(GraphKernel):
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])
kernel = self.__sub_kernel(vector1, vector2)
kernel = self._sub_kernel(vector1, vector2)
return kernel
def _wrapper_kernel_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j])
return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j])
def __get_canonkeys(self, G):
def _get_canonkeys(self, G):
"""Generate canonical keys of all treelets in a graph.
Parameters
@@ -236,7 +236,7 @@ class Treelet(GraphKernel):
patterns['0'] = list(G.nodes())
canonkey['0'] = nx.number_of_nodes(G)
for i in range(1, 6): # for i in range(1, 6):
patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed'])
patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed'])
canonkey[str(i)] = len(patterns[str(i)])
# n-star patterns
@@ -330,11 +330,11 @@ class Treelet(GraphKernel):
### pattern obtained in the structural analysis section above, which is a
### string corresponding to a unique treelet. A dictionary is built to keep
### track of the count of every treelet.
if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0:
if len(self._node_labels) > 0 or len(self._edge_labels) > 0:
canonkey_l = {} # canonical key, a dictionary which keeps track of the count of every treelet.
# linear patterns
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels))
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels))
for key in canonkey_t:
canonkey_l[('0', key)] = canonkey_t[key]
@@ -343,9 +343,9 @@ class Treelet(GraphKernel):
for pattern in patterns[str(i)]:
canonlist = []
for idx, node in enumerate(pattern[:-1]):
canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels))
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels))
canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels))
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels))
canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
treelet.append(tuple([str(i)] + canonkey_t))
canonkey_l.update(Counter(treelet))
@@ -356,13 +356,13 @@ class Treelet(GraphKernel):
for pattern in patterns[str(i) + 'star']:
canonlist = []
for leaf in pattern[1:]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['d' if i == 5 else str(i * 2)] +
[tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
[tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ canonlist)
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
@@ -372,17 +372,17 @@ class Treelet(GraphKernel):
for pattern in patterns['7']:
canonlist = []
for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['7']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)])
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)])
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
@@ -391,38 +391,38 @@ class Treelet(GraphKernel):
for pattern in patterns['11']:
canonlist = []
for leaf in pattern[1:4]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['b']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)])
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)])
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
# pattern 10
treelet = []
for pattern in patterns['10']:
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]
canonlist = []
for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonkey0 = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['a']
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
+ canonkey4 + canonkey0)
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
@@ -432,15 +432,15 @@ class Treelet(GraphKernel):
for pattern in patterns['12']:
canonlist0 = []
for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist0.append(tuple((nlabels, elabels)))
canonlist0.sort()
canonlist0 = list(chain.from_iterable(canonlist0))
canonlist3 = []
for leaf in pattern[4:6]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels)
canonlist3.append(tuple((nlabels, elabels)))
canonlist3.sort()
canonlist3 = list(chain.from_iterable(canonlist3))
@@ -448,14 +448,14 @@ class Treelet(GraphKernel):
# 2 possible keys can be generated from 2 nodes with extended label 3,
# select the one with lower lexicographic order.
canonkey_t1 = tuple(['c']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+ canonlist3)
canonkey_t2 = tuple(['c']
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
+ canonlist0)
treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
canonkey_l.update(Counter(treelet))
@@ -463,24 +463,24 @@ class Treelet(GraphKernel):
# pattern 9
treelet = []
for pattern in patterns['9']:
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels),
tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)]
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)]
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels),
tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)]
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels),
tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
if prekey2 + canonkey2 < prekey3 + canonkey3:
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
+ prekey2 + prekey3 + canonkey2 + canonkey3
else:
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
+ prekey3 + prekey2 + canonkey3 + canonkey2
treelet.append(tuple(['9']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ canonkey_t))
canonkey_l.update(Counter(treelet))
@@ -492,15 +492,15 @@ class Treelet(GraphKernel):
def _wrapper_get_canonkeys(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, self.__get_canonkeys(g)
return i, self._get_canonkeys(g)
def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]
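Once the canonical keys exist, the kernel itself (the renamed _kernel_do above) reduces to a few lines: intersect the two key sets, align the treelet counts into vectors, and apply the sub-kernel. A compact sketch of that step (the Gaussian sub-kernel is one common choice here, not the only one):

```python
import numpy as np

def treelet_kernel(canonkey1, canonkey2, sub_kernel):
    # Keep only treelets that occur in both graphs, then compare counts.
    keys = set(canonkey1) & set(canonkey2)
    v1 = np.array([canonkey1[k] for k in keys])
    v2 = np.array([canonkey2[k] for k in keys])
    return sub_kernel(v1, v2)

# e.g. with a Gaussian sub-kernel on the count vectors:
gaussian = lambda x, y, gamma=1.0: np.exp(-gamma * np.sum((x - y) ** 2))
k = treelet_kernel({'0': 5, '1': 4}, {'0': 6, '1': 3}, gaussian)
```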

+ 41
- 41
gklearn/kernels/weisfeiler_lehman.py View File

@@ -25,11 +25,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__height = int(kwargs.get('height', 0))
self.__base_kernel = kwargs.get('base_kernel', 'subtree')
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._height = int(kwargs.get('height', 0))
self._base_kernel = kwargs.get('base_kernel', 'subtree')
self._ds_infos = kwargs.get('ds_infos', {})




def _compute_gm_series(self):
@@ -37,23 +37,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
import warnings
warnings.warn('A part of the computation is parallelized.')
self.__add_dummy_node_labels(self._graphs)
self._add_dummy_node_labels(self._graphs)
# for WL subtree kernel
if self.__base_kernel == 'subtree':
gram_matrix = self.__subtree_kernel_do(self._graphs)
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do(self._graphs)
# for WL shortest path kernel
elif self.__base_kernel == 'sp':
gram_matrix = self.__sp_kernel_do(self._graphs)
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do(self._graphs)
# for WL edge kernel
elif self.__base_kernel == 'edge':
gram_matrix = self.__edge_kernel_do(self._graphs)
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do(self._graphs)
# for user defined base kernel
else:
gram_matrix = self.__user_kernel_do(self._graphs)
gram_matrix = self._user_kernel_do(self._graphs)
return gram_matrix
@@ -70,23 +70,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
import warnings
warnings.warn('A part of the computation is parallelized.')
self.__add_dummy_node_labels(g_list + [g1])
self._add_dummy_node_labels(g_list + [g1])
# for WL subtree kernel
if self.__base_kernel == 'subtree':
gram_matrix = self.__subtree_kernel_do(g_list + [g1])
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do(g_list + [g1])
# for WL shortest path kernel
elif self.__base_kernel == 'sp':
gram_matrix = self.__sp_kernel_do(g_list + [g1])
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do(g_list + [g1])
# for WL edge kernel
elif self.__base_kernel == 'edge':
gram_matrix = self.__edge_kernel_do(g_list + [g1])
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do(g_list + [g1])
# for user defined base kernel
else:
gram_matrix = self.__user_kernel_do(g_list + [g1])
gram_matrix = self._user_kernel_do(g_list + [g1])
return list(gram_matrix[-1][0:-1])
@@ -103,28 +103,28 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better.
self.__add_dummy_node_labels([g1] + [g2])
self._add_dummy_node_labels([g1] + [g2])


# for WL subtree kernel
if self.__base_kernel == 'subtree':
gram_matrix = self.__subtree_kernel_do([g1] + [g2])
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do([g1] + [g2])
# for WL shortest path kernel
elif self.__base_kernel == 'sp':
gram_matrix = self.__sp_kernel_do([g1] + [g2])
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do([g1] + [g2])
# for WL edge kernel
elif self.__base_kernel == 'edge':
gram_matrix = self.__edge_kernel_do([g1] + [g2])
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do([g1] + [g2])
# for user defined base kernel
else:
gram_matrix = self.__user_kernel_do([g1] + [g2])
gram_matrix = self._user_kernel_do([g1] + [g2])
return gram_matrix[0][1]
def __subtree_kernel_do(self, Gn):
def _subtree_kernel_do(self, Gn):
"""Compute Weisfeiler-Lehman kernels between graphs.
Parameters
@@ -146,17 +146,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for G in Gn:
# set all labels into a tuple.
for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self.__node_labels)
G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
# get the set of original labels
labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
# number of occurrences of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori)))
# Compute subtree kernel with the 0th iteration and add it to the final kernel.
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)
# iterate each height
for h in range(1, self.__height + 1):
for h in range(1, self._height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = 0 # number of labels that have occurred as node labels at least once in all graphs
# all_labels_ori = set() # all unique original labels in all graphs in this iteration
@@ -199,12 +199,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
all_num_of_each_label.append(dict(Counter(labels_comp)))
# Compute subtree kernel with h iterations and add it to the final kernel
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)
return gram_matrix


def __compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn):
def _compute_gram_itr(self, gram_matrix, all_num_of_each_label, Gn):
"""Compute Gram matrix using the base kernel.
"""
if self._parallel == 'imap_unordered':
@@ -218,12 +218,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
elif self._parallel is None:
for i in range(len(gram_matrix)):
for j in range(i, len(gram_matrix)):
gram_matrix[i][j] = self.__compute_subtree_kernel(all_num_of_each_label[i],
gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
all_num_of_each_label[j], gram_matrix[i][j])
gram_matrix[j][i] = gram_matrix[i][j]
def __compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
"""Compute the subtree kernel.
"""
labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
@@ -240,7 +240,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
i = itr[0]
j = itr[1]
return i, j, self.__compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
def _wl_spkernel_do(Gn, node_label, edge_label, height):
@@ -469,11 +469,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
return gram_matrix
def __add_dummy_node_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_node_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
self._node_labels = [SpecialLabel.DUMMY]
class WLSubtree(WeisfeilerLehman):
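The renamed _subtree_kernel_do follows the standard WL scheme: at each height every node's label becomes its old label plus the sorted multiset of its neighbours' labels, per-graph label histograms are collected, and _compute_gram_itr adds their inner products into the Gram matrix. A one-iteration sketch of the relabelling, simplified to a single label attribute rather than the label tuples used above:

```python
from collections import Counter
import networkx as nx

def wl_relabel_once(G, label='label'):
    # New label = (old label, sorted multiset of neighbour labels).
    new = {v: str((G.nodes[v][label],
                   tuple(sorted(G.nodes[u][label] for u in G.neighbors(v)))))
           for v in G.nodes()}
    nx.set_node_attributes(G, new, label)
    return Counter(new.values())  # label histogram at this height
```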


+ 2
- 2
gklearn/preimage/generate_random_preimages_by_class.py View File

@@ -31,7 +31,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], dir_save)


dis_k_dataset_list = []
@@ -166,7 +166,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
print('\ncomplete.\n')


def __init_output_file_preimage(ds_name, gkernel, dir_output):
def _init_output_file_preimage(ds_name, gkernel, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'


+ 25
- 25
gklearn/preimage/kernel_knn_cv.py View File

@@ -33,35 +33,35 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
else:
fn_output_detail, fn_output_summary = None, None
# 2. compute/load Gram matrix a priori.
print('2. computing/loading Gram matrix...')
gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
gram_matrix_unnorm, time_precompute_gm = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
# 3. perform k-nn CV.
print('3. performing k-nn CV...')
if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random':
__kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
elif train_examples == 'best-dataset':
__kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
elif train_examples == 'trainset':
__kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)


print('\ncomplete.\n')
def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
Gn = dataset_all.graphs
y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']


# get shuffles.
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)
accuracies = [[], [], []]
for trial in range(len(train_indices)):
@@ -89,11 +89,11 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
mge_options['update_order'] = True
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0
set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
set_median, gen_median_uo = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
mge_options['update_order'] = False
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0
_, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
_, gen_median = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
medians[0].append(set_median)
medians[1].append(gen_median)
medians[2].append(gen_median_uo)
@@ -104,10 +104,10 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in G_app], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_medians = []
for g in G_app:
kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
@@ -161,13 +161,13 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
f_summary.close()
def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
Gn = dataset_all.graphs
y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']


# get shuffles.
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)
accuracies = []
for trial in range(len(train_indices)):
@@ -204,10 +204,10 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in best_graphs], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_best_graphs = []
for g in best_graphs:
kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
@@ -259,7 +259,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
f_summary.close()
def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
y_all = dataset_all.targets y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']
@@ -268,7 +268,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) dis_mat, _, _, _ = compute_distance_matrix(gram_matrix)


# get shuffles. # get shuffles.
train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, _, _ = _get_shuffles(y_all, n_splits, test_size)
accuracies = [] accuracies = []
for trial in range(len(train_indices)): for trial in range(len(train_indices)):
@@ -317,7 +317,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
f_summary.close() f_summary.close()
def __get_shuffles(y_all, n_splits, test_size):
def _get_shuffles(y_all, n_splits, test_size):
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
train_indices = [[] for _ in range(n_splits)] train_indices = [[] for _ in range(n_splits)]
test_indices = [[] for _ in range(n_splits)] test_indices = [[] for _ in range(n_splits)]
@@ -335,7 +335,7 @@ def __get_shuffles(y_all, n_splits, test_size):
return train_indices, test_indices, train_nums, keys return train_indices, test_indices, train_nums, keys
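Note on `_get_shuffles` above: it is built on scikit-learn's `ShuffleSplit` with a fixed `random_state=0`, so every k-NN trial sees the same train/test partitions and results stay comparable across kernels. A minimal self-contained illustration of that reproducibility (the toy `y_all` below is hypothetical, not from the repository):

    import numpy as np
    from sklearn.model_selection import ShuffleSplit

    y_all = np.arange(10)  # hypothetical stand-in for the dataset targets
    rs = ShuffleSplit(n_splits=2, test_size=0.3, random_state=0)
    for train_idx, test_idx in rs.split(y_all):
        # With a fixed random_state the same two splits come back on every run.
        print(sorted(train_idx), sorted(test_idx))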
-def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
+def _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
    mpg = MedianPreimageGenerator()
    mpg.dataset = dataset.copy()
    mpg.set_options(**mpg_options.copy())

@@ -346,7 +346,7 @@ def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_option
    return mpg.set_median, mpg.gen_median


-def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
+def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
    if load_gm == 'auto':
        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))

@@ -355,10 +355,10 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
            gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
            time_precompute_gm = float(gmfile['run_time'])
        else:
-            gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
+            gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
            np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
    elif not load_gm:
-        gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
+        gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
    else:
        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'

@@ -369,7 +369,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
    return gram_matrix_unnorm, time_precompute_gm
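The `load_gm == 'auto'` branch above is a plain disk cache: the unnormalized Gram matrix is recomputed only when no `.gm.npz` file exists, and is otherwise reloaded together with its recorded runtime so timing reports stay honest. A condensed sketch of the same pattern (the `compute_fn` callback is a stand-in for the expensive kernel computation):

    import os
    import numpy as np

    def load_or_compute_gram(gm_fname, compute_fn):
        # Reload a cached Gram matrix if present; otherwise compute and cache it.
        if os.path.isfile(os.path.abspath(gm_fname)):
            gmfile = np.load(gm_fname)
            return gmfile['gram_matrix_unnorm'], float(gmfile['run_time'])
        gram_matrix_unnorm, run_time = compute_fn()  # the expensive part
        # np.savez appends '.npz' to the name when it is missing.
        np.savez(gm_fname, gram_matrix_unnorm=gram_matrix_unnorm, run_time=run_time)
        return gram_matrix_unnorm, run_time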




-def __get_graph_kernel(dataset, kernel_options):
+def _get_graph_kernel(dataset, kernel_options):
    from gklearn.utils.utils import get_graph_kernel_by_name
    graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
                                            node_labels=dataset.node_labels,

@@ -381,7 +381,7 @@ def __get_graph_kernel(dataset, kernel_options):
    return graph_kernel


-def __compute_gram_matrix_unnorm(dataset, kernel_options):
+def _compute_gram_matrix_unnorm(dataset, kernel_options):
    from gklearn.utils.utils import get_graph_kernel_by_name
    graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
                                            node_labels=dataset.node_labels,

@@ -397,7 +397,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
    return gram_matrix_unnorm, run_time


-def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
+def _init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
    if not os.path.exists(dir_output):
        os.makedirs(dir_output)
    fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv'
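The pattern throughout this pull request, in this file and in the preimage generators below, is the rename of double-underscore members (`__foo`) to single-underscore ones (`_foo`). Double leading underscores trigger Python's name mangling (`self.__foo` is stored as `self._ClassName__foo`), which blocks access from subclasses and makes the attributes awkward to reach in tests and debuggers; a single underscore keeps the "internal" convention without the mangling. A small illustration (hypothetical classes, standard Python behavior):

    class Base:
        def __init__(self):
            self.__x = 1   # stored as _Base__x due to name mangling
            self._y = 2    # plain convention; visible to subclasses

    class Child(Base):
        def show(self):
            print(self._y)    # fine
            print(self.__x)   # AttributeError: no attribute '_Child__x'

    Child().show()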


+284 -283   gklearn/preimage/median_preimage_generator.py
    (File diff suppressed because it is too large)

+221 -221   gklearn/preimage/median_preimage_generator_cml.py

@@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
    def __init__(self, dataset=None):
        PreimageGenerator.__init__(self, dataset=dataset)
        ### arguments to set.
-        self.__mge = None
-        self.__ged_options = {}
-        self.__mge_options = {}
-        # self.__fit_method = 'k-graphs'
-        self.__init_method = 'random'
-        self.__init_ecc = None
-        self.__parallel = True
-        self.__n_jobs = multiprocessing.cpu_count()
-        self.__ds_name = None
+        self._mge = None
+        self._ged_options = {}
+        self._mge_options = {}
+        # self._fit_method = 'k-graphs'
+        self._init_method = 'random'
+        self._init_ecc = None
+        self._parallel = True
+        self._n_jobs = multiprocessing.cpu_count()
+        self._ds_name = None
        # for cml.
-        self.__time_limit_in_sec = 0
-        self.__max_itrs = 100
-        self.__max_itrs_without_update = 3
-        self.__epsilon_residual = 0.01
-        self.__epsilon_ec = 0.1
-        self.__allow_zeros = True
-        # self.__triangle_rule = True
+        self._time_limit_in_sec = 0
+        self._max_itrs = 100
+        self._max_itrs_without_update = 3
+        self._epsilon_residual = 0.01
+        self._epsilon_ec = 0.1
+        self._allow_zeros = True
+        # self._triangle_rule = True
        ### values to compute.
-        self.__runtime_optimize_ec = None
-        self.__runtime_generate_preimage = None
-        self.__runtime_total = None
-        self.__set_median = None
-        self.__gen_median = None
-        self.__best_from_dataset = None
-        self.__sod_set_median = None
-        self.__sod_gen_median = None
-        self.__k_dis_set_median = None
-        self.__k_dis_gen_median = None
-        self.__k_dis_dataset = None
-        self.__node_label_costs = None
-        self.__edge_label_costs = None
+        self._runtime_optimize_ec = None
+        self._runtime_generate_preimage = None
+        self._runtime_total = None
+        self._set_median = None
+        self._gen_median = None
+        self._best_from_dataset = None
+        self._sod_set_median = None
+        self._sod_gen_median = None
+        self._k_dis_set_median = None
+        self._k_dis_gen_median = None
+        self._k_dis_dataset = None
+        self._node_label_costs = None
+        self._edge_label_costs = None
        # for cml.
-        self.__itrs = 0
-        self.__converged = False
-        self.__num_updates_ecs = 0
+        self._itrs = 0
+        self._converged = False
+        self._num_updates_ecs = 0
        ### values that can be set or to be computed.
-        self.__edit_cost_constants = []
-        self.__gram_matrix_unnorm = None
-        self.__runtime_precompute_gm = None
+        self._edit_cost_constants = []
+        self._gram_matrix_unnorm = None
+        self._runtime_precompute_gm = None


    def set_options(self, **kwargs):
        self._kernel_options = kwargs.get('kernel_options', {})
        self._graph_kernel = kwargs.get('graph_kernel', None)
        self._verbose = kwargs.get('verbose', 2)
-        self.__ged_options = kwargs.get('ged_options', {})
-        self.__mge_options = kwargs.get('mge_options', {})
-        # self.__fit_method = kwargs.get('fit_method', 'k-graphs')
-        self.__init_method = kwargs.get('init_method', 'random')
-        self.__init_ecc = kwargs.get('init_ecc', None)
-        self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
-        self.__parallel = kwargs.get('parallel', True)
-        self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
-        self.__ds_name = kwargs.get('ds_name', None)
-        self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
-        self.__max_itrs = kwargs.get('max_itrs', 100)
-        self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
-        self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
-        self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
-        self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
-        self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
-        self.__allow_zeros = kwargs.get('allow_zeros', True)
-        # self.__triangle_rule = kwargs.get('triangle_rule', True)
+        self._ged_options = kwargs.get('ged_options', {})
+        self._mge_options = kwargs.get('mge_options', {})
+        # self._fit_method = kwargs.get('fit_method', 'k-graphs')
+        self._init_method = kwargs.get('init_method', 'random')
+        self._init_ecc = kwargs.get('init_ecc', None)
+        self._edit_cost_constants = kwargs.get('edit_cost_constants', [])
+        self._parallel = kwargs.get('parallel', True)
+        self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+        self._ds_name = kwargs.get('ds_name', None)
+        self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
+        self._max_itrs = kwargs.get('max_itrs', 100)
+        self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
+        self._epsilon_residual = kwargs.get('epsilon_residual', 0.01)
+        self._epsilon_ec = kwargs.get('epsilon_ec', 0.1)
+        self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
+        self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
+        self._allow_zeros = kwargs.get('allow_zeros', True)
+        # self._triangle_rule = kwargs.get('triangle_rule', True)


    def run(self):

@@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
        start = time.time()

        # 1. precompute gram matrix.
-        if self.__gram_matrix_unnorm is None:
+        if self._gram_matrix_unnorm is None:
            gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
-            self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
+            self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
            end_precompute_gm = time.time()
-            self.__runtime_precompute_gm = end_precompute_gm - start
+            self._runtime_precompute_gm = end_precompute_gm - start
        else:
-            if self.__runtime_precompute_gm is None:
+            if self._runtime_precompute_gm is None:
                raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
-            self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
+            self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
            if self._kernel_options['normalize']:
-                self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+                self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
            else:
-                self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
+                self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
            end_precompute_gm = time.time()
-            start -= self.__runtime_precompute_gm
+            start -= self._runtime_precompute_gm

-        # if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
+        # if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset':
        #     start = time.time()
-        #     self.__runtime_precompute_gm = 0
+        #     self._runtime_precompute_gm = 0
        #     end_precompute_gm = start

        # 2. optimize edit cost constants.
-        self.__optimize_edit_cost_vector()
+        self._optimize_edit_cost_vector()
        end_optimize_ec = time.time()
-        self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm
+        self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm

        # 3. compute set median and gen median using optimized edit costs.
        if self._verbose >= 2:
            print('\nstart computing set median and gen median using optimized edit costs...\n')
-        self.__gmg_bcu()
+        self._gmg_bcu()
        end_generate_preimage = time.time()
-        self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
-        self.__runtime_total = end_generate_preimage - start
+        self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec
+        self._runtime_total = end_generate_preimage - start
        if self._verbose >= 2:
            print('medians computed.')
-            print('SOD of the set median: ', self.__sod_set_median)
-            print('SOD of the generalized median: ', self.__sod_gen_median)
+            print('SOD of the set median: ', self._sod_set_median)
+            print('SOD of the generalized median: ', self._sod_gen_median)

        # 4. compute kernel distances to the true median.
        if self._verbose >= 2:
            print('\nstart computing distances to true median....\n')
-        self.__compute_distances_to_true_median()
+        self._compute_distances_to_true_median()

        # 5. print out results.
        if self._verbose:
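When a precomputed unnormalized Gram matrix is supplied, the branch above re-derives the normalized matrix via `normalize_gm`. Assuming the usual cosine normalization for kernels (every graph gets unit self-similarity), the operation amounts to:

    import numpy as np

    def normalize_gm_sketch(gram_matrix):
        # K'[i, j] = K[i, j] / sqrt(K[i, i] * K[j, j])
        diag = np.sqrt(np.diag(gram_matrix))
        return gram_matrix / np.outer(diag, diag)

This is consistent with the per-entry normalization applied to `kernels_to_sm` and `kernels_to_gm` further down in `_compute_distances_to_true_median`.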
@@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
            print('================================================================================')
            print('Finished generation of preimages.')
            print('--------------------------------------------------------------------------------')
-            print('The optimized edit costs:', self.__edit_cost_constants)
-            print('SOD of the set median:', self.__sod_set_median)
-            print('SOD of the generalized median:', self.__sod_gen_median)
-            print('Distance in kernel space for set median:', self.__k_dis_set_median)
-            print('Distance in kernel space for generalized median:', self.__k_dis_gen_median)
-            print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
-            print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
-            print('Time to optimize edit costs:', self.__runtime_optimize_ec)
-            print('Time to generate pre-images:', self.__runtime_generate_preimage)
-            print('Total time:', self.__runtime_total)
-            print('Total number of iterations for optimizing:', self.__itrs)
-            print('Total number of updating edit costs:', self.__num_updates_ecs)
-            print('Is optimization of edit costs converged:', self.__converged)
+            print('The optimized edit costs:', self._edit_cost_constants)
+            print('SOD of the set median:', self._sod_set_median)
+            print('SOD of the generalized median:', self._sod_gen_median)
+            print('Distance in kernel space for set median:', self._k_dis_set_median)
+            print('Distance in kernel space for generalized median:', self._k_dis_gen_median)
+            print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
+            print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
+            print('Time to optimize edit costs:', self._runtime_optimize_ec)
+            print('Time to generate pre-images:', self._runtime_generate_preimage)
+            print('Total time:', self._runtime_total)
+            print('Total number of iterations for optimizing:', self._itrs)
+            print('Total number of updating edit costs:', self._num_updates_ecs)
+            print('Is optimization of edit costs converged:', self._converged)
            print('================================================================================')
            print()


    def get_results(self):
        results = {}
-        results['edit_cost_constants'] = self.__edit_cost_constants
-        results['runtime_precompute_gm'] = self.__runtime_precompute_gm
-        results['runtime_optimize_ec'] = self.__runtime_optimize_ec
-        results['runtime_generate_preimage'] = self.__runtime_generate_preimage
-        results['runtime_total'] = self.__runtime_total
-        results['sod_set_median'] = self.__sod_set_median
-        results['sod_gen_median'] = self.__sod_gen_median
-        results['k_dis_set_median'] = self.__k_dis_set_median
-        results['k_dis_gen_median'] = self.__k_dis_gen_median
-        results['k_dis_dataset'] = self.__k_dis_dataset
-        results['itrs'] = self.__itrs
-        results['converged'] = self.__converged
-        results['num_updates_ecc'] = self.__num_updates_ecs
+        results['edit_cost_constants'] = self._edit_cost_constants
+        results['runtime_precompute_gm'] = self._runtime_precompute_gm
+        results['runtime_optimize_ec'] = self._runtime_optimize_ec
+        results['runtime_generate_preimage'] = self._runtime_generate_preimage
+        results['runtime_total'] = self._runtime_total
+        results['sod_set_median'] = self._sod_set_median
+        results['sod_gen_median'] = self._sod_gen_median
+        results['k_dis_set_median'] = self._k_dis_set_median
+        results['k_dis_gen_median'] = self._k_dis_gen_median
+        results['k_dis_dataset'] = self._k_dis_dataset
+        results['itrs'] = self._itrs
+        results['converged'] = self._converged
+        results['num_updates_ecc'] = self._num_updates_ecs
        results['mge'] = {}
-        results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
-        results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
-        results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
+        results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased()
+        results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased()
+        results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents()
        return results


-    def __optimize_edit_cost_vector(self):
+    def _optimize_edit_cost_vector(self):
        """Learn edit cost vector.
        """
        # Initialize label costs randomly.
-        if self.__init_method == 'random':
+        if self._init_method == 'random':
            # Initialize label costs.
-            self.__initialize_label_costs()
+            self._initialize_label_costs()
            # Optimize edit cost matrices.
-            self.__optimize_ecm_by_kernel_distances()
+            self._optimize_ecm_by_kernel_distances()
        # Initialize all label costs with the same value.
-        elif self.__init_method == 'uniform': # random
+        elif self._init_method == 'uniform': # random
            pass
-        elif self.__fit_method == 'random': # random
-            if self.__ged_options['edit_cost'] == 'LETTER':
-                self.__edit_cost_constants = random.sample(range(1, 1000), 3)
-                self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
-            elif self.__ged_options['edit_cost'] == 'LETTER2':
+        elif self._fit_method == 'random': # random
+            if self._ged_options['edit_cost'] == 'LETTER':
+                self._edit_cost_constants = random.sample(range(1, 1000), 3)
+                self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants]
+            elif self._ged_options['edit_cost'] == 'LETTER2':
                random.seed(time.time())
-                self.__edit_cost_constants = random.sample(range(1, 1000), 5)
-                self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
-            elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
-                self.__edit_cost_constants = random.sample(range(1, 1000), 6)
-                self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
+                self._edit_cost_constants = random.sample(range(1, 1000), 5)
+                self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
+            elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
+                self._edit_cost_constants = random.sample(range(1, 1000), 6)
+                self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
                if self._dataset.node_attrs == []:
-                    self.__edit_cost_constants[2] = 0
+                    self._edit_cost_constants[2] = 0
                if self._dataset.edge_attrs == []:
-                    self.__edit_cost_constants[5] = 0
+                    self._edit_cost_constants[5] = 0
            else:
-                self.__edit_cost_constants = random.sample(range(1, 1000), 6)
-                self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
+                self._edit_cost_constants = random.sample(range(1, 1000), 6)
+                self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
            if self._verbose >= 2:
-                print('edit cost constants used:', self.__edit_cost_constants)
-        elif self.__fit_method == 'expert': # expert
-            if self.__init_ecc is None:
-                if self.__ged_options['edit_cost'] == 'LETTER':
-                    self.__edit_cost_constants = [0.9, 1.7, 0.75]
-                elif self.__ged_options['edit_cost'] == 'LETTER2':
-                    self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
+                print('edit cost constants used:', self._edit_cost_constants)
+        elif self._fit_method == 'expert': # expert
+            if self._init_ecc is None:
+                if self._ged_options['edit_cost'] == 'LETTER':
+                    self._edit_cost_constants = [0.9, 1.7, 0.75]
+                elif self._ged_options['edit_cost'] == 'LETTER2':
+                    self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
                else:
-                    self.__edit_cost_constants = [3, 3, 1, 3, 3, 1]
+                    self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
            else:
-                self.__edit_cost_constants = self.__init_ecc
-        elif self.__fit_method == 'k-graphs':
-            if self.__init_ecc is None:
-                if self.__ged_options['edit_cost'] == 'LETTER':
-                    self.__init_ecc = [0.9, 1.7, 0.75]
-                elif self.__ged_options['edit_cost'] == 'LETTER2':
-                    self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
-                elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
-                    self.__init_ecc = [0, 0, 1, 1, 1, 0]
+                self._edit_cost_constants = self._init_ecc
+        elif self._fit_method == 'k-graphs':
+            if self._init_ecc is None:
+                if self._ged_options['edit_cost'] == 'LETTER':
+                    self._init_ecc = [0.9, 1.7, 0.75]
+                elif self._ged_options['edit_cost'] == 'LETTER2':
+                    self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
+                elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
+                    self._init_ecc = [0, 0, 1, 1, 1, 0]
                    if self._dataset.node_attrs == []:
-                        self.__init_ecc[2] = 0
+                        self._init_ecc[2] = 0
                    if self._dataset.edge_attrs == []:
-                        self.__init_ecc[5] = 0
+                        self._init_ecc[5] = 0
                else:
-                    self.__init_ecc = [3, 3, 1, 3, 3, 1]
+                    self._init_ecc = [3, 3, 1, 3, 3, 1]
            # optimize on the k-graph subset.
-            self.__optimize_ecm_by_kernel_distances()
-        elif self.__fit_method == 'whole-dataset':
-            if self.__init_ecc is None:
-                if self.__ged_options['edit_cost'] == 'LETTER':
-                    self.__init_ecc = [0.9, 1.7, 0.75]
-                elif self.__ged_options['edit_cost'] == 'LETTER2':
-                    self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
+            self._optimize_ecm_by_kernel_distances()
+        elif self._fit_method == 'whole-dataset':
+            if self._init_ecc is None:
+                if self._ged_options['edit_cost'] == 'LETTER':
+                    self._init_ecc = [0.9, 1.7, 0.75]
+                elif self._ged_options['edit_cost'] == 'LETTER2':
+                    self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
                else:
-                    self.__init_ecc = [3, 3, 1, 3, 3, 1]
+                    self._init_ecc = [3, 3, 1, 3, 3, 1]
            # optimizeon the whole set.
-            self.__optimize_ecc_by_kernel_distances()
-        elif self.__fit_method == 'precomputed':
+            self._optimize_ecc_by_kernel_distances()
+        elif self._fit_method == 'precomputed':
            pass


-    def __initialize_label_costs(self):
-        self.__initialize_node_label_costs()
-        self.__initialize_edge_label_costs()
+    def _initialize_label_costs(self):
+        self._initialize_node_label_costs()
+        self._initialize_edge_label_costs()


-    def __initialize_node_label_costs(self):
+    def _initialize_node_label_costs(self):
        # Get list of node labels.
        nls = self._dataset.get_all_node_labels()
        # Generate random costs.
        nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
        rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
        rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
-        self.__node_label_costs = rand_costs
+        self._node_label_costs = rand_costs


-    def __initialize_edge_label_costs(self):
+    def _initialize_edge_label_costs(self):
        # Get list of edge labels.
        els = self._dataset.get_all_edge_labels()
        # Generate random costs.
        nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els))
        rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el)
        rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
-        self.__edge_label_costs = rand_costs
+        self._edge_label_costs = rand_costs
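The `nb_nl` / `nb_el` formulas in the two initializers above count one substitution cost per unordered pair of distinct labels plus one insertion and one deletion cost per label, which is exactly the length of the random cost vector being drawn:

    def num_label_costs(n_labels):
        # n*(n-1)/2 substitution pairs + n insertions + n deletions
        return n_labels * (n_labels - 1) // 2 + 2 * n_labels

    assert num_label_costs(3) == 9   # 3 pairs + 3 insertions + 3 deletions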
-    def __optimize_ecm_by_kernel_distances(self):
+    def _optimize_ecm_by_kernel_distances(self):
        # compute distances in feature space.
        dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
        dis_k_vec = []

@@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
        dis_k_vec = np.array(dis_k_vec)

        # Set GEDEnv options.
-        # graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
-        # self.__edit_cost_constants = self.__init_ecc
-        options = self.__ged_options.copy()
-        options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed.
+        # graphs = [self._clean_graph(g) for g in self._dataset.graphs]
+        # self._edit_cost_constants = self._init_ecc
+        options = self._ged_options.copy()
+        options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed.
        options['node_labels'] = self._dataset.node_labels
        options['edge_labels'] = self._dataset.edge_labels
        # options['node_attrs'] = self._dataset.node_attrs
        # options['edge_attrs'] = self._dataset.edge_attrs
-        options['node_label_costs'] = self.__node_label_costs
-        options['edge_label_costs'] = self.__edge_label_costs
+        options['node_label_costs'] = self._node_label_costs
+        options['edge_label_costs'] = self._edge_label_costs
        # Learner cost matrices.
        # Initialize cost learner.
-        cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo
-        cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec)
+        cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo
+        cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec)
        # Run cost learner.
        cml.update(dis_k_vec, self._dataset.graphs, options)
        # Get results.
        results = cml.get_results()
-        self.__converged = results['converged']
-        self.__itrs = results['itrs']
-        self.__num_updates_ecs = results['num_updates_ecs']
+        self._converged = results['converged']
+        self._itrs = results['itrs']
+        self._num_updates_ecs = results['num_updates_ecs']
        cost_list = results['cost_list']
-        self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)]
-        self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):]
+        self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)]
+        self._edge_label_costs = cost_list[-1][len(self._node_label_costs):]


-    def __gmg_bcu(self):
+    def _gmg_bcu(self):
        """
        The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).

@@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
        # Set up the ged environment.
        ged_env = GEDEnv() # @todo: maybe create a ged_env as a private varible.
        # gedlibpy.restart_env()
-        ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants)
-        graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
+        ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants)
+        graphs = [self._clean_graph(g) for g in self._dataset.graphs]
        for g in graphs:
            ged_env.add_nx_graph(g, '')
        graph_ids = ged_env.get_all_graph_ids()
        node_labels = ged_env.get_all_node_labels()
        edge_labels = ged_env.get_all_edge_labels()
-        node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels))
-        edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels))
+        node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels))
+        edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels))
        ged_env.set_label_costs(node_label_costs, edge_label_costs)
        set_median_id = ged_env.add_graph('set_median')
        gen_median_id = ged_env.add_graph('gen_median')
-        ged_env.init(init_type=self.__ged_options['init_option'])
+        ged_env.init(init_type=self._ged_options['init_option'])

        # Set up the madian graph estimator.
-        self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
-        self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
-        options = self.__mge_options.copy()
+        self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost']))
+        self._mge.set_refine_method(self._ged_options['method'], self._ged_options)
+        options = self._mge_options.copy()
        if not 'seed' in options:
            options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
-        options['parallel'] = self.__parallel
+        options['parallel'] = self._parallel

        # Select the GED algorithm.
-        self.__mge.set_options(mge_options_to_string(options))
-        self.__mge.set_label_names(node_labels=self._dataset.node_labels,
+        self._mge.set_options(mge_options_to_string(options))
+        self._mge.set_label_names(node_labels=self._dataset.node_labels,
                                   edge_labels=self._dataset.edge_labels,
                                   node_attrs=self._dataset.node_attrs,
                                   edge_attrs=self._dataset.edge_attrs)
-        ged_options = self.__ged_options.copy()
-        if self.__parallel:
+        ged_options = self._ged_options.copy()
+        if self._parallel:
            ged_options['threads'] = 1
-        self.__mge.set_init_method(ged_options['method'], ged_options)
-        self.__mge.set_descent_method(ged_options['method'], ged_options)
+        self._mge.set_init_method(ged_options['method'], ged_options)
+        self._mge.set_descent_method(ged_options['method'], ged_options)

        # Run the estimator.
-        self.__mge.run(graph_ids, set_median_id, gen_median_id)
+        self._mge.run(graph_ids, set_median_id, gen_median_id)

        # Get SODs.
-        self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
-        self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
+        self._sod_set_median = self._mge.get_sum_of_distances('initialized')
+        self._sod_gen_median = self._mge.get_sum_of_distances('converged')

        # Get median graphs.
-        self.__set_median = ged_env.get_nx_graph(set_median_id)
-        self.__gen_median = ged_env.get_nx_graph(gen_median_id)
+        self._set_median = ged_env.get_nx_graph(set_median_id)
+        self._gen_median = ged_env.get_nx_graph(gen_median_id)


-    def __compute_distances_to_true_median(self):
+    def _compute_distances_to_true_median(self):
        # compute distance in kernel space for set median.
-        kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
-        kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
+        kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options)
+        kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options)
        if self._kernel_options['normalize']:
-            kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
+            kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
            kernel_sm = 1
        # @todo: not correct kernel value
        gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
        gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1)
-        self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
+        self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
                                               [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
                                               gram_with_sm, withterm3=False)

        # compute distance in kernel space for generalized median.
-        kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
-        kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
+        kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options)
+        kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options)
        if self._kernel_options['normalize']:
-            kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
+            kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
            kernel_gm = 1
        gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
        gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1)
-        self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
+        self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
                                               [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
                                               gram_with_gm, withterm3=False)

@@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
                                                  [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
                                                  gram_with_gm, withterm3=False))
        idx_k_dis_median_set_min = np.argmin(k_dis_median_set)
-        self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
-        self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
+        self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
+        self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
        if self._verbose >= 2:
            print()
-            print('distance in kernel space for set median:', self.__k_dis_set_median)
-            print('distance in kernel space for generalized median:', self.__k_dis_gen_median)
-            print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
+            print('distance in kernel space for set median:', self._k_dis_set_median)
+            print('distance in kernel space for generalized median:', self._k_dis_gen_median)
+            print('minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
            print('distance in kernel space for each graph in median set:', k_dis_median_set)
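For reference, `compute_k_dis` as called here evaluates the distance in kernel space between one embedded graph and a weighted mean of embedded graphs. A hedged reconstruction inferred from the call sites in this diff (the real helper lives elsewhere in gklearn and may differ in details):

    import numpy as np

    def compute_k_dis_sketch(idx, idx_range, alphas, gram, term3=0.0, withterm3=True):
        # d^2 = k(g, g) - 2 * sum_i a_i k(g, g_i) (+ sum_{i,j} a_i a_j k(g_i, g_j))
        term1 = gram[idx, idx]
        term2 = 2.0 * sum(a * gram[idx, j] for a, j in zip(alphas, idx_range))
        d2 = term1 - term2 + (term3 if withterm3 else 0.0)
        return np.sqrt(max(d2, 0.0))  # guard against tiny negative round-off

With `withterm3=False`, as above, the constant third term is dropped: it is identical for the set median, the generalized median, and every dataset graph, so it does not affect which candidate is closest.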
-    # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
-    def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
+    # def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
+    def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
        """
        Cleans node and edge labels and attributes of the given graph.
        """

@@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
    @property
    def mge(self):
-        return self.__mge
+        return self._mge

    @property
    def ged_options(self):
-        return self.__ged_options
+        return self._ged_options


    @ged_options.setter
    def ged_options(self, value):
-        self.__ged_options = value
+        self._ged_options = value


    @property
    def mge_options(self):
-        return self.__mge_options
+        return self._mge_options


    @mge_options.setter
    def mge_options(self, value):
-        self.__mge_options = value
+        self._mge_options = value


    @property
    def fit_method(self):
-        return self.__fit_method
+        return self._fit_method


    @fit_method.setter
    def fit_method(self, value):
-        self.__fit_method = value
+        self._fit_method = value

    @property
    def init_ecc(self):
-        return self.__init_ecc
+        return self._init_ecc


    @init_ecc.setter
    def init_ecc(self, value):
-        self.__init_ecc = value
+        self._init_ecc = value

    @property
    def set_median(self):
-        return self.__set_median
+        return self._set_median


    @property
    def gen_median(self):
-        return self.__gen_median
+        return self._gen_median

    @property
    def best_from_dataset(self):
-        return self.__best_from_dataset
+        return self._best_from_dataset

    @property
    def gram_matrix_unnorm(self):
-        return self.__gram_matrix_unnorm
+        return self._gram_matrix_unnorm

    @gram_matrix_unnorm.setter
    def gram_matrix_unnorm(self, value):
-        self.__gram_matrix_unnorm = value
+        self._gram_matrix_unnorm = value

+283 -283   gklearn/preimage/median_preimage_generator_py.py
    (File diff suppressed because it is too large)

+109 -109   gklearn/preimage/random_preimage_generator.py
@@ -26,43 +26,43 @@ class RandomPreimageGenerator(PreimageGenerator):
    def __init__(self, dataset=None):
        PreimageGenerator.__init__(self, dataset=dataset)
        # arguments to set.
-        self.__k = 5 # number of nearest neighbors of phi in D_N.
-        self.__r_max = 10 # maximum number of iterations.
-        self.__l = 500 # numbers of graphs generated for each graph in D_k U {g_i_hat}.
-        self.__alphas = None # weights of linear combinations of points in kernel space.
-        self.__parallel = True
-        self.__n_jobs = multiprocessing.cpu_count()
-        self.__time_limit_in_sec = 0
-        self.__max_itrs = 20
+        self._k = 5 # number of nearest neighbors of phi in D_N.
+        self._r_max = 10 # maximum number of iterations.
+        self._l = 500 # numbers of graphs generated for each graph in D_k U {g_i_hat}.
+        self._alphas = None # weights of linear combinations of points in kernel space.
+        self._parallel = True
+        self._n_jobs = multiprocessing.cpu_count()
+        self._time_limit_in_sec = 0
+        self._max_itrs = 20
        # values to compute.
-        self.__runtime_generate_preimage = None
-        self.__runtime_total = None
-        self.__preimage = None
-        self.__best_from_dataset = None
-        self.__k_dis_preimage = None
-        self.__k_dis_dataset = None
-        self.__itrs = 0
-        self.__converged = False # @todo
-        self.__num_updates = 0
+        self._runtime_generate_preimage = None
+        self._runtime_total = None
+        self._preimage = None
+        self._best_from_dataset = None
+        self._k_dis_preimage = None
+        self._k_dis_dataset = None
+        self._itrs = 0
+        self._converged = False # @todo
+        self._num_updates = 0
        # values that can be set or to be computed.
-        self.__gram_matrix_unnorm = None
-        self.__runtime_precompute_gm = None
+        self._gram_matrix_unnorm = None
+        self._runtime_precompute_gm = None


    def set_options(self, **kwargs):
        self._kernel_options = kwargs.get('kernel_options', {})
        self._graph_kernel = kwargs.get('graph_kernel', None)
        self._verbose = kwargs.get('verbose', 2)
-        self.__k = kwargs.get('k', 5)
-        self.__r_max = kwargs.get('r_max', 10)
-        self.__l = kwargs.get('l', 500)
-        self.__alphas = kwargs.get('alphas', None)
-        self.__parallel = kwargs.get('parallel', True)
-        self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
-        self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
-        self.__max_itrs = kwargs.get('max_itrs', 20)
-        self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
-        self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
+        self._k = kwargs.get('k', 5)
+        self._r_max = kwargs.get('r_max', 10)
+        self._l = kwargs.get('l', 500)
+        self._alphas = kwargs.get('alphas', None)
+        self._parallel = kwargs.get('parallel', True)
+        self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+        self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
+        self._max_itrs = kwargs.get('max_itrs', 20)
+        self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
+        self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)


    def run(self):

@@ -78,65 +78,65 @@ class RandomPreimageGenerator(PreimageGenerator):
        start = time.time()

        # 1. precompute gram matrix.
-        if self.__gram_matrix_unnorm is None:
+        if self._gram_matrix_unnorm is None:
            gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
-            self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
+            self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
            end_precompute_gm = time.time()
-            self.__runtime_precompute_gm = end_precompute_gm - start
+            self._runtime_precompute_gm = end_precompute_gm - start
        else:
-            if self.__runtime_precompute_gm is None:
+            if self._runtime_precompute_gm is None:
                raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
-            self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
+            self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
            if self._kernel_options['normalize']:
-                self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+                self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
            else:
-                self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
+                self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
            end_precompute_gm = time.time()
-            start -= self.__runtime_precompute_gm
+            start -= self._runtime_precompute_gm

        # 2. compute k nearest neighbors of phi in D_N.
        if self._verbose >= 2:
            print('\nstart computing k nearest neighbors of phi in D_N...\n')
        D_N = self._dataset.graphs
-        if self.__alphas is None:
-            self.__alphas = [1 / len(D_N)] * len(D_N)
+        if self._alphas is None:
+            self._alphas = [1 / len(D_N)] * len(D_N)
        k_dis_list = [] # distance between g_star and each graph.
        term3 = 0
-        for i1, a1 in enumerate(self.__alphas):
-            for i2, a2 in enumerate(self.__alphas):
+        for i1, a1 in enumerate(self._alphas):
+            for i2, a2 in enumerate(self._alphas):
                term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2]
        for idx in range(len(D_N)):
-            k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
+            k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self._alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))

        # sort.
        sort_idx = np.argsort(k_dis_list)
-        dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances.
+        dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self._k]] # the k shortest distances.
        nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
        g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N
-        self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are muitlple.
-        self.__k_dis_dataset = dis_gs[0]
+        self._best_from_dataset = g0hat_list[0] # get the first best graph if there are muitlple.
+        self._k_dis_dataset = dis_gs[0]

-        if self.__k_dis_dataset == 0: # get the exact pre-image.
+        if self._k_dis_dataset == 0: # get the exact pre-image.
            end_generate_preimage = time.time()
-            self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
-            self.__runtime_total = end_generate_preimage - start
-            self.__preimage = self.__best_from_dataset.copy()
-            self.__k_dis_preimage = self.__k_dis_dataset
+            self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm
+            self._runtime_total = end_generate_preimage - start
+            self._preimage = self._best_from_dataset.copy()
+            self._k_dis_preimage = self._k_dis_dataset
            if self._verbose:
                print()
                print('=============================================================================')
                print('The exact pre-image is found from the input dataset.')
                print('-----------------------------------------------------------------------------')
-                print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset)
-                print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
-                print('Time to generate pre-images:', self.__runtime_generate_preimage)
-                print('Total time:', self.__runtime_total)
+                print('Distance in kernel space for the best graph from dataset and for preimage:', self._k_dis_dataset)
+                print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
+                print('Time to generate pre-images:', self._runtime_generate_preimage)
+                print('Total time:', self._runtime_total)
                print('=============================================================================')
                print()
            return

        dhat = dis_gs[0] # the nearest distance
-        Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors
+        Gk = [D_N[ig].copy() for ig in sort_idx[0:self._k]] # the k nearest neighbors
        Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk]

        # 3. start iterations.
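A note on step 2 above: `term3` depends only on the weights and the Gram matrix of the dataset, not on the candidate graph, so the O(N²) double sum is hoisted out of the candidate loop and passed into `compute_k_dis`. A toy run with a hypothetical 3×3 normalized Gram matrix and uniform weights:

    import numpy as np

    gram = np.array([[1.0, 0.8, 0.1],
                     [0.8, 1.0, 0.2],
                     [0.1, 0.2, 1.0]])
    alphas = [1 / 3] * 3
    # computed once; identical for every candidate below
    term3 = sum(a1 * a2 * gram[i1, i2]
                for i1, a1 in enumerate(alphas)
                for i2, a2 in enumerate(alphas))
    # squared kernel distance of each graph to the weighted mean
    k_dis = [gram[i, i] - 2 * sum(a * gram[i, j] for j, a in enumerate(alphas)) + term3
             for i in range(3)]
    sort_idx = np.argsort(k_dis)  # the square root is monotone, so the ranking is unchanged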
@@ -146,12 +146,12 @@ class RandomPreimageGenerator(PreimageGenerator):
        dihat_list = []
        r = 0
        dis_of_each_itr = [dhat]
-        if self.__parallel:
+        if self._parallel:
            self._kernel_options['parallel'] = None
-        self.__itrs = 0
-        self.__num_updates = 0
-        timer = Timer(self.__time_limit_in_sec)
-        while not self.__termination_criterion_met(timer, self.__itrs, r):
+        self._itrs = 0
+        self._num_updates = 0
+        timer = Timer(self._time_limit_in_sec)
+        while not self._termination_criterion_met(timer, self._itrs, r):
            print('\n- r =', r)
            found = False
            dis_bests = dis_gs + dihat_list

@@ -173,7 +173,7 @@ class RandomPreimageGenerator(PreimageGenerator):
            nb_modif = 1
            for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)):
                nb_modif *= nb / (fdgs_max - idx)
-            while fdgs_max < nb_vpairs_min and nb_modif < self.__l:
+            while fdgs_max < nb_vpairs_min and nb_modif < self._l:
                fdgs_max += 1
                nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max
            nb_increase = int(fdgs_max - fdgs_max_old)
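The arithmetic in the hunk above maintains `nb_modif` as the binomial coefficient C(nb_vpairs_min, fdgs_max), i.e. the number of distinct ways to pick `fdgs_max` vertex-pair modifications, and grows `fdgs_max` until at least `l` candidate modifications exist. A self-contained check of the incremental update with hypothetical numbers:

    from math import comb

    n, k, l = 30, 2, 500
    nb_modif = float(comb(n, k))        # C(30, 2) = 435 < 500
    while k < n and nb_modif < l:
        k += 1
        nb_modif *= (n - k + 1) / k     # C(n, k) from C(n, k-1)
    assert round(nb_modif) == comb(n, k) == 4060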
@@ -184,7 +184,7 @@ class RandomPreimageGenerator(PreimageGenerator):
for ig, gs in enumerate(Gs_nearest + gihat_list):
if self._verbose >= 2:
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list))
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
gnew, dhat, found = self._generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
if found:
r = 0
@@ -194,51 +194,51 @@ class RandomPreimageGenerator(PreimageGenerator):
r += 1
dis_of_each_itr.append(dhat)
self.__itrs += 1
self._itrs += 1
if self._verbose >= 2:
print('Total number of iterations is', self.__itrs, '.')
print('The preimage is updated', self.__num_updates, 'times.')
print('Total number of iterations is', self._itrs, '.')
print('The preimage is updated', self._num_updates, 'times.')
print('The shortest distances for previous iterations are', dis_of_each_itr, '.')
# get results and print.
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self.__k_dis_preimage = dhat
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self._runtime_total = end_generate_preimage - start
self._preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self._k_dis_preimage = dhat
if self._verbose:
print()
print('=============================================================================')
print('Finished generation of preimages.')
print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset)
print('Distance in kernel space for the preimage:', self.__k_dis_preimage)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating preimage:', self.__num_updates)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Distance in kernel space for the best graph from dataset:', self._k_dis_dataset)
print('Distance in kernel space for the preimage:', self._k_dis_preimage)
print('Total number of iterations for optimizing:', self._itrs)
print('Total number of updating preimage:', self._num_updates)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('=============================================================================')
print()
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self.__parallel:
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
def _generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self._parallel:
gnew, dhat, found = self._generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
else:
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
gnew, dhat, found = self._generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
return gnew, dhat, found
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
def _generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
updated = False
for trial in range(0, self.__l):
for trial in range(0, self._l):
if self._verbose >= 2:
print('---', trial + 1, 'trial out of', self.__l)
print('---', trial + 1, 'trial out of', self._l)


gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial)


# get the better graph preimage.
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
@@ -257,14 +257,14 @@ class RandomPreimageGenerator(PreimageGenerator):
found = True # found better or equally good graph.
if updated:
self.__num_updates += 1
self._num_updates += 1
return gnew, dhat, found
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
def _generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
len_itr = self.__l
len_itr = self._l
gnew_list = [None] * len_itr
dnew_list = [None] * len_itr
itr = range(0, len_itr)
@@ -295,7 +295,7 @@ class RandomPreimageGenerator(PreimageGenerator):
print('I am smaller!')
print('index (as in D_k U {gihat}) =', str(ig))
print('distance:', dhat, '->', dnew, '\n')
self.__num_updates += 1
self._num_updates += 1
else:
if self._verbose >= 2:
print('I am equal!')
@@ -308,11 +308,11 @@ class RandomPreimageGenerator(PreimageGenerator):
def _generate_graph_parallel(self, g_init, fdgs, term3, itr):
trial = itr
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial)
return trial, gtemp, dnew
def __do_trial(self, g_init, fdgs, term3, trial):
def _do_trial(self, g_init, fdgs, term3, trial):
# add and delete edges.
gtemp = g_init.copy()
seed = (trial + int(time.time())) % (2 ** 32 - 1)
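The seed expression above mixes the trial index with the wall clock and reduces the result modulo 2 ** 32 - 1, keeping it inside the range accepted by NumPy's legacy seeding while still varying across trials and across runs. A small sketch of the same idea (feeding the seed to np.random.RandomState is an assumption; the hunk does not show how seed is consumed):

import time
import numpy as np

trial = 3
seed = (trial + int(time.time())) % (2 ** 32 - 1)  # a legal 32-bit seed
rng = np.random.RandomState(seed)
print(rng.randint(10))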
@@ -339,51 +339,51 @@ class RandomPreimageGenerator(PreimageGenerator):
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernel_gtmp = 1
# @todo: not correct kernel value
gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self._alphas, gram_with_gtmp, term3=term3, withterm3=True)
return gtemp, dnew
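The normalize branch above applies the usual cosine normalization of a kernel, k~(x, y) = k(x, y) / sqrt(k(x, x) * k(y, y)), to the row of kernel values for the candidate graph. A minimal matrix-form sketch of the same rule (the library's own helper for this, normalize_gram_matrix, is used further down in remove_best_graph.py):

import numpy as np

def normalize_gram(K):
    # k~(i, j) = K[i, j] / sqrt(K[i, i] * K[j, j]), for the whole matrix at once.
    d = np.sqrt(np.diag(K))
    return K / np.outer(d, d)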


def get_results(self):
results = {}
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['k_dis_dataset'] = self.__k_dis_dataset
results['k_dis_preimage'] = self.__k_dis_preimage
results['itrs'] = self.__itrs
results['num_updates'] = self.__num_updates
results['runtime_precompute_gm'] = self._runtime_precompute_gm
results['runtime_generate_preimage'] = self._runtime_generate_preimage
results['runtime_total'] = self._runtime_total
results['k_dis_dataset'] = self._k_dis_dataset
results['k_dis_preimage'] = self._k_dis_preimage
results['itrs'] = self._itrs
results['num_updates'] = self._num_updates
return results




def __termination_criterion_met(self, timer, itr, r):
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
# if self.__state == AlgorithmState.TERMINATED:
# self.__state = AlgorithmState.INITIALIZED
def _termination_criterion_met(self, timer, itr, r):
if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
# if self._state == AlgorithmState.TERMINATED:
# self._state = AlgorithmState.INITIALIZED
return True
return (r >= self.__r_max if self.__r_max >= 0 else False)
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
return (r >= self._r_max if self._r_max >= 0 else False)
# return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)
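_termination_criterion_met combines three stopping rules: a wall-clock budget (timer.expired()), an optional cap on total iterations, and an optional cap on consecutive non-improving rounds r, where a negative cap disables the corresponding check. A standalone sketch of the same logic, with a hypothetical minimal stand-in for the library's Timer:

import time

class Timer:  # hypothetical stand-in for gklearn's Timer
    def __init__(self, limit_in_sec):
        self._deadline = None if limit_in_sec is None else time.time() + limit_in_sec
    def expired(self):
        return self._deadline is not None and time.time() > self._deadline

def termination_criterion_met(timer, itr, r, max_itrs=100, r_max=10):
    if timer.expired() or (itr >= max_itrs if max_itrs >= 0 else False):
        return True
    return (r >= r_max if r_max >= 0 else False)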
@property
def preimage(self):
return self.__preimage
return self._preimage
@property
def best_from_dataset(self):
return self.__best_from_dataset
return self._best_from_dataset
@property
def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm
return self._gram_matrix_unnorm
@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value
self._gram_matrix_unnorm = value
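gram_matrix_unnorm is the only property here with a setter, so a precomputed unnormalized Gram matrix can be injected instead of recomputed. A hedged usage sketch, assuming an existing generator instance gen and an .npz file written by the caching code shown below in remove_best_graph.py (the path is hypothetical, and loading may additionally need allow_pickle=True depending on how the arrays were stored):

import numpy as np

gmfile = np.load('outputs/gram_matrix_unnorm.MUTAG.Treelet.gm.npz')  # hypothetical path
gen.gram_matrix_unnorm = gmfile['gram_matrix_unnorm_list'][0]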

+ 10
- 10
gklearn/preimage/remove_best_graph.py View File

@@ -35,13 +35,13 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
else:
fn_output_detail, fn_output_summary = None, None
# 2. compute/load Gram matrix a priori.
print('2. computing/loading Gram matrix...')
gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)
gram_matrix_unnorm_list, time_precompute_gm_list = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)
sod_sm_list = []
sod_gm_list = []
@@ -82,7 +82,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
# 3. get the best graph and remove it from median set.
print('3. getting and removing the best graph...')
gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset]
best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
best_index, best_dis, best_graph = _get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index]
num_graphs -= 1
if num_graphs == 1:
@@ -294,7 +294,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
print('\ncomplete.\n')




def __get_best_graph(Gn, gram_matrix):
def _get_best_graph(Gn, gram_matrix):
k_dis_list = []
for idx in range(len(Gn)):
k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False))
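_get_best_graph scores every graph by its kernel distance to the uniformly weighted mean of the whole set in feature space and keeps the minimizer. A sketch of the selection step that the truncated hunk implies, using the same compute_k_dis helper as the lines above and the (best_index, best_dis, best_graph) triple unpacked at the call site:

import numpy as np

k_dis_list = [compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn),
                            gram_matrix, withterm3=False)
              for idx in range(len(Gn))]
best_index = int(np.argmin(k_dis_list))
best_dis = k_dis_list[best_index]
best_graph = Gn[best_index].copy()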
@@ -313,7 +313,7 @@ def get_relations(sign):
return 'worse'




def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
if load_gm == 'auto':
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
@@ -325,7 +325,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
for dataset in datasets:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
time_precompute_gm_list.append(time_precompute_gm)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
@@ -333,7 +333,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
for dataset in datasets:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
time_precompute_gm_list.append(time_precompute_gm)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
@@ -346,7 +346,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
return gram_matrix_unnorm_list, time_precompute_gm_list
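Both branches of _get_gram_matrix end in the same compute-then-cache step, persisting the unnormalized matrices with np.savez under a name built from the dataset and kernel. A condensed sketch of the load-or-compute pattern (assuming the arrays round-trip through np.load unchanged):

import os
import numpy as np

def load_or_compute(gm_fname, compute_all):
    # Reuse the cached .npz when present; otherwise compute and cache.
    if os.path.isfile(os.path.abspath(gm_fname)):
        gmfile = np.load(gm_fname)
        return list(gmfile['gram_matrix_unnorm_list']), list(gmfile['run_time_list'])
    gm_list, time_list = compute_all()
    np.savez(gm_fname, gram_matrix_unnorm_list=gm_list, run_time_list=time_list)
    return gm_list, time_list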




def __get_graph_kernel(dataset, kernel_options):
def _get_graph_kernel(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -358,7 +358,7 @@ def __get_graph_kernel(dataset, kernel_options):
return graph_kernel
def __compute_gram_matrix_unnorm(dataset, kernel_options):
def _compute_gram_matrix_unnorm(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -374,7 +374,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
return gram_matrix_unnorm, run_time
def __init_output_file(ds_name, gkernel, fit_method, dir_output):
def _init_output_file(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'


+ 2
- 2
gklearn/preimage/utils.py View File

@@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
sod_sm_list = []
sod_gm_list = []
@@ -307,7 +307,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
print('\ncomplete.\n')


def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
def _init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'


+ 171
- 28
gklearn/tests/test_graph_kernels.py View File

@@ -109,45 +109,183 @@ def test_Marginalized(ds_name, parallel, remove_totters):
assert False, exception
@pytest.mark.parametrize('ds_name', ['Acyclic'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_SylvesterEquation(ds_name, parallel):
"""Test sylvester equation kernel.
"""
from gklearn.kernels import SylvesterEquation
dataset = chooseDataset(ds_name)

try:
graph_kernel = SylvesterEquation(
ds_infos=dataset.get_dataset_infos(keys=['directed']),
weight=1e-3,
p=None,
q=None,
edge_weight=None)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
except Exception as exception:
assert False, exception
@pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_ConjugateGradient(ds_name, parallel):
"""Test conjugate gradient kernel.
"""
from gklearn.kernels import ConjugateGradient
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
import functools
dataset = chooseDataset(ds_name)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

try:
graph_kernel = ConjugateGradient(
node_labels=dataset.node_labels,
node_attrs=dataset.node_attrs,
edge_labels=dataset.edge_labels,
edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
weight=1e-3,
p=None,
q=None,
edge_weight=None,
node_kernels=sub_kernels,
edge_kernels=sub_kernels)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
except Exception as exception:
assert False, exception
@pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_FixedPoint(ds_name, parallel):
"""Test fixed point kernel.
"""
from gklearn.kernels import FixedPoint
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
import functools
dataset = chooseDataset(ds_name)
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}

try:
graph_kernel = FixedPoint(
node_labels=dataset.node_labels,
node_attrs=dataset.node_attrs,
edge_labels=dataset.edge_labels,
edge_attrs=dataset.edge_attrs,
ds_infos=dataset.get_dataset_infos(keys=['directed']),
weight=1e-3,
p=None,
q=None,
edge_weight=None,
node_kernels=sub_kernels,
edge_kernels=sub_kernels)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
except Exception as exception:
assert False, exception
@pytest.mark.parametrize('ds_name', ['Acyclic'])
@pytest.mark.parametrize('sub_kernel', ['exp', 'geo'])
@pytest.mark.parametrize('parallel', ['imap_unordered', None])
def test_SpectralDecomposition(ds_name, sub_kernel, parallel):
"""Test spectral decomposition kernel.
"""
from gklearn.kernels import SpectralDecomposition
dataset = chooseDataset(ds_name)

try:
graph_kernel = SpectralDecomposition(
ds_infos=dataset.get_dataset_infos(keys=['directed']),
weight=1e-3,
p=None,
q=None,
edge_weight=None,
sub_kernel=sub_kernel)
gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
except Exception as exception:
assert False, exception
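The four new tests share one skeleton: build the kernel from the dataset's label metadata, then exercise the three compute signatures (full Gram matrix, one graph against a list, one graph against one graph). They can be run in isolation with pytest's keyword filter, e.g.:

import pytest

# Select only the new random-walk kernel tests by name.
pytest.main(['gklearn/tests/test_graph_kernels.py', '-v',
             '-k', 'Sylvester or ConjugateGradient or FixedPoint or SpectralDecomposition'])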
# @pytest.mark.parametrize(
# 'compute_method,ds_name,sub_kernel',
# [
# # ('sylvester', 'Alkane', None),
# # ('conjugate', 'Alkane', None),
# # ('conjugate', 'AIDS', None),
# # ('fp', 'Alkane', None),
# # ('fp', 'AIDS', None),
# ('sylvester', 'Alkane', None),
# ('conjugate', 'Alkane', None),
# ('conjugate', 'AIDS', None),
# ('fp', 'Alkane', None),
# ('fp', 'AIDS', None),
# ('spectral', 'Alkane', 'exp'),
# ('spectral', 'Alkane', 'geo'),
# ]
# )
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel):
# """Test random walk kernel kernel.
# @pytest.mark.parametrize('parallel', ['imap_unordered', None])
# def test_RandomWalk(ds_name, compute_method, sub_kernel, parallel):
# """Test random walk kernel.
# """ # """
# from gklearn.kernels.randomWalkKernel import randomwalkkernel
# from gklearn.kernels import RandomWalk
# from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct
# import functools # import functools
# Gn, y = chooseDataset(ds_name)
#
# dataset = chooseDataset(ds_name)


# mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
# sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}]
# try:
# Kmatrix, run_time, idx = randomwalkkernel(Gn,
# compute_method=compute_method,
# weight=1e-3,
# p=None,
# q=None,
# edge_weight=None,
# node_kernels=sub_kernels,
# edge_kernels=sub_kernels,
# node_label='atom',
# edge_label='bond_type',
# sub_kernel=sub_kernel,
# # parallel=parallel,
# n_jobs=multiprocessing.cpu_count(),
# verbose=True)
# sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
# # try:
# graph_kernel = RandomWalk(node_labels=dataset.node_labels,
# node_attrs=dataset.node_attrs,
# edge_labels=dataset.edge_labels,
# edge_attrs=dataset.edge_attrs,
# ds_infos=dataset.get_dataset_infos(keys=['directed']),
# compute_method=compute_method,
# weight=1e-3,
# p=None,
# q=None,
# edge_weight=None,
# node_kernels=sub_kernels,
# edge_kernels=sub_kernels,
# sub_kernel=sub_kernel)
# gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
# parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
# kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
# parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
# kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
# parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

# except Exception as exception:
# assert False, exception


@@ -296,4 +434,9 @@ def test_WLSubtree(ds_name, parallel):


if __name__ == "__main__": if __name__ == "__main__":
# test_spkernel('Alkane', 'imap_unordered') # test_spkernel('Alkane', 'imap_unordered')
test_StructuralSP('Fingerprint_edge', 'imap_unordered')
# test_StructuralSP('Fingerprint_edge', 'imap_unordered')
test_WLSubtree('Acyclic', 'imap_unordered')
# test_RandomWalk('Acyclic', 'sylvester', None, 'imap_unordered')
# test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered')
# test_RandomWalk('Acyclic', 'fp', None, None)
# test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered')

+ 244
- 244
gklearn/utils/dataset.py View File

@@ -16,54 +16,54 @@ class Dataset(object):
def __init__(self, filename=None, filename_targets=None, **kwargs):
if filename is None:
self.__graphs = None
self.__targets = None
self.__node_labels = None
self.__edge_labels = None
self.__node_attrs = None
self.__edge_attrs = None
self._graphs = None
self._targets = None
self._node_labels = None
self._edge_labels = None
self._node_attrs = None
self._edge_attrs = None
else:
self.load_dataset(filename, filename_targets=filename_targets, **kwargs)
self.__substructures = None
self.__node_label_dim = None
self.__edge_label_dim = None
self.__directed = None
self.__dataset_size = None
self.__total_node_num = None
self.__ave_node_num = None
self.__min_node_num = None
self.__max_node_num = None
self.__total_edge_num = None
self.__ave_edge_num = None
self.__min_edge_num = None
self.__max_edge_num = None
self.__ave_node_degree = None
self.__min_node_degree = None
self.__max_node_degree = None
self.__ave_fill_factor = None
self.__min_fill_factor = None
self.__max_fill_factor = None
self.__node_label_nums = None
self.__edge_label_nums = None
self.__node_attr_dim = None
self.__edge_attr_dim = None
self.__class_number = None
self._substructures = None
self._node_label_dim = None
self._edge_label_dim = None
self._directed = None
self._dataset_size = None
self._total_node_num = None
self._ave_node_num = None
self._min_node_num = None
self._max_node_num = None
self._total_edge_num = None
self._ave_edge_num = None
self._min_edge_num = None
self._max_edge_num = None
self._ave_node_degree = None
self._min_node_degree = None
self._max_node_degree = None
self._ave_fill_factor = None
self._min_fill_factor = None
self._max_fill_factor = None
self._node_label_nums = None
self._edge_label_nums = None
self._node_attr_dim = None
self._edge_attr_dim = None
self._class_number = None
def load_dataset(self, filename, filename_targets=None, **kwargs):
self.__graphs, self.__targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs']
self.clean_labels()
def load_graphs(self, graphs, targets=None):
# this has to be followed by set_labels().
self.__graphs = graphs
self.__targets = targets
self._graphs = graphs
self._targets = targets
# self.set_labels_attrs() # @todo
@@ -71,108 +71,108 @@ class Dataset(object):
current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'Acyclic':
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'AIDS':
ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Alkane':
ds_file = current_path + '../../datasets/Alkane/dataset.ds'
fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file, filename_targets=fn_targets)
self._graphs, self._targets, label_names = load_dataset(ds_file, filename_targets=fn_targets)
elif ds_name == 'COIL-DEL':
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'COIL-RAG':
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'COLORS-3':
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Cuneiform':
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'DD':
ds_file = current_path + '../../datasets/DD/DD_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'ENZYMES':
ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Fingerprint':
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'FRANKENSTEIN':
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-high': # node non-symb
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-low': # node non-symb
ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-med': # node non-symb
ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'MAO':
ds_file = current_path + '../../datasets/MAO/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Monoterpenoides':
ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'MUTAG':
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI1':
ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI109':
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'PAH':
ds_file = current_path + '../../datasets/PAH/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'SYNTHETIC':
pass
elif ds_name == 'SYNTHETICnew':
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Synthie':
pass
else:
raise Exception('The dataset name "', ds_name, '" is not pre-defined.')
self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs']
self.clean_labels()


def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
self.__node_labels = node_labels
self.__node_attrs = node_attrs
self.__edge_labels = edge_labels
self.__edge_attrs = edge_attrs
self._node_labels = node_labels
self._node_attrs = node_attrs
self._edge_labels = edge_labels
self._edge_attrs = edge_attrs


def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None):
# @todo: remove labels which have only one possible values.
if node_labels is None:
self.__node_labels = self.__graphs[0].graph['node_labels']
self._node_labels = self._graphs[0].graph['node_labels']
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
if node_attrs is None:
self.__node_attrs = self.__graphs[0].graph['node_attrs']
self._node_attrs = self._graphs[0].graph['node_attrs']
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
if edge_labels is None:
self.__edge_labels = self.__graphs[0].graph['edge_labels']
self._edge_labels = self._graphs[0].graph['edge_labels']
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
if edge_attrs is None:
self.__edge_attrs = self.__graphs[0].graph['edge_attrs']
self._edge_attrs = self._graphs[0].graph['edge_attrs']
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
@@ -291,145 +291,145 @@ class Dataset(object):
# dataset size
if 'dataset_size' in keys:
if self.__dataset_size is None:
self.__dataset_size = self.__get_dataset_size()
infos['dataset_size'] = self.__dataset_size
if self._dataset_size is None:
self._dataset_size = self._get_dataset_size()
infos['dataset_size'] = self._dataset_size
# graph node number
if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']):
all_node_nums = self.__get_all_node_nums()
all_node_nums = self._get_all_node_nums()


if 'total_node_num' in keys:
if self.__total_node_num is None:
self.__total_node_num = self.__get_total_node_num(all_node_nums)
infos['total_node_num'] = self.__total_node_num
if self._total_node_num is None:
self._total_node_num = self._get_total_node_num(all_node_nums)
infos['total_node_num'] = self._total_node_num
if 'ave_node_num' in keys:
if self.__ave_node_num is None:
self.__ave_node_num = self.__get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self.__ave_node_num
if self._ave_node_num is None:
self._ave_node_num = self._get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self._ave_node_num
if 'min_node_num' in keys:
if self.__min_node_num is None:
self.__min_node_num = self.__get_min_node_num(all_node_nums)
infos['min_node_num'] = self.__min_node_num
if self._min_node_num is None:
self._min_node_num = self._get_min_node_num(all_node_nums)
infos['min_node_num'] = self._min_node_num
if 'max_node_num' in keys:
if self.__max_node_num is None:
self.__max_node_num = self.__get_max_node_num(all_node_nums)
infos['max_node_num'] = self.__max_node_num
if self._max_node_num is None:
self._max_node_num = self._get_max_node_num(all_node_nums)
infos['max_node_num'] = self._max_node_num
# graph edge number
if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']):
all_edge_nums = self.__get_all_edge_nums()
all_edge_nums = self._get_all_edge_nums()


if 'total_edge_num' in keys:
if self.__total_edge_num is None:
self.__total_edge_num = self.__get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self.__total_edge_num
if self._total_edge_num is None:
self._total_edge_num = self._get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self._total_edge_num
if 'ave_edge_num' in keys:
if self.__ave_edge_num is None:
self.__ave_edge_num = self.__get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self.__ave_edge_num
if self._ave_edge_num is None:
self._ave_edge_num = self._get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self._ave_edge_num
if 'max_edge_num' in keys:
if self.__max_edge_num is None:
self.__max_edge_num = self.__get_max_edge_num(all_edge_nums)
infos['max_edge_num'] = self.__max_edge_num
if self._max_edge_num is None:
self._max_edge_num = self._get_max_edge_num(all_edge_nums)
infos['max_edge_num'] = self._max_edge_num


if 'min_edge_num' in keys:
if self.__min_edge_num is None:
self.__min_edge_num = self.__get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self.__min_edge_num
if self._min_edge_num is None:
self._min_edge_num = self._get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self._min_edge_num
# label number
if 'node_label_dim' in keys:
if self.__node_label_dim is None:
self.__node_label_dim = self.__get_node_label_dim()
infos['node_label_dim'] = self.__node_label_dim
if self._node_label_dim is None:
self._node_label_dim = self._get_node_label_dim()
infos['node_label_dim'] = self._node_label_dim
if 'node_label_nums' in keys:
if self.__node_label_nums is None:
self.__node_label_nums = {}
for node_label in self.__node_labels:
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label)
infos['node_label_nums'] = self.__node_label_nums
if self._node_label_nums is None:
self._node_label_nums = {}
for node_label in self._node_labels:
self._node_label_nums[node_label] = self._get_node_label_num(node_label)
infos['node_label_nums'] = self._node_label_nums
if 'edge_label_dim' in keys:
if self.__edge_label_dim is None:
self.__edge_label_dim = self.__get_edge_label_dim()
infos['edge_label_dim'] = self.__edge_label_dim
if self._edge_label_dim is None:
self._edge_label_dim = self._get_edge_label_dim()
infos['edge_label_dim'] = self._edge_label_dim
if 'edge_label_nums' in keys:
if self.__edge_label_nums is None:
self.__edge_label_nums = {}
for edge_label in self.__edge_labels:
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label)
infos['edge_label_nums'] = self.__edge_label_nums
if self._edge_label_nums is None:
self._edge_label_nums = {}
for edge_label in self._edge_labels:
self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label)
infos['edge_label_nums'] = self._edge_label_nums
if 'directed' in keys or 'substructures' in keys:
if self.__directed is None:
self.__directed = self.__is_directed()
infos['directed'] = self.__directed
if self._directed is None:
self._directed = self._is_directed()
infos['directed'] = self._directed
# node degree
if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']):
all_node_degrees = self.__get_all_node_degrees()
all_node_degrees = self._get_all_node_degrees()
if 'ave_node_degree' in keys:
if self.__ave_node_degree is None:
self.__ave_node_degree = self.__get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self.__ave_node_degree
if self._ave_node_degree is None:
self._ave_node_degree = self._get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self._ave_node_degree
if 'max_node_degree' in keys:
if self.__max_node_degree is None:
self.__max_node_degree = self.__get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self.__max_node_degree
if self._max_node_degree is None:
self._max_node_degree = self._get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self._max_node_degree
if 'min_node_degree' in keys:
if self.__min_node_degree is None:
self.__min_node_degree = self.__get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self.__min_node_degree
if self._min_node_degree is None:
self._min_node_degree = self._get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self._min_node_degree
# fill factor
if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']):
all_fill_factors = self.__get_all_fill_factors()
all_fill_factors = self._get_all_fill_factors()
if 'ave_fill_factor' in keys:
if self.__ave_fill_factor is None:
self.__ave_fill_factor = self.__get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self.__ave_fill_factor
if self._ave_fill_factor is None:
self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self._ave_fill_factor
if 'max_fill_factor' in keys:
if self.__max_fill_factor is None:
self.__max_fill_factor = self.__get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self.__max_fill_factor
if self._max_fill_factor is None:
self._max_fill_factor = self._get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self._max_fill_factor
if 'min_fill_factor' in keys:
if self.__min_fill_factor is None:
self.__min_fill_factor = self.__get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self.__min_fill_factor
if self._min_fill_factor is None:
self._min_fill_factor = self._get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self._min_fill_factor
if 'substructures' in keys:
if self.__substructures is None:
self.__substructures = self.__get_substructures()
infos['substructures'] = self.__substructures
if self._substructures is None:
self._substructures = self._get_substructures()
infos['substructures'] = self._substructures
if 'class_number' in keys:
if self.__class_number is None:
self.__class_number = self.__get_class_number()
infos['class_number'] = self.__class_number
if self._class_number is None:
self._class_number = self._get_class_number()
infos['class_number'] = self._class_number
if 'node_attr_dim' in keys:
if self.__node_attr_dim is None:
self.__node_attr_dim = self.__get_node_attr_dim()
infos['node_attr_dim'] = self.__node_attr_dim
if self._node_attr_dim is None:
self._node_attr_dim = self._get_node_attr_dim()
infos['node_attr_dim'] = self._node_attr_dim
if 'edge_attr_dim' in keys:
if self.__edge_attr_dim is None:
self.__edge_attr_dim = self.__get_edge_attr_dim()
infos['edge_attr_dim'] = self.__edge_attr_dim
if self._edge_attr_dim is None:
self._edge_attr_dim = self._get_edge_attr_dim()
infos['edge_attr_dim'] = self._edge_attr_dim
# entropy of degree distribution.
@@ -438,14 +438,14 @@ class Dataset(object):
base = params['all_degree_entropy']['base']
else:
base = None
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base)
infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base)
if 'ave_degree_entropy' in keys:
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']):
base = params['ave_degree_entropy']['base']
else:
base = None
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base))
infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base))
return infos
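Every statistic in get_dataset_infos follows the same lazy pattern: compute on first request, cache it in the renamed single-underscore field, and serve it from the cache afterwards. A hedged usage sketch (the dataset path is hypothetical; the constructor and keys are as shown above):

ds = Dataset('../../datasets/MUTAG/MUTAG_A.txt')  # hypothetical path
infos = ds.get_dataset_infos(keys=['dataset_size', 'ave_node_num', 'directed'])
print(infos)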
@@ -457,12 +457,12 @@ class Dataset(object):
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
node_labels = [item for item in node_labels if item in self.__node_labels]
edge_labels = [item for item in edge_labels if item in self.__edge_labels]
node_attrs = [item for item in node_attrs if item in self.__node_attrs]
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs]
node_labels = [item for item in node_labels if item in self._node_labels]
edge_labels = [item for item in edge_labels if item in self._edge_labels]
node_attrs = [item for item in node_attrs if item in self._node_attrs]
edge_attrs = [item for item in edge_attrs if item in self._edge_attrs]


for g in self.__graphs:
for g in self._graphs:
for nd in g.nodes():
for nl in node_labels:
del g.nodes[nd][nl]
@@ -474,99 +474,99 @@ class Dataset(object):
for ea in edge_attrs:
del g.edges[ed][ea]
if len(node_labels) > 0:
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
self._node_labels = [nl for nl in self._node_labels if nl not in node_labels]
if len(edge_labels) > 0:
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
self._edge_labels = [el for el in self._edge_labels if el not in edge_labels]
if len(node_attrs) > 0:
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
self._node_attrs = [na for na in self._node_attrs if na not in node_attrs]
if len(edge_attrs) > 0:
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs]
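remove_labels first intersects the requested names with those the dataset actually tracks, then deletes them from every node and edge and from the bookkeeping lists. A hedged one-line usage sketch (the attribute names are hypothetical):

# Drop geometric node attributes before a kernel that only uses symbolic labels.
dataset.remove_labels(node_attrs=['x', 'y'])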
def clean_labels(self):
labels = []
for name in self.__node_labels:
for name in self._node_labels:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_node_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for nd in G.nodes():
del G.nodes[nd][name]
self.__node_labels = labels
self._node_labels = labels


labels = []
for name in self.__edge_labels:
for name in self._edge_labels:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_edge_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for ed in G.edges():
del G.edges[ed][name]
self.__edge_labels = labels
self._edge_labels = labels


labels = []
for name in self.__node_attrs:
for name in self._node_attrs:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_node_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for nd in G.nodes():
del G.nodes[nd][name]
self.__node_attrs = labels
self._node_attrs = labels


labels = []
for name in self.__edge_attrs:
for name in self._edge_attrs:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_edge_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for ed in G.edges():
del G.edges[ed][name]
self.__edge_attrs = labels
self._edge_attrs = labels
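All four blocks of clean_labels apply one rule: a label or attribute name survives only if it takes at least two distinct values somewhere in the dataset; otherwise it is deleted from every graph and dropped from the corresponding name list. The rule in isolation, as a sketch:

import networkx as nx

def is_informative(graphs, name):
    # A node-label name is kept iff it has at least two distinct values dataset-wide.
    values = set()
    for G in graphs:
        values |= set(nx.get_node_attributes(G, name).values())
        if len(values) > 1:
            return True
    return False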
def cut_graphs(self, range_):
self.__graphs = [self.__graphs[i] for i in range_]
if self.__targets is not None:
self.__targets = [self.__targets[i] for i in range_]
self._graphs = [self._graphs[i] for i in range_]
if self._targets is not None:
self._targets = [self._targets[i] for i in range_]
self.clean_labels()




def trim_dataset(self, edge_required=False):
if edge_required:
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)]
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)]
else:
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if nx.number_of_nodes(g) != 0]
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if nx.number_of_nodes(g) != 0]
idx = [p[0] for p in trimed_pairs]
self.__graphs = [p[1] for p in trimed_pairs]
self.__targets = [self.__targets[i] for i in idx]
self._graphs = [p[1] for p in trimed_pairs]
self._targets = [self._targets[i] for i in idx]
self.clean_labels()
def copy(self):
dataset = Dataset()
graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None
target = self.__targets.copy() if self.__targets is not None else None
node_labels = self.__node_labels.copy() if self.__node_labels is not None else None
node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None
edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None
edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None
graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None
target = self._targets.copy() if self._targets is not None else None
node_labels = self._node_labels.copy() if self._node_labels is not None else None
node_attrs = self._node_attrs.copy() if self._node_attrs is not None else None
edge_labels = self._edge_labels.copy() if self._edge_labels is not None else None
edge_attrs = self._edge_attrs.copy() if self._edge_attrs is not None else None
dataset.load_graphs(graphs, target)
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
# @todo: clean_labels and add other class members?
@@ -575,7 +575,7 @@ class Dataset(object):
    def get_all_node_labels(self):
        node_labels = []
-        for g in self.__graphs:
+        for g in self._graphs:
            for n in g.nodes():
                nl = tuple(g.nodes[n].items())
                if nl not in node_labels:
@@ -585,7 +585,7 @@ class Dataset(object):
    def get_all_edge_labels(self):
        edge_labels = []
-        for g in self.__graphs:
+        for g in self._graphs:
            for e in g.edges():
                el = tuple(g.edges[e].items())
                if el not in edge_labels:
@@ -593,93 +593,93 @@ class Dataset(object):
        return edge_labels

-    def __get_dataset_size(self):
-        return len(self.__graphs)
+    def _get_dataset_size(self):
+        return len(self._graphs)

-    def __get_all_node_nums(self):
-        return [nx.number_of_nodes(G) for G in self.__graphs]
+    def _get_all_node_nums(self):
+        return [nx.number_of_nodes(G) for G in self._graphs]

-    def __get_total_node_nums(self, all_node_nums):
+    def _get_total_node_nums(self, all_node_nums):
        return np.sum(all_node_nums)

-    def __get_ave_node_num(self, all_node_nums):
+    def _get_ave_node_num(self, all_node_nums):
        return np.mean(all_node_nums)

-    def __get_min_node_num(self, all_node_nums):
+    def _get_min_node_num(self, all_node_nums):
        return np.amin(all_node_nums)

-    def __get_max_node_num(self, all_node_nums):
+    def _get_max_node_num(self, all_node_nums):
        return np.amax(all_node_nums)

-    def __get_all_edge_nums(self):
-        return [nx.number_of_edges(G) for G in self.__graphs]
+    def _get_all_edge_nums(self):
+        return [nx.number_of_edges(G) for G in self._graphs]

-    def __get_total_edge_nums(self, all_edge_nums):
+    def _get_total_edge_nums(self, all_edge_nums):
        return np.sum(all_edge_nums)

-    def __get_ave_edge_num(self, all_edge_nums):
+    def _get_ave_edge_num(self, all_edge_nums):
        return np.mean(all_edge_nums)

-    def __get_min_edge_num(self, all_edge_nums):
+    def _get_min_edge_num(self, all_edge_nums):
        return np.amin(all_edge_nums)

-    def __get_max_edge_num(self, all_edge_nums):
+    def _get_max_edge_num(self, all_edge_nums):
        return np.amax(all_edge_nums)

-    def __get_node_label_dim(self):
-        return len(self.__node_labels)
+    def _get_node_label_dim(self):
+        return len(self._node_labels)

-    def __get_node_label_num(self, node_label):
+    def _get_node_label_num(self, node_label):
        nl = set()
-        for G in self.__graphs:
+        for G in self._graphs:
            nl = nl | set(nx.get_node_attributes(G, node_label).values())
        return len(nl)

-    def __get_edge_label_dim(self):
-        return len(self.__edge_labels)
+    def _get_edge_label_dim(self):
+        return len(self._edge_labels)

-    def __get_edge_label_num(self, edge_label):
+    def _get_edge_label_num(self, edge_label):
        el = set()
-        for G in self.__graphs:
+        for G in self._graphs:
            el = el | set(nx.get_edge_attributes(G, edge_label).values())
        return len(el)

-    def __is_directed(self):
-        return nx.is_directed(self.__graphs[0])
+    def _is_directed(self):
+        return nx.is_directed(self._graphs[0])

-    def __get_all_node_degrees(self):
-        return [np.mean(list(dict(G.degree()).values())) for G in self.__graphs]
+    def _get_all_node_degrees(self):
+        return [np.mean(list(dict(G.degree()).values())) for G in self._graphs]

-    def __get_ave_node_degree(self, all_node_degrees):
+    def _get_ave_node_degree(self, all_node_degrees):
        return np.mean(all_node_degrees)

-    def __get_max_node_degree(self, all_node_degrees):
+    def _get_max_node_degree(self, all_node_degrees):
        return np.amax(all_node_degrees)

-    def __get_min_node_degree(self, all_node_degrees):
+    def _get_min_node_degree(self, all_node_degrees):
        return np.amin(all_node_degrees)
-    def __get_all_fill_factors(self):
+    def _get_all_fill_factors(self):
        """Get fill factors, the ratio between the number of edges and the number of entries in the adjacency matrix.

        Returns
@@ -687,24 +687,24 @@ class Dataset(object):
        list[float]
            List of fill factors for all graphs.
        """
-        return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self.__graphs]
+        return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs]


-    def __get_ave_fill_factor(self, all_fill_factors):
+    def _get_ave_fill_factor(self, all_fill_factors):
        return np.mean(all_fill_factors)

-    def __get_max_fill_factor(self, all_fill_factors):
+    def _get_max_fill_factor(self, all_fill_factors):
        return np.amax(all_fill_factors)

-    def __get_min_fill_factor(self, all_fill_factors):
+    def _get_min_fill_factor(self, all_fill_factors):
        return np.amin(all_fill_factors)
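As a quick check of the fill-factor definition above, a 4-node path graph has 3 edges and a 4 x 4 adjacency matrix (a sketch using networkx only, not code from the repository):

    import networkx as nx

    G = nx.path_graph(4)  # 4 nodes, 3 edges
    fill = nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2)
    print(fill)  # 3 / 16 = 0.1875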
-    def __get_substructures(self):
+    def _get_substructures(self):
        subs = set()
-        for G in self.__graphs:
+        for G in self._graphs:
            degrees = list(dict(G.degree()).values())
            if any(i == 2 for i in degrees):
                subs.add('linear')
@@ -713,8 +713,8 @@ class Dataset(object):
            if 'linear' in subs and 'non linear' in subs:
                break

-        if self.__directed:
-            for G in self.__graphs:
+        if self._directed:
+            for G in self._graphs:
                if len(list(nx.find_cycle(G))) > 0:
                    subs.add('cyclic')
                    break
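One caveat with the cycle check above: nx.find_cycle raises NetworkXNoCycle on acyclic input rather than returning an empty edge list, so the len(...) > 0 test only succeeds when a cycle exists and otherwise propagates the exception. A defensive variant, as a sketch:

    import networkx as nx

    def has_cycle(G):
        # find_cycle raises NetworkXNoCycle on acyclic graphs instead of
        # returning an empty edge list, so catch the exception explicitly.
        try:
            nx.find_cycle(G)
            return True
        except nx.NetworkXNoCycle:
            return False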
@@ -737,19 +737,19 @@ class Dataset(object):
        return subs

-    def __get_class_num(self):
-        return len(set(self.__targets))
+    def _get_class_num(self):
+        return len(set(self._targets))

-    def __get_node_attr_dim(self):
-        return len(self.__node_attrs)
+    def _get_node_attr_dim(self):
+        return len(self._node_attrs)

-    def __get_edge_attr_dim(self):
-        return len(self.__edge_attrs)
+    def _get_edge_attr_dim(self):
+        return len(self._edge_attrs)


-    def __compute_all_degree_entropy(self, base=None):
+    def _compute_all_degree_entropy(self, base=None):
        """Compute the entropy of the degree distribution of each graph.

        Parameters
@@ -765,7 +765,7 @@ class Dataset(object):
        from gklearn.utils.stats import entropy
        degree_entropy = []
-        for g in self.__graphs:
+        for g in self._graphs:
            degrees = list(dict(g.degree()).values())
            en = entropy(degrees, base=base)
            degree_entropy.append(en)
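For reference, the same quantity can be reproduced with scipy, assuming gklearn.utils.stats.entropy normalizes the raw degree list into a distribution before applying the Shannon formula (a sketch, not the repository's implementation):

    import networkx as nx
    import numpy as np
    from scipy.stats import entropy  # assumed to match gklearn.utils.stats.entropy

    G = nx.star_graph(3)  # degrees: [3, 1, 1, 1]
    degrees = list(dict(G.degree()).values())
    _, counts = np.unique(degrees, return_counts=True)
    print(entropy(counts / counts.sum()))  # ~0.5623 nats for p = [0.75, 0.25]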
@@ -774,32 +774,32 @@ class Dataset(object):
    @property
    def graphs(self):
-        return self.__graphs
+        return self._graphs


    @property
    def targets(self):
-        return self.__targets
+        return self._targets

    @property
    def node_labels(self):
-        return self.__node_labels
+        return self._node_labels


    @property
    def edge_labels(self):
-        return self.__edge_labels
+        return self._edge_labels

    @property
    def node_attrs(self):
-        return self.__node_attrs
+        return self._node_attrs

    @property
    def edge_attrs(self):
-        return self.__edge_attrs
+        return self._edge_attrs


def split_dataset_by_target(dataset):


+ 4
- 4
gklearn/utils/graph_files.py View File

@@ -692,7 +692,7 @@ def load_from_ds(filename, filename_targets):
            # remove the '#'s in file names
            g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
            data.append(g)
-            __append_label_names(label_names, l_names)
+            _append_label_names(label_names, l_names)
            y.append(float(tmp[1]))
    else:  # targets in a separate file
        for i in range(0, len(content)):
@@ -700,7 +700,7 @@ def load_from_ds(filename, filename_targets):
            # remove the '#'s in file names
            g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1))
            data.append(g)
-            __append_label_names(label_names, l_names)
+            _append_label_names(label_names, l_names)

    with open(filename_targets) as fnt:
        content_y = fnt.read().splitlines()
@@ -745,13 +745,13 @@ def load_from_xml(filename, dir_dataset=None):
        mol_class = graph.attrib['class']
        g, l_names = load_gxl(dir_dataset + '/' + mol_filename)
        data.append(g)
-        __append_label_names(label_names, l_names)
+        _append_label_names(label_names, l_names)
        y.append(mol_class)

    return data, y, label_names


-def __append_label_names(label_names, new_names):
+def _append_label_names(label_names, new_names):
    for key, val in label_names.items():
        label_names[key] += [name for name in new_names[key] if name not in val]
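The helper merges newly seen label names into the running per-key lists without duplicates; its behavior on toy data (the names below are illustrative, not from any shipped dataset):

    label_names = {'node_labels': ['atom'], 'edge_labels': []}
    new_names = {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond_type']}

    for key, val in label_names.items():
        label_names[key] += [name for name in new_names[key] if name not in val]

    print(label_names)  # {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond_type']}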


+ 2
- 2
gklearn/utils/knn.py View File

@@ -73,7 +73,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t
    y_all = dataset.targets

    # compute kernel distances.
-    dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset)
+    dis_mat = _compute_kernel_distances(dataset, kernel_options, trainset=trainset)

    rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
@@ -121,7 +121,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t
    return results

-def __compute_kernel_distances(dataset, kernel_options, trainset=None):
+def _compute_kernel_distances(dataset, kernel_options, trainset=None):
    graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
                                            node_labels=dataset.node_labels,
                                            edge_labels=dataset.edge_labels,
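The hunk is cut off here; presumably the function turns the resulting Gram matrix into pairwise distances. For kernels, the standard identity is d(i, j)^2 = k(i, i) + k(j, j) - 2 * k(i, j); a numpy sketch of that step (an assumption about the function's body, not the actual code):

    import numpy as np

    def kernel_to_distances(K):
        # d(i, j)^2 = K[i, i] + K[j, j] - 2 * K[i, j]; clip small negative
        # values caused by floating-point round-off before the square root.
        diag = np.diag(K)
        d2 = diag[:, None] + diag[None, :] - 2 * K
        return np.sqrt(np.clip(d2, 0, None))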


+ 5
- 5
gklearn/utils/timer.py View File

@@ -23,8 +23,8 @@ class Timer(object):
        time_limit_in_sec : float
            The time limit in seconds.
        """
-        self.__time_limit_in_sec = time_limit_in_sec
-        self.__start_time = time.time()
+        self._time_limit_in_sec = time_limit_in_sec
+        self._start_time = time.time()

    def expired(self):
@@ -34,7 +34,7 @@ class Timer(object):
        ------
        Boolean true if the time limit has expired and false otherwise.
        """
-        if self.__time_limit_in_sec > 0:
-            runtime = time.time() - self.__start_time
-            return runtime >= self.__time_limit_in_sec
+        if self._time_limit_in_sec > 0:
+            runtime = time.time() - self._start_time
+            return runtime >= self._time_limit_in_sec
        return False
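A minimal usage sketch of the renamed Timer, grounded in the constructor and expired() shown above (a non-positive limit disables expiry via the final return False):

    import time
    from gklearn.utils.timer import Timer

    timer = Timer(2)  # expire after 2 seconds; 0 or a negative value means no limit
    while not timer.expired():
        time.sleep(0.5)  # stand-in for one unit of real work
    print('time limit reached')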
