@@ -73,3 +73,9 @@ gklearn/kernels/.tags
 # docker travis debug.
 ci.sh
+# outputs.
+outputs/
+# pyCharm.
+.idea/
@@ -158,7 +158,7 @@ def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name=
         sub_kernel = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
         param_grid_precomputed = {'compute_method': ['fp'],
                 'node_kernels': [sub_kernel], 'edge_kernels': [sub_kernel],
-                'weight': np.logspace(-3, -10, num=8, base=10)}
+                'weight': np.logspace(-4, -10, num=7, base=10)}
     elif kernel_name == 'SpectralDecomposition':
         from gklearn.kernels.randomWalkKernel import randomwalkkernel
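Reviewer note: the new `weight` grid simply drops the largest candidate. A quick sanity check of what the two expressions evaluate to (assuming `numpy` is imported as `np`, as in this module):

```python
import numpy as np

old = np.logspace(-3, -10, num=8, base=10)  # 8 weights: 1e-03, 1e-04, ..., 1e-10
new = np.logspace(-4, -10, num=7, base=10)  # 7 weights: 1e-04, 1e-05, ..., 1e-10

assert np.allclose(old[1:], new)  # the change only removes the 1e-3 candidate
```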
@@ -196,14 +196,17 @@ def cross_validate(graphs, targets, kernel_name, output_dir='outputs/', ds_name=
     elif kernel_name == 'Treelet':
         from gklearn.kernels.treeletKernel import treeletkernel
         estimator = treeletkernel
-        from gklearn.utils.kernels import polynomialkernel
+        from gklearn.utils.kernels import gaussiankernel, polynomialkernel
         import functools
         gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
 #                    for ga in np.linspace(1, 10, 10)]
-                    for ga in np.logspace(0, 10, num=11, base=10)]
-        pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11)
-                    for c in np.logspace(0, 10, num=11, base=10)]
+                    for ga in np.logspace(0, 10, num=11, base=10)]
+        pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11)
+                    for c in np.logspace(0, 10, num=11, base=10)]
+#        pkernels = [functools.partial(polynomialkernel, d=1, c=1)]
         param_grid_precomputed = {'sub_kernel': pkernels + gkernels}
+#                                 'parallel': [None]}
     elif kernel_name == 'WLSubtree':
         from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
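Reviewer note: with `gaussiankernel` restored to the import, the Treelet grid searches over both sub-kernel families. A rough count of that search space (the two kernel functions below are simplified stand-ins for the ones in `gklearn.utils.kernels`, whose exact signatures this diff does not show):

```python
import functools
import numpy as np

def gaussiankernel(x, y, gamma=1.0):  # stand-in for gklearn.utils.kernels.gaussiankernel
    return np.exp(-gamma * np.sum((np.asarray(x) - np.asarray(y)) ** 2))

def polynomialkernel(x, y, d=1, c=0):  # stand-in for gklearn.utils.kernels.polynomialkernel
    return (np.dot(x, y) + c) ** d

gkernels = [functools.partial(gaussiankernel, gamma=1 / ga)
            for ga in np.logspace(0, 10, num=11, base=10)]   # 11 gaussian candidates
pkernels = [functools.partial(polynomialkernel, d=d, c=c) for d in range(1, 11)
            for c in np.logspace(0, 10, num=11, base=10)]    # 10 * 11 = 110 polynomial candidates

print(len(pkernels + gkernels))  # 121 sub-kernels to cross-validate over
```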
@@ -14,37 +14,37 @@ class Constant(EditCost):
     def __init__(self, node_ins_cost=1, node_del_cost=1, node_rel_cost=1, edge_ins_cost=1, edge_del_cost=1, edge_rel_cost=1):
-        self.__node_ins_cost = node_ins_cost
-        self.__node_del_cost = node_del_cost
-        self.__node_rel_cost = node_rel_cost
-        self.__edge_ins_cost = edge_ins_cost
-        self.__edge_del_cost = edge_del_cost
-        self.__edge_rel_cost = edge_rel_cost
+        self._node_ins_cost = node_ins_cost
+        self._node_del_cost = node_del_cost
+        self._node_rel_cost = node_rel_cost
+        self._edge_ins_cost = edge_ins_cost
+        self._edge_del_cost = edge_del_cost
+        self._edge_rel_cost = edge_rel_cost

     def node_ins_cost_fun(self, node_label):
-        return self.__node_ins_cost
+        return self._node_ins_cost

     def node_del_cost_fun(self, node_label):
-        return self.__node_del_cost
+        return self._node_del_cost

     def node_rel_cost_fun(self, node_label_1, node_label_2):
         if node_label_1 != node_label_2:
-            return self.__node_rel_cost
+            return self._node_rel_cost
         return 0

     def edge_ins_cost_fun(self, edge_label):
-        return self.__edge_ins_cost
+        return self._edge_ins_cost

     def edge_del_cost_fun(self, edge_label):
-        return self.__edge_del_cost
+        return self._edge_del_cost

     def edge_rel_cost_fun(self, edge_label_1, edge_label_2):
         if edge_label_1 != edge_label_2:
-            return self.__edge_rel_cost
+            return self._edge_rel_cost
         return 0
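The whole changeset follows one pattern: `self.__attr` becomes `self._attr`. This is more than cosmetics. A double leading underscore triggers Python's name mangling (`__attr` inside `class C` is stored as `_C__attr`), which silently breaks attribute access from subclasses and makes testing and pickling awkward; a single underscore marks the attribute as internal without those side effects. A minimal, self-contained sketch of the failure mode the rename avoids (`__cost` is a hypothetical attribute, not one from the diff):

```python
class EditCost:
    def __init__(self):
        self.__cost = 1  # mangled to _EditCost__cost

class Constant(EditCost):
    def cost(self):
        return self.__cost  # looks up _Constant__cost, which was never set

try:
    Constant().cost()
except AttributeError as e:
    print(e)  # 'Constant' object has no attribute '_Constant__cost'
```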
@@ -15,17 +15,17 @@ class GEDEnv(object):
     def __init__(self):
-        self.__initialized = False
-        self.__new_graph_ids = []
-        self.__ged_data = GEDData()
+        self._initialized = False
+        self._new_graph_ids = []
+        self._ged_data = GEDData()
         # Variables needed for approximating ged_instance_.
-        self.__lower_bounds = {}
-        self.__upper_bounds = {}
-        self.__runtimes = {}
-        self.__node_maps = {}
-        self.__original_to_internal_node_ids = []
-        self.__internal_to_original_node_ids = []
-        self.__ged_method = None
+        self._lower_bounds = {}
+        self._upper_bounds = {}
+        self._runtimes = {}
+        self._node_maps = {}
+        self._original_to_internal_node_ids = []
+        self._internal_to_original_node_ids = []
+        self._ged_method = None

     def set_edit_cost(self, edit_cost, edit_cost_constants=[]):

@@ -36,7 +36,7 @@ class GEDEnv(object):
         * @param[in] edit_cost_constants Constants passed to the constructor of the edit cost class selected by @p edit_costs.
         */
         """
-        self.__ged_data._set_edit_cost(edit_cost, edit_cost_constants)
+        self._ged_data._set_edit_cost(edit_cost, edit_cost_constants)

     def add_graph(self, graph_name='', graph_class=''):

@@ -49,17 +49,17 @@ class GEDEnv(object):
         */
         """
         # @todo: graphs are not uninitialized.
-        self.__initialized = False
-        graph_id = self.__ged_data._num_graphs_without_shuffled_copies
-        self.__ged_data._num_graphs_without_shuffled_copies += 1
-        self.__new_graph_ids.append(graph_id)
-        self.__ged_data._graphs.append(nx.Graph())
-        self.__ged_data._graph_names.append(graph_name)
-        self.__ged_data._graph_classes.append(graph_class)
-        self.__original_to_internal_node_ids.append({})
-        self.__internal_to_original_node_ids.append({})
-        self.__ged_data._strings_to_internal_node_ids.append({})
-        self.__ged_data._internal_node_ids_to_strings.append({})
+        self._initialized = False
+        graph_id = self._ged_data._num_graphs_without_shuffled_copies
+        self._ged_data._num_graphs_without_shuffled_copies += 1
+        self._new_graph_ids.append(graph_id)
+        self._ged_data._graphs.append(nx.Graph())
+        self._ged_data._graph_names.append(graph_name)
+        self._ged_data._graph_classes.append(graph_class)
+        self._original_to_internal_node_ids.append({})
+        self._internal_to_original_node_ids.append({})
+        self._ged_data._strings_to_internal_node_ids.append({})
+        self._ged_data._internal_node_ids_to_strings.append({})
         return graph_id

@@ -70,14 +70,14 @@ class GEDEnv(object):
         * @param[in] graph_id ID of graph that has to be cleared.
         */
         """
-        if graph_id > self.__ged_data.num_graphs_without_shuffled_copies():
+        if graph_id > self._ged_data.num_graphs_without_shuffled_copies():
            raise Exception('The graph', self.get_graph_name(graph_id), 'has not been added to the environment.')
-        self.__ged_data._graphs[graph_id].clear()
-        self.__original_to_internal_node_ids[graph_id].clear()
-        self.__internal_to_original_node_ids[graph_id].clear()
-        self.__ged_data._strings_to_internal_node_ids[graph_id].clear()
-        self.__ged_data._internal_node_ids_to_strings[graph_id].clear()
-        self.__initialized = False
+        self._ged_data._graphs[graph_id].clear()
+        self._original_to_internal_node_ids[graph_id].clear()
+        self._internal_to_original_node_ids[graph_id].clear()
+        self._ged_data._strings_to_internal_node_ids[graph_id].clear()
+        self._ged_data._internal_node_ids_to_strings[graph_id].clear()
+        self._initialized = False

     def add_node(self, graph_id, node_id, node_label):
@@ -90,15 +90,15 @@ class GEDEnv(object):
         */
         """
         # @todo: check ids.
-        self.__initialized = False
-        internal_node_id = nx.number_of_nodes(self.__ged_data._graphs[graph_id])
-        self.__ged_data._graphs[graph_id].add_node(internal_node_id, label=node_label)
-        self.__original_to_internal_node_ids[graph_id][node_id] = internal_node_id
-        self.__internal_to_original_node_ids[graph_id][internal_node_id] = node_id
-        self.__ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id
-        self.__ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id)
-        self.__ged_data._node_label_to_id(node_label)
-        label_id = self.__ged_data._node_label_to_id(node_label)
+        self._initialized = False
+        internal_node_id = nx.number_of_nodes(self._ged_data._graphs[graph_id])
+        self._ged_data._graphs[graph_id].add_node(internal_node_id, label=node_label)
+        self._original_to_internal_node_ids[graph_id][node_id] = internal_node_id
+        self._internal_to_original_node_ids[graph_id][internal_node_id] = node_id
+        self._ged_data._strings_to_internal_node_ids[graph_id][str(node_id)] = internal_node_id
+        self._ged_data._internal_node_ids_to_strings[graph_id][internal_node_id] = str(node_id)
+        self._ged_data._node_label_to_id(node_label)
+        label_id = self._ged_data._node_label_to_id(node_label)
         # @todo: ged_data_.graphs_[graph_id].set_label

@@ -114,10 +114,10 @@ class GEDEnv(object):
         */
         """
         # @todo: check everything.
-        self.__initialized = False
+        self._initialized = False
         # @todo: check ignore_duplicates.
-        self.__ged_data._graphs[graph_id].add_edge(self.__original_to_internal_node_ids[graph_id][nd_from], self.__original_to_internal_node_ids[graph_id][nd_to], label=edge_label)
-        label_id = self.__ged_data._edge_label_to_id(edge_label)
+        self._ged_data._graphs[graph_id].add_edge(self._original_to_internal_node_ids[graph_id][nd_from], self._original_to_internal_node_ids[graph_id][nd_to], label=edge_label)
+        label_id = self._ged_data._edge_label_to_id(edge_label)
         # @todo: ged_data_.graphs_[graph_id].set_label

@@ -182,30 +182,30 @@ class GEDEnv(object):
            init_type = OptionsStringMap.InitType[init_type]
         # Throw an exception if no edit costs have been selected.
-        if self.__ged_data._edit_cost is None:
+        if self._ged_data._edit_cost is None:
            raise Exception('No edit costs have been selected. Call set_edit_cost() before calling init().')
         # Return if the environment is initialized.
-        if self.__initialized:
+        if self._initialized:
            return
         # Set initialization type.
-        self.__ged_data._init_type = init_type
+        self._ged_data._init_type = init_type
         # @todo: Construct shuffled graph copies if necessary.
         # Re-initialize adjacency matrices (also previously initialized graphs must be re-initialized because of possible re-allocation).
         # @todo: setup_adjacency_matrix, don't know if neccessary.
-        self.__ged_data._max_num_nodes = np.max([nx.number_of_nodes(g) for g in self.__ged_data._graphs])
-        self.__ged_data._max_num_edges = np.max([nx.number_of_edges(g) for g in self.__ged_data._graphs])
+        self._ged_data._max_num_nodes = np.max([nx.number_of_nodes(g) for g in self._ged_data._graphs])
+        self._ged_data._max_num_edges = np.max([nx.number_of_edges(g) for g in self._ged_data._graphs])
         # Initialize cost matrices if necessary.
-        if self.__ged_data._eager_init():
+        if self._ged_data._eager_init():
            pass # @todo: init_cost_matrices_: 1. Update node cost matrix if new node labels have been added to the environment; 2. Update edge cost matrix if new edge labels have been added to the environment.
         # Mark environment as initialized.
-        self.__initialized = True
-        self.__new_graph_ids.clear()
+        self._initialized = True
+        self._new_graph_ids.clear()

     def is_initialized(self):

@@ -215,7 +215,7 @@ class GEDEnv(object):
         * @return True if the environment is initialized.
         */
         """
-        return self.__initialized
+        return self._initialized

     def get_init_type(self):

@@ -225,16 +225,16 @@ class GEDEnv(object):
         * @return Initialization type.
         */
         """
-        return self.__ged_data._init_type
+        return self._ged_data._init_type

     def set_label_costs(self, node_label_costs=None, edge_label_costs=None):
         """Set the costs between labels.
         """
         if node_label_costs is not None:
-            self.__ged_data._node_label_costs = node_label_costs
+            self._ged_data._node_label_costs = node_label_costs
         if edge_label_costs is not None:
-            self.__ged_data._edge_label_costs = edge_label_costs
+            self._ged_data._edge_label_costs = edge_label_costs

     def set_method(self, method, options=''):
@@ -245,67 +245,67 @@ class GEDEnv(object):
         * @param[in] options An options string of the form @"[--@<option@> @<arg@>] [...]@" passed to the selected method.
         */
         """
-        del self.__ged_method
+        del self._ged_method
         if isinstance(method, str):
            method = OptionsStringMap.GEDMethod[method]
         if method == Options.GEDMethod.BRANCH:
-            self.__ged_method = Branch(self.__ged_data)
+            self._ged_method = Branch(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_FAST:
-            self.__ged_method = BranchFast(self.__ged_data)
+            self._ged_method = BranchFast(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_FAST:
-            self.__ged_method = BranchFast(self.__ged_data)
+            self._ged_method = BranchFast(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_TIGHT:
-            self.__ged_method = BranchTight(self.__ged_data)
+            self._ged_method = BranchTight(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_UNIFORM:
-            self.__ged_method = BranchUniform(self.__ged_data)
+            self._ged_method = BranchUniform(self._ged_data)
         elif method == Options.GEDMethod.BRANCH_COMPACT:
-            self.__ged_method = BranchCompact(self.__ged_data)
+            self._ged_method = BranchCompact(self._ged_data)
         elif method == Options.GEDMethod.PARTITION:
-            self.__ged_method = Partition(self.__ged_data)
+            self._ged_method = Partition(self._ged_data)
         elif method == Options.GEDMethod.HYBRID:
-            self.__ged_method = Hybrid(self.__ged_data)
+            self._ged_method = Hybrid(self._ged_data)
         elif method == Options.GEDMethod.RING:
-            self.__ged_method = Ring(self.__ged_data)
+            self._ged_method = Ring(self._ged_data)
         elif method == Options.GEDMethod.ANCHOR_AWARE_GED:
-            self.__ged_method = AnchorAwareGED(self.__ged_data)
+            self._ged_method = AnchorAwareGED(self._ged_data)
         elif method == Options.GEDMethod.WALKS:
-            self.__ged_method = Walks(self.__ged_data)
+            self._ged_method = Walks(self._ged_data)
         elif method == Options.GEDMethod.IPFP:
-            self.__ged_method = IPFP(self.__ged_data)
+            self._ged_method = IPFP(self._ged_data)
         elif method == Options.GEDMethod.BIPARTITE:
            from gklearn.ged.methods import Bipartite
-            self.__ged_method = Bipartite(self.__ged_data)
+            self._ged_method = Bipartite(self._ged_data)
         elif method == Options.GEDMethod.SUBGRAPH:
-            self.__ged_method = Subgraph(self.__ged_data)
+            self._ged_method = Subgraph(self._ged_data)
         elif method == Options.GEDMethod.NODE:
-            self.__ged_method = Node(self.__ged_data)
+            self._ged_method = Node(self._ged_data)
         elif method == Options.GEDMethod.RING_ML:
-            self.__ged_method = RingML(self.__ged_data)
+            self._ged_method = RingML(self._ged_data)
         elif method == Options.GEDMethod.BIPARTITE_ML:
-            self.__ged_method = BipartiteML(self.__ged_data)
+            self._ged_method = BipartiteML(self._ged_data)
         elif method == Options.GEDMethod.REFINE:
-            self.__ged_method = Refine(self.__ged_data)
+            self._ged_method = Refine(self._ged_data)
         elif method == Options.GEDMethod.BP_BEAM:
-            self.__ged_method = BPBeam(self.__ged_data)
+            self._ged_method = BPBeam(self._ged_data)
         elif method == Options.GEDMethod.SIMULATED_ANNEALING:
-            self.__ged_method = SimulatedAnnealing(self.__ged_data)
+            self._ged_method = SimulatedAnnealing(self._ged_data)
         elif method == Options.GEDMethod.HED:
-            self.__ged_method = HED(self.__ged_data)
+            self._ged_method = HED(self._ged_data)
         elif method == Options.GEDMethod.STAR:
-            self.__ged_method = STAR(self.__ged_data)
+            self._ged_method = STAR(self._ged_data)
         # #ifdef GUROBI
         elif method == Options.GEDMethod.F1:
-            self.__ged_method = F1(self.__ged_data)
+            self._ged_method = F1(self._ged_data)
         elif method == Options.GEDMethod.F2:
-            self.__ged_method = F2(self.__ged_data)
+            self._ged_method = F2(self._ged_data)
         elif method == Options.GEDMethod.COMPACT_MIP:
-            self.__ged_method = CompactMIP(self.__ged_data)
+            self._ged_method = CompactMIP(self._ged_data)
         elif method == Options.GEDMethod.BLP_NO_EDGE_LABELS:
-            self.__ged_method = BLPNoEdgeLabels(self.__ged_data)
+            self._ged_method = BLPNoEdgeLabels(self._ged_data)
-        self.__ged_method.set_options(options)
+        self._ged_method.set_options(options)

     def run_method(self, g_id, h_id):
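Reviewer note on the chain above: `Options.GEDMethod.BRANCH_FAST` is tested twice, so the second `elif` is unreachable dead code (present both before and after this change). Independent of this PR, a table-driven dispatch would make such duplicates impossible; a self-contained sketch with stand-in names, not the library's API:

```python
from enum import Enum, auto

class GEDMethod(Enum):  # stand-in for Options.GEDMethod
    BRANCH = auto()
    BRANCH_FAST = auto()

class Branch:
    def __init__(self, ged_data):
        self._ged_data = ged_data

class BranchFast(Branch):
    pass

METHOD_CLASSES = {GEDMethod.BRANCH: Branch,
                  GEDMethod.BRANCH_FAST: BranchFast}  # one entry per method; a duplicate key cannot add a branch

def make_method(method, ged_data):
    return METHOD_CLASSES[method](ged_data)

print(type(make_method(GEDMethod.BRANCH_FAST, ged_data={})).__name__)  # BranchFast
```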
@@ -316,34 +316,34 @@ class GEDEnv(object):
         * @param[in] h_id ID of an input graph that has been added to the environment.
         */
         """
-        if g_id >= self.__ged_data.num_graphs():
+        if g_id >= self._ged_data.num_graphs():
            raise Exception('The graph with ID', str(g_id), 'has not been added to the environment.')
-        if h_id >= self.__ged_data.num_graphs():
+        if h_id >= self._ged_data.num_graphs():
            raise Exception('The graph with ID', str(h_id), 'has not been added to the environment.')
-        if not self.__initialized:
+        if not self._initialized:
            raise Exception('The environment is uninitialized. Call init() after adding all graphs to the environment.')
-        if self.__ged_method is None:
+        if self._ged_method is None:
            raise Exception('No method has been set. Call set_method() before calling run().')
         # Call selected GEDMethod and store results.
-        if self.__ged_data.shuffled_graph_copies_available() and (g_id == h_id):
-            self.__ged_method.run(g_id, self.__ged_data.id_shuffled_graph_copy(h_id)) # @todo: why shuffle?
+        if self._ged_data.shuffled_graph_copies_available() and (g_id == h_id):
+            self._ged_method.run(g_id, self._ged_data.id_shuffled_graph_copy(h_id)) # @todo: why shuffle?
         else:
-            self.__ged_method.run(g_id, h_id)
-        self.__lower_bounds[(g_id, h_id)] = self.__ged_method.get_lower_bound()
-        self.__upper_bounds[(g_id, h_id)] = self.__ged_method.get_upper_bound()
-        self.__runtimes[(g_id, h_id)] = self.__ged_method.get_runtime()
-        self.__node_maps[(g_id, h_id)] = self.__ged_method.get_node_map()
+            self._ged_method.run(g_id, h_id)
+        self._lower_bounds[(g_id, h_id)] = self._ged_method.get_lower_bound()
+        self._upper_bounds[(g_id, h_id)] = self._ged_method.get_upper_bound()
+        self._runtimes[(g_id, h_id)] = self._ged_method.get_runtime()
+        self._node_maps[(g_id, h_id)] = self._ged_method.get_node_map()

     def init_method(self):
         """Initializes the method specified by call to set_method().
         """
-        if not self.__initialized:
+        if not self._initialized:
            raise Exception('The environment is uninitialized. Call init() before calling init_method().')
-        if self.__ged_method is None:
+        if self._ged_method is None:
            raise Exception('No method has been set. Call set_method() before calling init_method().')
-        self.__ged_method.init()
+        self._ged_method.init()

     def get_num_node_labels(self):

@@ -354,7 +354,7 @@ class GEDEnv(object):
         * @note If @p 1 is returned, the nodes are unlabeled.
         */
         """
-        return len(self.__ged_data._node_labels)
+        return len(self._ged_data._node_labels)

     def get_all_node_labels(self):

@@ -365,7 +365,7 @@ class GEDEnv(object):
         * @note If @p 1 is returned, the nodes are unlabeled.
         */
         """
-        return self.__ged_data._node_labels
+        return self._ged_data._node_labels

     def get_node_label(self, label_id, to_dict=True):

@@ -379,8 +379,8 @@ class GEDEnv(object):
         if label_id < 1 or label_id > self.get_num_node_labels():
            raise Exception('The environment does not contain a node label with ID', str(label_id), '.')
         if to_dict:
-            return dict(self.__ged_data._node_labels[label_id - 1])
-        return self.__ged_data._node_labels[label_id - 1]
+            return dict(self._ged_data._node_labels[label_id - 1])
+        return self._ged_data._node_labels[label_id - 1]

     def get_num_edge_labels(self):

@@ -391,7 +391,7 @@ class GEDEnv(object):
         * @note If @p 1 is returned, the edges are unlabeled.
         */
         """
-        return len(self.__ged_data._edge_labels)
+        return len(self._ged_data._edge_labels)

     def get_all_edge_labels(self):

@@ -402,7 +402,7 @@ class GEDEnv(object):
         * @note If @p 1 is returned, the edges are unlabeled.
         */
         """
-        return self.__ged_data._edge_labels
+        return self._ged_data._edge_labels

     def get_edge_label(self, label_id, to_dict=True):

@@ -416,8 +416,8 @@ class GEDEnv(object):
         if label_id < 1 or label_id > self.get_num_edge_labels():
            raise Exception('The environment does not contain an edge label with ID', str(label_id), '.')
         if to_dict:
-            return dict(self.__ged_data._edge_labels[label_id - 1])
-        return self.__ged_data._edge_labels[label_id - 1]
+            return dict(self._ged_data._edge_labels[label_id - 1])
+        return self._ged_data._edge_labels[label_id - 1]

     def get_upper_bound(self, g_id, h_id):

@@ -429,9 +429,9 @@ class GEDEnv(object):
         * @return Upper bound computed by the last call to run_method() with arguments @p g_id and @p h_id.
         */
         """
-        if (g_id, h_id) not in self.__upper_bounds:
+        if (g_id, h_id) not in self._upper_bounds:
            raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_upper_bound(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__upper_bounds[(g_id, h_id)]
+        return self._upper_bounds[(g_id, h_id)]

     def get_lower_bound(self, g_id, h_id):

@@ -443,9 +443,9 @@ class GEDEnv(object):
         * @return Lower bound computed by the last call to run_method() with arguments @p g_id and @p h_id.
         */
         """
-        if (g_id, h_id) not in self.__lower_bounds:
+        if (g_id, h_id) not in self._lower_bounds:
            raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_lower_bound(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__lower_bounds[(g_id, h_id)]
+        return self._lower_bounds[(g_id, h_id)]

     def get_runtime(self, g_id, h_id):

@@ -457,9 +457,9 @@ class GEDEnv(object):
         * @return Runtime of last call to run_method() with arguments @p g_id and @p h_id.
         */
         """
-        if (g_id, h_id) not in self.__runtimes:
+        if (g_id, h_id) not in self._runtimes:
            raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_runtime(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__runtimes[(g_id, h_id)]
+        return self._runtimes[(g_id, h_id)]

     def get_init_time(self):

@@ -469,7 +469,7 @@ class GEDEnv(object):
         * @return Runtime of the last call to init_method().
         */
         """
-        return self.__ged_method.get_init_time()
+        return self._ged_method.get_init_time()

     def get_node_map(self, g_id, h_id):

@@ -481,9 +481,9 @@ class GEDEnv(object):
         * @return Node map computed by the last call to run_method() with arguments @p g_id and @p h_id.
         */
         """
-        if (g_id, h_id) not in self.__node_maps:
+        if (g_id, h_id) not in self._node_maps:
            raise Exception('Call run(' + str(g_id) + ',' + str(h_id) + ') before calling get_node_map(' + str(g_id) + ',' + str(h_id) + ').')
-        return self.__node_maps[(g_id, h_id)]
+        return self._node_maps[(g_id, h_id)]

     def get_forward_map(self, g_id, h_id) :
@@ -531,7 +531,7 @@ class GEDEnv(object):
         * @param[in,out] node_map Node map whose induced edit cost is to be computed.
         */
         """
-        self.__ged_data.compute_induced_cost(self.__ged_data._graphs[g_id], self.__ged_data._graphs[h_id], node_map)
+        self._ged_data.compute_induced_cost(self._ged_data._graphs[g_id], self._ged_data._graphs[h_id], node_map)

     def get_nx_graph(self, graph_id):

@@ -569,7 +569,7 @@ class GEDEnv(object):
         .. seealso:: get_graph_internal_id(), get_graph_num_nodes(), get_graph_num_edges(), get_original_node_ids(), get_graph_edges(), get_graph_adjacence_matrix()
         .. note:: These functions allow to collect all the graph's informations.
         """
-        graph = self.__ged_data.graph(graph_id)
+        graph = self._ged_data.graph(graph_id)
         node_labels = []
         for n in graph.nodes():
            node_labels.append(graph.nodes[n]['label'])

@@ -590,7 +590,7 @@ class GEDEnv(object):
         .. seealso::get_graph_internal_id(), get_graph_num_nodes(), get_graph_num_edges(), get_original_node_ids(), get_graph_node_labels(), get_graph_adjacence_matrix()
         .. note:: These functions allow to collect all the graph's informations.
         """
-        graph = self.__ged_data.graph(graph_id)
+        graph = self._ged_data.graph(graph_id)
         if to_dict:
            edges = {}
            for n1, n2, attr in graph.edges(data=True):

@@ -608,7 +608,7 @@ class GEDEnv(object):
         * @return Name of the input graph.
         */
         """
-        return self.__ged_data._graph_names[graph_id]
+        return self._ged_data._graph_names[graph_id]

     def get_graph_num_nodes(self, graph_id):

@@ -619,7 +619,7 @@ class GEDEnv(object):
         * @return Number of nodes in the graph.
         */
         """
-        return nx.number_of_nodes(self.__ged_data.graph(graph_id))
+        return nx.number_of_nodes(self._ged_data.graph(graph_id))

     def get_original_node_ids(self, graph_id):

@@ -634,11 +634,11 @@ class GEDEnv(object):
         .. seealso::get_graph_internal_id(), get_graph_num_nodes(), get_graph_num_edges(), get_graph_node_labels(), get_graph_edges(), get_graph_adjacence_matrix()
         .. note:: These functions allow to collect all the graph's informations.
         """
-        return [i for i in self.__internal_to_original_node_ids[graph_id].values()]
+        return [i for i in self._internal_to_original_node_ids[graph_id].values()]

     def get_node_cost(self, node_label_1, node_label_2):
-        return self.__ged_data.node_cost(node_label_1, node_label_2)
+        return self._ged_data.node_cost(node_label_1, node_label_2)

     def get_node_rel_cost(self, node_label_1, node_label_2):

@@ -654,7 +654,7 @@ class GEDEnv(object):
            node_label_1 = tuple(sorted(node_label_1.items(), key=lambda kv: kv[0]))
         if isinstance(node_label_2, dict):
            node_label_2 = tuple(sorted(node_label_2.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2) # @todo: may need to use node_cost() instead (or change node_cost() and modify ged_method for pre-defined cost matrices.)
+        return self._ged_data._edit_cost.node_rel_cost_fun(node_label_1, node_label_2) # @todo: may need to use node_cost() instead (or change node_cost() and modify ged_method for pre-defined cost matrices.)

     def get_node_del_cost(self, node_label):

@@ -667,7 +667,7 @@ class GEDEnv(object):
         """
         if isinstance(node_label, dict):
            node_label = tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.node_del_cost_fun(node_label)
+        return self._ged_data._edit_cost.node_del_cost_fun(node_label)

     def get_node_ins_cost(self, node_label):

@@ -680,11 +680,11 @@ class GEDEnv(object):
         """
         if isinstance(node_label, dict):
            node_label = tuple(sorted(node_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.node_ins_cost_fun(node_label)
+        return self._ged_data._edit_cost.node_ins_cost_fun(node_label)

     def get_edge_cost(self, edge_label_1, edge_label_2):
-        return self.__ged_data.edge_cost(edge_label_1, edge_label_2)
+        return self._ged_data.edge_cost(edge_label_1, edge_label_2)

     def get_edge_rel_cost(self, edge_label_1, edge_label_2):

@@ -700,7 +700,7 @@ class GEDEnv(object):
            edge_label_1 = tuple(sorted(edge_label_1.items(), key=lambda kv: kv[0]))
         if isinstance(edge_label_2, dict):
            edge_label_2 = tuple(sorted(edge_label_2.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.edge_rel_cost_fun(edge_label_1, edge_label_2)
+        return self._ged_data._edit_cost.edge_rel_cost_fun(edge_label_1, edge_label_2)

     def get_edge_del_cost(self, edge_label):

@@ -713,7 +713,7 @@ class GEDEnv(object):
         """
         if isinstance(edge_label, dict):
            edge_label = tuple(sorted(edge_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.edge_del_cost_fun(edge_label)
+        return self._ged_data._edit_cost.edge_del_cost_fun(edge_label)

     def get_edge_ins_cost(self, edge_label):

@@ -726,8 +726,8 @@ class GEDEnv(object):
         """
         if isinstance(edge_label, dict):
            edge_label = tuple(sorted(edge_label.items(), key=lambda kv: kv[0]))
-        return self.__ged_data._edit_cost.edge_ins_cost_fun(edge_label)
+        return self._ged_data._edit_cost.edge_ins_cost_fun(edge_label)

     def get_all_graph_ids(self):
-        return [i for i in range(0, self.__ged_data._num_graphs_without_shuffled_copies)]
+        return [i for i in range(0, self._ged_data._num_graphs_without_shuffled_copies)]
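All the renamed members of `GEDEnv` are internal, so the public workflow is unchanged by this file's edits. For orientation, a typical round trip looks roughly like this (method names as they appear in the hunks above; the import path and the `'CONSTANT'`/`'BIPARTITE'` identifier strings are assumptions, not confirmed by this diff):

```python
from gklearn.ged.env import GEDEnv  # assumed import path

env = GEDEnv()
env.set_edit_cost('CONSTANT')   # must be called before init()
g = env.add_graph('g')
h = env.add_graph('h')
env.add_node(g, 0, node_label={'atom': 'C'})
env.add_node(h, 0, node_label={'atom': 'N'})
env.init()
env.set_method('BIPARTITE')     # strings are parsed via OptionsStringMap.GEDMethod
env.init_method()
env.run_method(g, h)            # raises if init() was not called first
print(env.get_upper_bound(g, h))
print(env.get_node_map(g, h))
```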
@@ -12,9 +12,9 @@ from gklearn.utils import dummy_node, undefined_node
 class NodeMap(object):

     def __init__(self, num_nodes_g, num_nodes_h):
-        self.__forward_map = [undefined_node()] * num_nodes_g
-        self.__backward_map = [undefined_node()] * num_nodes_h
-        self.__induced_cost = np.inf
+        self._forward_map = [undefined_node()] * num_nodes_g
+        self._backward_map = [undefined_node()] * num_nodes_h
+        self._induced_cost = np.inf

     def clear(self):

@@ -23,29 +23,29 @@ class NodeMap(object):
         * @brief Clears the node map.
         */
         """
-        self.__forward_map = [undefined_node() for i in range(len(self.__forward_map))]
-        self.__backward_map = [undefined_node() for i in range(len(self.__backward_map))]
+        self._forward_map = [undefined_node() for i in range(len(self._forward_map))]
+        self._backward_map = [undefined_node() for i in range(len(self._backward_map))]

     def num_source_nodes(self):
-        return len(self.__forward_map)
+        return len(self._forward_map)

     def num_target_nodes(self):
-        return len(self.__backward_map)
+        return len(self._backward_map)

     def image(self, node):
-        if node < len(self.__forward_map):
-            return self.__forward_map[node]
+        if node < len(self._forward_map):
+            return self._forward_map[node]
         else:
            raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
         return undefined_node()

     def pre_image(self, node):
-        if node < len(self.__backward_map):
-            return self.__backward_map[node]
+        if node < len(self._backward_map):
+            return self._backward_map[node]
         else:
            raise Exception('The node with ID ', str(node), ' is not contained in the target nodes of the node map.')
         return undefined_node()

@@ -53,50 +53,50 @@ class NodeMap(object):
     def as_relation(self, relation):
         relation.clear()
-        for i in range(0, len(self.__forward_map)):
-            k = self.__forward_map[i]
+        for i in range(0, len(self._forward_map)):
+            k = self._forward_map[i]
            if k != undefined_node():
                relation.append(tuple((i, k)))
-        for k in range(0, len(self.__backward_map)):
-            i = self.__backward_map[k]
+        for k in range(0, len(self._backward_map)):
+            i = self._backward_map[k]
            if i == dummy_node():
                relation.append(tuple((i, k)))

     def add_assignment(self, i, k):
         if i != dummy_node():
-            if i < len(self.__forward_map):
-                self.__forward_map[i] = k
+            if i < len(self._forward_map):
+                self._forward_map[i] = k
            else:
                raise Exception('The node with ID ', str(i), ' is not contained in the source nodes of the node map.')
         if k != dummy_node():
-            if k < len(self.__backward_map):
-                self.__backward_map[k] = i
+            if k < len(self._backward_map):
+                self._backward_map[k] = i
            else:
                raise Exception('The node with ID ', str(k), ' is not contained in the target nodes of the node map.')

     def set_induced_cost(self, induced_cost):
-        self.__induced_cost = induced_cost
+        self._induced_cost = induced_cost

     def induced_cost(self):
-        return self.__induced_cost
+        return self._induced_cost

     @property
     def forward_map(self):
-        return self.__forward_map
+        return self._forward_map

     @forward_map.setter
     def forward_map(self, value):
-        self.__forward_map = value
+        self._forward_map = value

     @property
     def backward_map(self):
-        return self.__backward_map
+        return self._backward_map

     @backward_map.setter
     def backward_map(self, value):
-        self.__backward_map = value
+        self._backward_map = value
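For context, `NodeMap` keeps the matching between two graphs in both directions: `add_assignment(i, k)` writes `k` into the forward map at position `i` and `i` into the backward map at position `k`, with `dummy_node()` encoding insertions/deletions and `undefined_node()` marking unassigned slots. A small self-contained illustration of that invariant, using plain integer sentinels instead of gklearn's helpers:

```python
DUMMY, UNDEF = -1, -2  # stand-ins for dummy_node() / undefined_node()

forward = [UNDEF] * 3   # image of each source node
backward = [UNDEF] * 2  # pre-image of each target node

def add_assignment(i, k):
    if i != DUMMY:
        forward[i] = k
    if k != DUMMY:
        backward[k] = i

add_assignment(0, 1)      # source node 0 is substituted by target node 1
add_assignment(2, DUMMY)  # source node 2 is deleted
print(forward)   # [1, -2, -1]: image(0) == 1, node 1 unassigned, node 2 deleted
print(backward)  # [-2, 0]: pre_image(1) == 0
```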
@@ -49,7 +49,7 @@ class CostMatricesLearner(CostsLearner):
                np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            self.__execute_cvx(prob)
+            self._execute_cvx(prob)
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
         elif not self._triangle_rule and not self._allow_zeros: # @todo

@@ -57,7 +57,7 @@ class CostMatricesLearner(CostsLearner):
            cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
            constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            self.__execute_cvx(prob)
+            self._execute_cvx(prob)
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
         elif self._triangle_rule and not self._allow_zeros: # @todo

@@ -67,7 +67,7 @@ class CostMatricesLearner(CostsLearner):
                np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            self.__execute_cvx(prob)
+            self._execute_cvx(prob)
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
         else:

@@ -113,7 +113,7 @@ class CostMatricesLearner(CostsLearner):
                elif abs(cost - self._cost_list[-2][i]) / cost > self._epsilon_ec:
                    self._ec_changed = True
                    break
-#                if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
+#                if abs(cost - edit_cost_list[-2][i]) > self._epsilon_ec:
 #                    ec_changed = True
 #                    break
         self._residual_changed = False

@@ -135,7 +135,7 @@ class CostMatricesLearner(CostsLearner):
         print('-------------------------------------------------------------------------')
         print('States of iteration', self._itrs + 1)
         print('-------------------------------------------------------------------------')
-#        print('Time spend:', self.__runtime_optimize_ec)
+#        print('Time spend:', self._runtime_optimize_ec)
         print('Total number of iterations for optimizing:', self._itrs + 1)
         print('Total number of updating edit costs:', self._num_updates_ecs)
         print('Was optimization of edit costs converged:', self._converged)

@@ -126,8 +126,8 @@ class CostsLearner(object):
     def termination_criterion_met(self, converged, timer, itr, itrs_without_update):
         if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
-#            if self.__state == AlgorithmState.TERMINATED:
-#                self.__state = AlgorithmState.INITIALIZED
+#            if self._state == AlgorithmState.TERMINATED:
+#                self._state = AlgorithmState.INITIALIZED
            return True
         return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)
@@ -19,27 +19,27 @@ class LSAPESolver(object):
         * @param[in] cost_matrix Pointer to the LSAPE problem instance that should be solved.
         */
         """
-        self.__cost_matrix = cost_matrix
-        self.__model = 'ECBP'
-        self.__greedy_method = 'BASIC'
-        self.__solve_optimally = True
-        self.__minimal_cost = 0
-        self.__row_to_col_assignments = []
-        self.__col_to_row_assignments = []
-        self.__dual_var_rows = [] # @todo
-        self.__dual_var_cols = [] # @todo
+        self._cost_matrix = cost_matrix
+        self._model = 'ECBP'
+        self._greedy_method = 'BASIC'
+        self._solve_optimally = True
+        self._minimal_cost = 0
+        self._row_to_col_assignments = []
+        self._col_to_row_assignments = []
+        self._dual_var_rows = [] # @todo
+        self._dual_var_cols = [] # @todo

     def clear_solution(self):
         """Clears a previously computed solution.
         """
-        self.__minimal_cost = 0
-        self.__row_to_col_assignments.clear()
-        self.__col_to_row_assignments.clear()
-        self.__row_to_col_assignments.append([]) # @todo
-        self.__col_to_row_assignments.append([])
-        self.__dual_var_rows = [] # @todo
-        self.__dual_var_cols = [] # @todo
+        self._minimal_cost = 0
+        self._row_to_col_assignments.clear()
+        self._col_to_row_assignments.clear()
+        self._row_to_col_assignments.append([]) # @todo
+        self._col_to_row_assignments.append([])
+        self._dual_var_rows = [] # @todo
+        self._dual_var_cols = [] # @todo

     def set_model(self, model):

@@ -49,8 +49,8 @@ class LSAPESolver(object):
         * @param[in] model The model that should be used.
         */
         """
-        self.__solve_optimally = True
-        self.__model = model
+        self._solve_optimally = True
+        self._model = model

     def solve(self, num_solutions=1):

@@ -61,17 +61,17 @@ class LSAPESolver(object):
         */
         """
         self.clear_solution()
-        if self.__solve_optimally:
-            row_ind, col_ind = linear_sum_assignment(self.__cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used.
-            self.__row_to_col_assignments[0] = col_ind
-            self.__col_to_row_assignments[0] = np.argsort(col_ind) # @todo: might be slow, can use row_ind
-            self.__compute_cost_from_assignments()
+        if self._solve_optimally:
+            row_ind, col_ind = linear_sum_assignment(self._cost_matrix) # @todo: only hungarianLSAPE ('ECBP') can be used.
+            self._row_to_col_assignments[0] = col_ind
+            self._col_to_row_assignments[0] = np.argsort(col_ind) # @todo: might be slow, can use row_ind
+            self._compute_cost_from_assignments()
            if num_solutions > 1:
                pass # @todo:
         else:
            print('here is non op.')
            pass # @todo: greedy.
-#            self.__
+#            self._

     def minimal_cost(self):

@@ -81,7 +81,7 @@ class LSAPESolver(object):
         * @return Cost of computed solutions.
         */
         """
-        return self.__minimal_cost
+        return self._minimal_cost

     def get_assigned_col(self, row, solution_id=0):

@@ -93,7 +93,7 @@ class LSAPESolver(object):
         * @returns Column to which @p row is assigned to in solution with ID @p solution_id or ged::undefined() if @p row is not assigned to any column.
         */
         """
-        return self.__row_to_col_assignments[solution_id][row]
+        return self._row_to_col_assignments[solution_id][row]

     def get_assigned_row(self, col, solution_id=0):

@@ -105,7 +105,7 @@ class LSAPESolver(object):
         * @returns Row to which @p col is assigned to in solution with ID @p solution_id or ged::undefined() if @p col is not assigned to any row.
         */
         """
-        return self.__col_to_row_assignments[solution_id][col]
+        return self._col_to_row_assignments[solution_id][col]

     def num_solutions(self):

@@ -115,8 +115,8 @@ class LSAPESolver(object):
         * @returns Actual number of solutions computed by solve(). Might be smaller than @p num_solutions.
         */
         """
-        return len(self.__row_to_col_assignments)
+        return len(self._row_to_col_assignments)

-    def __compute_cost_from_assignments(self): # @todo
-        self.__minimal_cost = np.sum(self.__cost_matrix[range(0, len(self.__row_to_col_assignments[0])), self.__row_to_col_assignments[0]])
+    def _compute_cost_from_assignments(self): # @todo
+        self._minimal_cost = np.sum(self._cost_matrix[range(0, len(self._row_to_col_assignments[0])), self._row_to_col_assignments[0]])
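For context on `solve()`: `scipy.optimize.linear_sum_assignment` returns `(row_ind, col_ind)`, and for a square cost matrix `row_ind` is simply `0..n-1`, so `col_ind` is directly the row-to-column assignment and `np.argsort(col_ind)` inverts it into the column-to-row map, exactly as the hunk does. A standalone check:

```python
import numpy as np
from scipy.optimize import linear_sum_assignment

cost = np.array([[4.0, 1.0, 3.0],
                 [2.0, 0.0, 5.0],
                 [3.0, 2.0, 2.0]])
row_ind, col_ind = linear_sum_assignment(cost)
print(row_ind)              # [0 1 2]: trivial for a square matrix
print(col_ind)              # [1 0 2]: row i is assigned to column col_ind[i]
print(np.argsort(col_ind))  # [1 0 2]: column j is assigned to row argsort(col_ind)[j]
print(cost[row_ind, col_ind].sum())  # minimal cost: 1.0 + 2.0 + 2.0 = 5.0
```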
@@ -70,10 +70,16 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True
     # compute ged.
     # options used to compute numbers of edit operations.
-    neo_options = {'edit_cost': options['edit_cost'],
-            'is_cml': True,
-            'node_labels': node_labels,
-            'edge_labels': edge_labels}
+    if node_label_costs is None and edge_label_costs is None:
+        neo_options = {'edit_cost': options['edit_cost'],
+                'is_cml': False,
+                'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'],
+                'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
+    else:
+        neo_options = {'edit_cost': options['edit_cost'],
+                'is_cml': True,
+                'node_labels': node_labels,
+                'edge_labels': edge_labels}
     ged_mat = np.zeros((len(graphs), len(graphs)))
     if parallel:
        len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
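Reviewer note: the added branch restores the attribute-based code path (`'is_cml': False`, reading `node_labels`/`edge_labels`/`node_attrs`/`edge_attrs` straight from `options`) whenever no label cost matrices were supplied, and keeps the cost-matrix-learning path (`'is_cml': True`) otherwise. `node_label_costs` and `edge_label_costs` are defined earlier in the function and are not shown in this hunk.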
@@ -26,18 +26,18 @@ class CommonWalk(GraphKernel):
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__weight = kwargs.get('weight', 1)
-        self.__compute_method = kwargs.get('compute_method', None)
-        self.__ds_infos = kwargs.get('ds_infos', {})
-        self.__compute_method = self.__compute_method.lower()
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._weight = kwargs.get('weight', 1)
+        self._compute_method = kwargs.get('compute_method', None)
+        self._ds_infos = kwargs.get('ds_infos', {})
+        self._compute_method = self._compute_method.lower()

     def _compute_gm_series(self):
-        self.__check_graphs(self._graphs)
-        self.__add_dummy_labels(self._graphs)
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs(self._graphs)
+        self._add_dummy_labels(self._graphs)
+        if not self._ds_infos['directed']: # convert
            self._graphs = [G.to_directed() for G in self._graphs]

         # compute Gram matrix.

@@ -51,15 +51,15 @@ class CommonWalk(GraphKernel):
            iterator = itr

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
            for i, j in iterator:
-                kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
+                kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
            for i, j in iterator:
-                kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
+                kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel

@@ -67,9 +67,9 @@ class CommonWalk(GraphKernel):
     def _compute_gm_imap_unordered(self):
-        self.__check_graphs(self._graphs)
-        self.__add_dummy_labels(self._graphs)
-        if not self.__ds_infos['directed']: # convert
+        self._check_graphs(self._graphs)
+        self._add_dummy_labels(self._graphs)
+        if not self._ds_infos['directed']: # convert
            self._graphs = [G.to_directed() for G in self._graphs]

         # compute Gram matrix.

@@ -80,10 +80,10 @@ class CommonWalk(GraphKernel):
 #            G_gn = gn_toshare

         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
            do_fun = self._wrapper_kernel_do_exp
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
            do_fun = self._wrapper_kernel_do_geo
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm, | ||||
@@ -93,9 +93,9 @@ class CommonWalk(GraphKernel): | |||||
def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
self.__check_graphs(g_list + [g1]) | |||||
self.__add_dummy_labels(g_list + [g1]) | |||||
if not self.__ds_infos['directed']: # convert | |||||
self._check_graphs(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
if not self._ds_infos['directed']: # convert | |||||
g1 = g1.to_directed() | g1 = g1.to_directed() | ||||
g_list = [G.to_directed() for G in g_list] | g_list = [G.to_directed() for G in g_list] | ||||
@@ -107,23 +107,23 @@ class CommonWalk(GraphKernel): | |||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
# direct product graph method - exponential | # direct product graph method - exponential | ||||
if self.__compute_method == 'exp': | |||||
if self._compute_method == 'exp': | |||||
for i in iterator: | for i in iterator: | ||||
kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight) | |||||
kernel = self._kernel_do_exp(g1, g_list[i], self._weight) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
# direct product graph method - geometric | # direct product graph method - geometric | ||||
elif self.__compute_method == 'geo': | |||||
elif self._compute_method == 'geo': | |||||
for i in iterator: | for i in iterator: | ||||
kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight) | |||||
kernel = self._kernel_do_geo(g1, g_list[i], self._weight) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
return kernel_list | return kernel_list | ||||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
self.__check_graphs(g_list + [g1]) | |||||
self.__add_dummy_labels(g_list + [g1]) | |||||
if not self.__ds_infos['directed']: # convert | |||||
self._check_graphs(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
if not self._ds_infos['directed']: # convert | |||||
g1 = g1.to_directed() | g1 = g1.to_directed() | ||||
g_list = [G.to_directed() for G in g_list] | g_list = [G.to_directed() for G in g_list] | ||||
@@ -136,10 +136,10 @@ class CommonWalk(GraphKernel): | |||||
# G_g_list = g_list_toshare | # G_g_list = g_list_toshare | ||||
# direct product graph method - exponential | # direct product graph method - exponential | ||||
if self.__compute_method == 'exp': | |||||
if self._compute_method == 'exp': | |||||
do_fun = self._wrapper_kernel_list_do_exp | do_fun = self._wrapper_kernel_list_do_exp | ||||
# direct product graph method - geometric | # direct product graph method - geometric | ||||
elif self.__compute_method == 'geo': | |||||
elif self._compute_method == 'geo': | |||||
do_fun = self._wrapper_kernel_list_do_geo | do_fun = self._wrapper_kernel_list_do_geo | ||||
def func_assign(result, var_to_assign): | def func_assign(result, var_to_assign): | ||||
@@ -154,31 +154,31 @@ class CommonWalk(GraphKernel): | |||||
def _wrapper_kernel_list_do_exp(self, itr): | def _wrapper_kernel_list_do_exp(self, itr): | ||||
return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight) | |||||
return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight) | |||||
def _wrapper_kernel_list_do_geo(self, itr): | def _wrapper_kernel_list_do_geo(self, itr): | ||||
return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight) | |||||
return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight) | |||||
def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
self.__check_graphs([g1] + [g2]) | |||||
self.__add_dummy_labels([g1] + [g2]) | |||||
if not self.__ds_infos['directed']: # convert | |||||
self._check_graphs([g1] + [g2]) | |||||
self._add_dummy_labels([g1] + [g2]) | |||||
if not self._ds_infos['directed']: # convert | |||||
g1 = g1.to_directed() | g1 = g1.to_directed() | ||||
g2 = g2.to_directed() | g2 = g2.to_directed() | ||||
# direct product graph method - exponential | # direct product graph method - exponential | ||||
if self.__compute_method == 'exp': | |||||
kernel = self.__kernel_do_exp(g1, g2, self.__weight) | |||||
if self._compute_method == 'exp': | |||||
kernel = self._kernel_do_exp(g1, g2, self._weight) | |||||
# direct product graph method - geometric | # direct product graph method - geometric | ||||
elif self.__compute_method == 'geo': | |||||
kernel = self.__kernel_do_geo(g1, g2, self.__weight) | |||||
elif self._compute_method == 'geo': | |||||
kernel = self._kernel_do_geo(g1, g2, self._weight) | |||||
return kernel | return kernel | ||||
def __kernel_do_exp(self, g1, g2, beta): | |||||
def _kernel_do_exp(self, g1, g2, beta): | |||||
"""Compute common walk graph kernel between 2 graphs using exponential | """Compute common walk graph kernel between 2 graphs using exponential | ||||
series. | series. | ||||
@@ -195,7 +195,7 @@ class CommonWalk(GraphKernel): | |||||
The common walk Kernel between 2 graphs. | The common walk Kernel between 2 graphs. | ||||
""" | """ | ||||
# get tensor product / direct product | # get tensor product / direct product | ||||
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels) | |||||
gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels) | |||||
# return 0 if the direct product graph has no more than 1 node. | # return 0 if the direct product graph has no more than 1 node. | ||||
if nx.number_of_nodes(gp) < 2: | if nx.number_of_nodes(gp) < 2: | ||||
return 0 | return 0 | ||||
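For intuition, a minimal sketch of the exponential-series common walk kernel on an already-built direct product graph gp; it sums all entries of exp(beta * W) directly, rather than using the eigendecomposition the class applies internally (SciPy assumed available):

import networkx as nx
import numpy as np
from scipy.linalg import expm  # SciPy assumed available

def common_walk_exp_sketch(gp, beta):
    # W: adjacency matrix of the direct product graph.
    # Exponential series: k = sum over all entries of exp(beta * W).
    W = nx.to_numpy_array(gp)
    return float(np.sum(expm(beta * W)))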
@@ -227,10 +227,10 @@ class CommonWalk(GraphKernel): | |||||
def _wrapper_kernel_do_exp(self, itr): | def _wrapper_kernel_do_exp(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight) | |||||
return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight) | |||||
def __kernel_do_geo(self, g1, g2, gamma): | |||||
def _kernel_do_geo(self, g1, g2, gamma): | |||||
"""Compute common walk graph kernel between 2 graphs using geometric | """Compute common walk graph kernel between 2 graphs using geometric | ||||
series. | series. | ||||
@@ -247,7 +247,7 @@ class CommonWalk(GraphKernel): | |||||
The common walk Kernel between 2 graphs. | The common walk Kernel between 2 graphs. | ||||
""" | """ | ||||
# get tensor product / direct product | # get tensor product / direct product | ||||
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels) | |||||
gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels) | |||||
# return 0 if the direct product graph has no more than 1 node. | # return 0 if the direct product graph has no more than 1 node. | ||||
if nx.number_of_nodes(gp) < 2: | if nx.number_of_nodes(gp) < 2: | ||||
return 0 | return 0 | ||||
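The geometric variant admits an equally short sketch: it sums the entries of the resolvent (I - gamma * W)^(-1), which is finite only when gamma stays below the reciprocal of W's largest eigenvalue (a direct solve, not necessarily what the class does internally):

import networkx as nx
import numpy as np

def common_walk_geo_sketch(gp, gamma):
    # Geometric series: k = sum over all entries of (I - gamma * W)^(-1);
    # converges only for gamma < 1 / (largest eigenvalue of W).
    W = nx.to_numpy_array(gp)
    return float(np.sum(np.linalg.inv(np.identity(W.shape[0]) - gamma * W)))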
@@ -262,24 +262,24 @@ class CommonWalk(GraphKernel): | |||||
def _wrapper_kernel_do_geo(self, itr): | def _wrapper_kernel_do_geo(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight) | |||||
return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight) | |||||
def __check_graphs(self, Gn): | |||||
def _check_graphs(self, Gn): | |||||
for g in Gn: | for g in Gn: | ||||
if nx.number_of_nodes(g) == 1: | if nx.number_of_nodes(g) == 1: | ||||
raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.') | raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.') | ||||
def __add_dummy_labels(self, Gn): | |||||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
def _add_dummy_labels(self, Gn): | |||||
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | |||||
for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | ||||
self.__node_labels = [SpecialLabel.DUMMY] | |||||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||||
self._node_labels = [SpecialLabel.DUMMY] | |||||
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): | |||||
for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | ||||
self.__edge_labels = [SpecialLabel.DUMMY] | |||||
self._edge_labels = [SpecialLabel.DUMMY] | |||||
def _init_worker_gm(gn_toshare): | def _init_worker_gm(gn_toshare): | ||||
@@ -60,7 +60,7 @@ class ConjugateGradient(RandomWalkMeta): | |||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda) | |||||
kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda) | |||||
gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
@@ -127,7 +127,7 @@ class ConjugateGradient(RandomWalkMeta): | |||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
kernel = self.__kernel_do(g1, g_list[i], lmda) | |||||
kernel = self._kernel_do(g1, g_list[i], lmda) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
else: # @todo | else: # @todo | ||||
@@ -190,7 +190,7 @@ class ConjugateGradient(RandomWalkMeta): | |||||
g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal') | g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal') | ||||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | if self._p is None and self._q is None: # p and q are uniform distributions as default. | ||||
kernel = self.__kernel_do(g1, g2, lmda) | |||||
kernel = self._kernel_do(g1, g2, lmda) | |||||
else: # @todo | else: # @todo | ||||
pass | pass | ||||
@@ -198,7 +198,7 @@ class ConjugateGradient(RandomWalkMeta): | |||||
return kernel | return kernel | ||||
def __kernel_do(self, g1, g2, lmda): | |||||
def _kernel_do(self, g1, g2, lmda): | |||||
# First, compute kernels between all pairs of nodes using the method borrowed | # First, compute kernels between all pairs of nodes using the method borrowed | ||||
# from FCSP. It is faster than directly computing all edge kernels | # from FCSP. It is faster than directly computing all edge kernels | ||||
@@ -222,7 +222,7 @@ class ConjugateGradient(RandomWalkMeta): | |||||
def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight) | |||||
return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight) | |||||
def _func_fp(x, p_times, lmda, w_times): | def _func_fp(x, p_times, lmda, w_times): | ||||
@@ -246,19 +246,19 @@ class ConjugateGradient(RandomWalkMeta): | |||||
# Define edge kernels. | # Define edge kernels. | ||||
def compute_ek_11(e1, e2, ke): | def compute_ek_11(e1, e2, ke): | ||||
e1_labels = [e1[2][el] for el in self._edge_labels] | e1_labels = [e1[2][el] for el in self._edge_labels] | ||||
e2_labels = [e2[2][el] for el in self.__edge_labels] | |||||
e2_labels = [e2[2][el] for el in self._edge_labels] | |||||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | ||||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | ||||
return ke(e1_labels, e2_labels, e1_attrs, e2_attrs) | return ke(e1_labels, e2_labels, e1_attrs, e2_attrs) | ||||
def compute_ek_10(e1, e2, ke): | def compute_ek_10(e1, e2, ke): | ||||
e1_labels = [e1[2][el] for el in self.__edge_labels] | |||||
e2_labels = [e2[2][el] for el in self.__edge_labels] | |||||
e1_labels = [e1[2][el] for el in self._edge_labels] | |||||
e2_labels = [e2[2][el] for el in self._edge_labels] | |||||
return ke(e1_labels, e2_labels) | return ke(e1_labels, e2_labels) | ||||
def compute_ek_01(e1, e2, ke): | def compute_ek_01(e1, e2, ke): | ||||
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs] | |||||
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs] | |||||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | |||||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | |||||
return ke(e1_attrs, e2_attrs) | return ke(e1_attrs, e2_attrs) | ||||
def compute_ek_00(e1, e2, ke): | def compute_ek_00(e1, e2, ke): | ||||
@@ -60,7 +60,7 @@ class FixedPoint(RandomWalkMeta): | |||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
kernel = self.__kernel_do(self._graphs[i], self._graphs[j], lmda) | |||||
kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda) | |||||
gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
@@ -127,7 +127,7 @@ class FixedPoint(RandomWalkMeta): | |||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
kernel = self.__kernel_do(g1, g_list[i], lmda) | |||||
kernel = self._kernel_do(g1, g_list[i], lmda) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
else: # @todo | else: # @todo | ||||
@@ -190,7 +190,7 @@ class FixedPoint(RandomWalkMeta): | |||||
g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal') | g2 = nx.convert_node_labels_to_integers(g2, first_label=0, label_attribute='label_orignal') | ||||
if self._p is None and self._q is None: # p and q are uniform distributions as default. | if self._p is None and self._q is None: # p and q are uniform distributions as default. | ||||
kernel = self.__kernel_do(g1, g2, lmda) | |||||
kernel = self._kernel_do(g1, g2, lmda) | |||||
else: # @todo | else: # @todo | ||||
pass | pass | ||||
@@ -198,7 +198,7 @@ class FixedPoint(RandomWalkMeta): | |||||
return kernel | return kernel | ||||
def __kernel_do(self, g1, g2, lmda): | |||||
def _kernel_do(self, g1, g2, lmda): | |||||
# First, compute kernels between all pairs of nodes using the method borrowed | # First, compute kernels between all pairs of nodes using the method borrowed | ||||
# from FCSP. It is faster than directly computing all edge kernels | # from FCSP. It is faster than directly computing all edge kernels | ||||
@@ -221,10 +221,10 @@ class FixedPoint(RandomWalkMeta): | |||||
def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do(G_gn[i], G_gn[j], self._weight) | |||||
return i, j, self._kernel_do(G_gn[i], G_gn[j], self._weight) | |||||
def _func_fp(x, p_times, lmda, w_times): | |||||
def _func_fp(self, x, p_times, lmda, w_times): | |||||
haha = w_times * x | haha = w_times * x | ||||
haha = lmda * haha | haha = lmda * haha | ||||
haha = p_times + haha | haha = p_times + haha | ||||
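_func_fp above is the map x -> p + lmda * (W x), whose fixed point solves the linear system (I - lmda * W) x = p behind the fixed-point random walk kernel. A self-contained sketch of that iteration, with hypothetical small inputs:

import numpy as np

def solve_fixed_point(w_times, p_times, lmda, tol=1e-9, max_iter=1000):
    # Iterate x <- p + lmda * (W x); converges when lmda times the
    # spectral radius of W is below 1.
    x = p_times.copy()
    for _ in range(max_iter):
        x_new = p_times + lmda * w_times.dot(x)
        if np.linalg.norm(x_new - x) < tol:
            return x_new
        x = x_new
    return x

w_times = np.array([[0., 1.], [1., 0.]])  # hypothetical weight matrix
p_times = np.ones(2) / 2                  # hypothetical start distribution
x = solve_fixed_point(w_times, p_times, lmda=0.3)
# Cross-check against the direct solve of (I - lmda * W) x = p.
assert np.allclose(x, np.linalg.solve(np.eye(2) - 0.3 * w_times, p_times))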
@@ -245,19 +245,19 @@ class FixedPoint(RandomWalkMeta): | |||||
# Define edge kernels. | # Define edge kernels. | ||||
def compute_ek_11(e1, e2, ke): | def compute_ek_11(e1, e2, ke): | ||||
e1_labels = [e1[2][el] for el in self._edge_labels] | e1_labels = [e1[2][el] for el in self._edge_labels] | ||||
e2_labels = [e2[2][el] for el in self.__edge_labels] | |||||
e2_labels = [e2[2][el] for el in self._edge_labels] | |||||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | ||||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | ||||
return ke(e1_labels, e2_labels, e1_attrs, e2_attrs) | return ke(e1_labels, e2_labels, e1_attrs, e2_attrs) | ||||
def compute_ek_10(e1, e2, ke): | def compute_ek_10(e1, e2, ke): | ||||
e1_labels = [e1[2][el] for el in self.__edge_labels] | |||||
e2_labels = [e2[2][el] for el in self.__edge_labels] | |||||
e1_labels = [e1[2][el] for el in self._edge_labels] | |||||
e2_labels = [e2[2][el] for el in self._edge_labels] | |||||
return ke(e1_labels, e2_labels) | return ke(e1_labels, e2_labels) | ||||
def compute_ek_01(e1, e2, ke): | def compute_ek_01(e1, e2, ke): | ||||
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs] | |||||
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs] | |||||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | |||||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | |||||
return ke(e1_attrs, e2_attrs) | return ke(e1_attrs, e2_attrs) | ||||
def compute_ek_00(e1, e2, ke): | def compute_ek_00(e1, e2, ke): | ||||
@@ -37,7 +37,7 @@ class GraphKernel(object): | |||||
raise Exception('The graph list given is empty. No computation was performed.') | raise Exception('The graph list given is empty. No computation was performed.') | ||||
else: | else: | ||||
self._graphs = [g.copy() for g in graphs[0]] | self._graphs = [g.copy() for g in graphs[0]] | ||||
self._gram_matrix = self.__compute_gram_matrix() | |||||
self._gram_matrix = self._compute_gram_matrix() | |||||
self._gram_matrix_unnorm = np.copy(self._gram_matrix) | self._gram_matrix_unnorm = np.copy(self._gram_matrix) | ||||
if self._normalize: | if self._normalize: | ||||
self._gram_matrix = self.normalize_gm(self._gram_matrix) | self._gram_matrix = self.normalize_gm(self._gram_matrix) | ||||
@@ -45,17 +45,17 @@ class GraphKernel(object): | |||||
elif len(graphs) == 2: | elif len(graphs) == 2: | ||||
if self.is_graph(graphs[0]) and self.is_graph(graphs[1]): | if self.is_graph(graphs[0]) and self.is_graph(graphs[1]): | ||||
kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy()) | |||||
kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy()) | |||||
return kernel, self._run_time | return kernel, self._run_time | ||||
elif self.is_graph(graphs[0]) and isinstance(graphs[1], list): | elif self.is_graph(graphs[0]) and isinstance(graphs[1], list): | ||||
g1 = graphs[0].copy() | g1 = graphs[0].copy() | ||||
g_list = [g.copy() for g in graphs[1]] | g_list = [g.copy() for g in graphs[1]] | ||||
kernel_list = self.__compute_kernel_list(g1, g_list) | |||||
kernel_list = self._compute_kernel_list(g1, g_list) | |||||
return kernel_list, self._run_time | return kernel_list, self._run_time | ||||
elif isinstance(graphs[0], list) and self.is_graph(graphs[1]): | elif isinstance(graphs[0], list) and self.is_graph(graphs[1]): | ||||
g1 = graphs[1].copy() | g1 = graphs[1].copy() | ||||
g_list = [g.copy() for g in graphs[0]] | g_list = [g.copy() for g in graphs[0]] | ||||
kernel_list = self.__compute_kernel_list(g1, g_list) | |||||
kernel_list = self._compute_kernel_list(g1, g_list) | |||||
return kernel_list, self._run_time | return kernel_list, self._run_time | ||||
else: | else: | ||||
raise Exception('Cannot detect graphs.') | raise Exception('Cannot detect graphs.') | ||||
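The dispatch above gives the kernel three calling conventions: a list of graphs yields a Gram matrix, a graph plus a list yields a kernel list, and two graphs yield a single kernel value. A hedged usage sketch with the CommonWalk class from this changeset (the import path and the public method name compute are assumptions here; `graphs` is a hypothetical list of networkx graphs):

from gklearn.kernels.common_walk import CommonWalk  # import path assumed

kernel = CommonWalk(node_labels=[], edge_labels=[], weight=0.01,
                    compute_method='geo', ds_infos={'directed': False})

gram, run_time = kernel.compute(graphs)               # list -> Gram matrix
k_list, run_time = kernel.compute(graphs[0], graphs)  # graph + list -> kernel list
k, run_time = kernel.compute(graphs[0], graphs[1])    # two graphs -> single kernel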
@@ -99,7 +99,7 @@ class GraphKernel(object): | |||||
return dis_mat, dis_max, dis_min, dis_mean | return dis_mat, dis_max, dis_min, dis_mean | ||||
def __compute_gram_matrix(self): | |||||
def _compute_gram_matrix(self): | |||||
start_time = time.time() | start_time = time.time() | ||||
if self._parallel == 'imap_unordered': | if self._parallel == 'imap_unordered': | ||||
@@ -125,7 +125,7 @@ class GraphKernel(object): | |||||
pass | pass | ||||
def __compute_kernel_list(self, g1, g_list): | |||||
def _compute_kernel_list(self, g1, g_list): | |||||
start_time = time.time() | start_time = time.time() | ||||
if self._parallel == 'imap_unordered': | if self._parallel == 'imap_unordered': | ||||
@@ -151,7 +151,7 @@ class GraphKernel(object): | |||||
pass | pass | ||||
def __compute_single_kernel(self, g1, g2): | |||||
def _compute_single_kernel(self, g1, g2): | |||||
start_time = time.time() | start_time = time.time() | ||||
kernel = self._compute_single_kernel_series(g1, g2) | kernel = self._compute_single_kernel_series(g1, g2) | ||||
@@ -33,25 +33,25 @@ class Marginalized(GraphKernel): | |||||
def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
GraphKernel.__init__(self) | GraphKernel.__init__(self) | ||||
self.__node_labels = kwargs.get('node_labels', []) | |||||
self.__edge_labels = kwargs.get('edge_labels', []) | |||||
self.__p_quit = kwargs.get('p_quit', 0.5) | |||||
self.__n_iteration = kwargs.get('n_iteration', 10) | |||||
self.__remove_totters = kwargs.get('remove_totters', False) | |||||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||||
self.__n_iteration = int(self.__n_iteration) | |||||
self._node_labels = kwargs.get('node_labels', []) | |||||
self._edge_labels = kwargs.get('edge_labels', []) | |||||
self._p_quit = kwargs.get('p_quit', 0.5) | |||||
self._n_iteration = kwargs.get('n_iteration', 10) | |||||
self._remove_totters = kwargs.get('remove_totters', False) | |||||
self._ds_infos = kwargs.get('ds_infos', {}) | |||||
self._n_iteration = int(self._n_iteration) | |||||
def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
self.__add_dummy_labels(self._graphs) | |||||
self._add_dummy_labels(self._graphs) | |||||
if self.__remove_totters: | |||||
if self._remove_totters: | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout) | iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout) | ||||
else: | else: | ||||
iterator = self._graphs | iterator = self._graphs | ||||
# @todo: this may not work. | # @todo: this may not work. | ||||
self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator] | |||||
self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator] | |||||
# compute Gram matrix. | # compute Gram matrix. | ||||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
@@ -63,7 +63,7 @@ class Marginalized(GraphKernel): | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
kernel = self.__kernel_do(self._graphs[i], self._graphs[j]) | |||||
kernel = self._kernel_do(self._graphs[i], self._graphs[j]) | |||||
gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
gram_matrix[j][i] = kernel # @todo: no directed graph considered? | gram_matrix[j][i] = kernel # @todo: no directed graph considered? | ||||
@@ -71,9 +71,9 @@ class Marginalized(GraphKernel): | |||||
def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
self.__add_dummy_labels(self._graphs) | |||||
self._add_dummy_labels(self._graphs) | |||||
if self.__remove_totters: | |||||
if self._remove_totters: | |||||
pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
itr = range(0, len(self._graphs)) | itr = range(0, len(self._graphs)) | ||||
if len(self._graphs) < 100 * self._n_jobs: | if len(self._graphs) < 100 * self._n_jobs: | ||||
@@ -105,16 +105,16 @@ class Marginalized(GraphKernel): | |||||
def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
if self.__remove_totters: | |||||
g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work. | |||||
if self._remove_totters: | |||||
g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work. | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout) | iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout) | ||||
else: | else: | ||||
iterator = g_list | iterator = g_list | ||||
# @todo: this may not work. | # @todo: this may not work. | ||||
g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator] | |||||
g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator] | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
@@ -123,17 +123,17 @@ class Marginalized(GraphKernel): | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
kernel = self.__kernel_do(g1, g_list[i]) | |||||
kernel = self._kernel_do(g1, g_list[i]) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
return kernel_list | return kernel_list | ||||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
if self.__remove_totters: | |||||
g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work. | |||||
if self._remove_totters: | |||||
g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work. | |||||
pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
itr = range(0, len(g_list)) | itr = range(0, len(g_list)) | ||||
if len(g_list) < 100 * self._n_jobs: | if len(g_list) < 100 * self._n_jobs: | ||||
@@ -171,19 +171,19 @@ class Marginalized(GraphKernel): | |||||
def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
return itr, self.__kernel_do(G_g1, G_g_list[itr]) | |||||
return itr, self._kernel_do(G_g1, G_g_list[itr]) | |||||
def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
self.__add_dummy_labels([g1] + [g2]) | |||||
if self.__remove_totters: | |||||
g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work. | |||||
g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels) | |||||
kernel = self.__kernel_do(g1, g2) | |||||
self._add_dummy_labels([g1] + [g2]) | |||||
if self._remove_totters: | |||||
g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work. | |||||
g2 = untotterTransformation(g2, self._node_labels, self._edge_labels) | |||||
kernel = self._kernel_do(g1, g2) | |||||
return kernel | return kernel | ||||
def __kernel_do(self, g1, g2): | |||||
def _kernel_do(self, g1, g2): | |||||
"""Compute marginalized graph kernel between 2 graphs. | """Compute marginalized graph kernel between 2 graphs. | ||||
Parameters | Parameters | ||||
@@ -205,7 +205,7 @@ class Marginalized(GraphKernel): | |||||
p_init_G1 = 1 / num_nodes_G1 | p_init_G1 = 1 / num_nodes_G1 | ||||
p_init_G2 = 1 / num_nodes_G2 | p_init_G2 = 1 / num_nodes_G2 | ||||
q = self.__p_quit * self.__p_quit | |||||
q = self._p_quit * self._p_quit | |||||
r1 = q | r1 = q | ||||
# # initial R_inf | # # initial R_inf | ||||
@@ -260,36 +260,36 @@ class Marginalized(GraphKernel): | |||||
if len(g2[node2]) > 0: | if len(g2[node2]) > 0: | ||||
R_inf[(node1, node2)] = r1 | R_inf[(node1, node2)] = r1 | ||||
else: | else: | ||||
R_inf[(node1, node2)] = self.__p_quit | |||||
R_inf[(node1, node2)] = self._p_quit | |||||
else: | else: | ||||
if len(g2[node2]) > 0: | if len(g2[node2]) > 0: | ||||
R_inf[(node1, node2)] = self.__p_quit | |||||
R_inf[(node1, node2)] = self._p_quit | |||||
else: | else: | ||||
R_inf[(node1, node2)] = 1 | R_inf[(node1, node2)] = 1 | ||||
# compute all transition probability first. | # compute all transition probability first. | ||||
t_dict = {} | t_dict = {} | ||||
if self.__n_iteration > 1: | |||||
if self._n_iteration > 1: | |||||
for node1 in g1.nodes(): | for node1 in g1.nodes(): | ||||
neighbor_n1 = g1[node1] | neighbor_n1 = g1[node1] | ||||
# the transition probability distribution in the random walks | # the transition probability distribution in the random walks | ||||
# generating step (uniform distribution over the vertices adjacent | # generating step (uniform distribution over the vertices adjacent | ||||
# to the current vertex) | # to the current vertex) | ||||
if len(neighbor_n1) > 0: | if len(neighbor_n1) > 0: | ||||
p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1) | |||||
p_trans_n1 = (1 - self._p_quit) / len(neighbor_n1) | |||||
for node2 in g2.nodes(): | for node2 in g2.nodes(): | ||||
neighbor_n2 = g2[node2] | neighbor_n2 = g2[node2] | ||||
if len(neighbor_n2) > 0: | if len(neighbor_n2) > 0: | ||||
p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2) | |||||
p_trans_n2 = (1 - self._p_quit) / len(neighbor_n2) | |||||
for neighbor1 in neighbor_n1: | for neighbor1 in neighbor_n1: | ||||
for neighbor2 in neighbor_n2: | for neighbor2 in neighbor_n2: | ||||
t_dict[(node1, node2, neighbor1, neighbor2)] = \ | t_dict[(node1, node2, neighbor1, neighbor2)] = \ | ||||
p_trans_n1 * p_trans_n2 * \ | p_trans_n1 * p_trans_n2 * \ | ||||
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ | |||||
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) | |||||
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \ | |||||
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels)) | |||||
# Compute R_inf with a simple iterative method | # Compute R_inf with a simple iterative method | ||||
for i in range(2, self.__n_iteration + 1): | |||||
for i in range(2, self._n_iteration + 1): | |||||
R_inf_old = R_inf.copy() | R_inf_old = R_inf.copy() | ||||
# Compute R_inf for each pair of nodes | # Compute R_inf for each pair of nodes | ||||
@@ -311,7 +311,7 @@ class Marginalized(GraphKernel): | |||||
# add elements of R_inf up and compute kernel. | # add elements of R_inf up and compute kernel. | ||||
for (n1, n2), value in R_inf.items(): | for (n1, n2), value in R_inf.items(): | ||||
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) | |||||
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels)) | |||||
kernel += s * value # ref [1] equation (6) | kernel += s * value # ref [1] equation (6) | ||||
return kernel | return kernel | ||||
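Restated compactly, the loop above implements equation (6) of ref [1]: k(G1, G2) = sum over node pairs (n1, n2) of p_init(n1) * p_init(n2) * k_node(n1, n2) * R_inf(n1, n2). A sketch of just this accumulation, assuming R_inf has already been computed:

from gklearn.utils.kernels import deltakernel  # same helper used above

def marginalized_sum_sketch(g1, g2, R_inf, node_labels):
    p1 = 1 / g1.number_of_nodes()  # uniform initial probabilities
    p2 = 1 / g2.number_of_nodes()
    kernel = 0
    for (n1, n2), value in R_inf.items():
        match = deltakernel(
            tuple(g1.nodes[n1][nl] for nl in node_labels),
            tuple(g2.nodes[n2][nl] for nl in node_labels))
        kernel += p1 * p2 * match * value  # ref [1] equation (6)
    return kernel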
@@ -320,19 +320,19 @@ class Marginalized(GraphKernel): | |||||
def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do(G_gn[i], G_gn[j]) | |||||
return i, j, self._kernel_do(G_gn[i], G_gn[j]) | |||||
def _wrapper_untotter(self, i): | def _wrapper_untotter(self, i): | ||||
return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels) # @todo: this may not work. | |||||
return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels) # @todo: this may not work. | |||||
def __add_dummy_labels(self, Gn): | |||||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
def _add_dummy_labels(self, Gn): | |||||
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | |||||
for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | ||||
self.__node_labels = [SpecialLabel.DUMMY] | |||||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||||
self._node_labels = [SpecialLabel.DUMMY] | |||||
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): | |||||
for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | ||||
self.__edge_labels = [SpecialLabel.DUMMY] | |||||
self._edge_labels = [SpecialLabel.DUMMY] |
@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
GraphKernel.__init__(self) | GraphKernel.__init__(self) | ||||
self.__node_labels = kwargs.get('node_labels', []) | |||||
self.__edge_labels = kwargs.get('edge_labels', []) | |||||
self.__depth = int(kwargs.get('depth', 10)) | |||||
self.__k_func = kwargs.get('k_func', 'MinMax') | |||||
self.__compute_method = kwargs.get('compute_method', 'trie') | |||||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||||
self._node_labels = kwargs.get('node_labels', []) | |||||
self._edge_labels = kwargs.get('edge_labels', []) | |||||
self._depth = int(kwargs.get('depth', 10)) | |||||
self._k_func = kwargs.get('k_func', 'MinMax') | |||||
self._compute_method = kwargs.get('compute_method', 'trie') | |||||
self._ds_infos = kwargs.get('ds_infos', {}) | |||||
def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
self.__add_dummy_labels(self._graphs) | |||||
self._add_dummy_labels(self._graphs) | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
if self.__compute_method == 'trie': | |||||
all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps] | |||||
if self._compute_method == 'trie': | |||||
all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps] | |||||
for i, j in iterator_kernel: | for i, j in iterator_kernel: | ||||
kernel = self.__kernel_do_trie(all_paths[i], all_paths[j]) | |||||
kernel = self._kernel_do_trie(all_paths[i], all_paths[j]) | |||||
gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
else: | else: | ||||
all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps] | |||||
all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps] | |||||
for i, j in iterator_kernel: | for i, j in iterator_kernel: | ||||
kernel = self.__kernel_do_naive(all_paths[i], all_paths[j]) | |||||
kernel = self._kernel_do_naive(all_paths[i], all_paths[j]) | |||||
gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
gram_matrix[j][i] = kernel | gram_matrix[j][i] = kernel | ||||
@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
self.__add_dummy_labels(self._graphs) | |||||
self._add_dummy_labels(self._graphs) | |||||
# get all paths of all graphs before computing kernels to save time, | # get all paths of all graphs before computing kernels to save time, | ||||
# but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
else: | else: | ||||
chunksize = 100 | chunksize = 100 | ||||
all_paths = [[] for _ in range(len(self._graphs))] | all_paths = [[] for _ in range(len(self._graphs))] | ||||
if self.__compute_method == 'trie' and self.__k_func is not None: | |||||
if self._compute_method == 'trie' and self._k_func is not None: | |||||
get_ps_fun = self._wrapper_find_all_path_as_trie | get_ps_fun = self._wrapper_find_all_path_as_trie | ||||
elif self.__compute_method != 'trie' and self.__k_func is not None: | |||||
elif self._compute_method != 'trie' and self._k_func is not None: | |||||
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | ||||
else: | else: | ||||
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | ||||
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
# compute Gram matrix. | # compute Gram matrix. | ||||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
if self.__compute_method == 'trie' and self.__k_func is not None: | |||||
if self._compute_method == 'trie' and self._k_func is not None: | |||||
def init_worker(trie_toshare): | def init_worker(trie_toshare): | ||||
global G_trie | global G_trie | ||||
G_trie = trie_toshare | G_trie = trie_toshare | ||||
do_fun = self._wrapper_kernel_do_trie | do_fun = self._wrapper_kernel_do_trie | ||||
elif self.__compute_method != 'trie' and self.__k_func is not None: | |||||
elif self._compute_method != 'trie' and self._k_func is not None: | |||||
def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
global G_plist | global G_plist | ||||
G_plist = plist_toshare | G_plist = plist_toshare | ||||
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
global G_plist | global G_plist | ||||
G_plist = plist_toshare | G_plist = plist_toshare | ||||
do_fun = self.__wrapper_kernel_do_kernelless # @todo: what is this? | |||||
do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this? | |||||
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | ||||
glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) | glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) | ||||
@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) | iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) | ||||
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
if self.__compute_method == 'trie': | |||||
paths_g1 = self.__find_all_path_as_trie(g1) | |||||
paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps] | |||||
if self._compute_method == 'trie': | |||||
paths_g1 = self._find_all_path_as_trie(g1) | |||||
paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps] | |||||
for i in iterator_kernel: | for i in iterator_kernel: | ||||
kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i]) | |||||
kernel = self._kernel_do_trie(paths_g1, paths_g_list[i]) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
else: | else: | ||||
paths_g1 = self.__find_all_paths_until_length(g1) | |||||
paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps] | |||||
paths_g1 = self._find_all_paths_until_length(g1) | |||||
paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps] | |||||
for i in iterator_kernel: | for i in iterator_kernel: | ||||
kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i]) | |||||
kernel = self._kernel_do_naive(paths_g1, paths_g_list[i]) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
return kernel_list | return kernel_list | ||||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
# get all paths of all graphs before computing kernels to save time, | # get all paths of all graphs before computing kernels to save time, | ||||
# but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
else: | else: | ||||
chunksize = 100 | chunksize = 100 | ||||
paths_g_list = [[] for _ in range(len(g_list))] | paths_g_list = [[] for _ in range(len(g_list))] | ||||
if self.__compute_method == 'trie' and self.__k_func is not None: | |||||
paths_g1 = self.__find_all_path_as_trie(g1) | |||||
if self._compute_method == 'trie' and self._k_func is not None: | |||||
paths_g1 = self._find_all_path_as_trie(g1) | |||||
get_ps_fun = self._wrapper_find_all_path_as_trie | get_ps_fun = self._wrapper_find_all_path_as_trie | ||||
elif self.__compute_method != 'trie' and self.__k_func is not None: | |||||
paths_g1 = self.__find_all_paths_until_length(g1) | |||||
elif self._compute_method != 'trie' and self._k_func is not None: | |||||
paths_g1 = self._find_all_paths_until_length(g1) | |||||
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) | ||||
else: | else: | ||||
paths_g1 = self.__find_all_paths_until_length(g1) | |||||
paths_g1 = self._find_all_paths_until_length(g1) | |||||
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize), | iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize), | ||||
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
if self.__compute_method == 'trie' and self.__k_func is not None: | |||||
return itr, self.__kernel_do_trie(G_p1, G_plist[itr]) | |||||
elif self.__compute_method != 'trie' and self.__k_func is not None: | |||||
return itr, self.__kernel_do_naive(G_p1, G_plist[itr]) | |||||
if self._compute_method == 'trie' and self._k_func is not None: | |||||
return itr, self._kernel_do_trie(G_p1, G_plist[itr]) | |||||
elif self._compute_method != 'trie' and self._k_func is not None: | |||||
return itr, self._kernel_do_naive(G_p1, G_plist[itr]) | |||||
else: | else: | ||||
return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr]) | |||||
return itr, self._kernel_do_kernelless(G_p1, G_plist[itr]) | |||||
def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
self.__add_dummy_labels([g1] + [g2]) | |||||
if self.__compute_method == 'trie': | |||||
paths_g1 = self.__find_all_path_as_trie(g1) | |||||
paths_g2 = self.__find_all_path_as_trie(g2) | |||||
kernel = self.__kernel_do_trie(paths_g1, paths_g2) | |||||
self._add_dummy_labels([g1] + [g2]) | |||||
if self._compute_method == 'trie': | |||||
paths_g1 = self._find_all_path_as_trie(g1) | |||||
paths_g2 = self._find_all_path_as_trie(g2) | |||||
kernel = self._kernel_do_trie(paths_g1, paths_g2) | |||||
else: | else: | ||||
paths_g1 = self.__find_all_paths_until_length(g1) | |||||
paths_g2 = self.__find_all_paths_until_length(g2) | |||||
kernel = self.__kernel_do_naive(paths_g1, paths_g2) | |||||
paths_g1 = self._find_all_paths_until_length(g1) | |||||
paths_g2 = self._find_all_paths_until_length(g2) | |||||
kernel = self._kernel_do_naive(paths_g1, paths_g2) | |||||
return kernel | return kernel | ||||
def __kernel_do_trie(self, trie1, trie2): | |||||
def _kernel_do_trie(self, trie1, trie2): | |||||
"""Compute path graph kernels up to depth d between 2 graphs using trie. | """Compute path graph kernels up to depth d between 2 graphs using trie. | ||||
Parameters | Parameters | ||||
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
kernel : float | kernel : float | ||||
Path kernel up to h between 2 graphs. | Path kernel up to h between 2 graphs. | ||||
""" | """ | ||||
if self.__k_func == 'tanimoto': | |||||
if self._k_func == 'tanimoto': | |||||
# traverse all paths in graph1 and search them in graph2. Depth-first | # traverse all paths in graph1 and search them in graph2. Depth-first | ||||
# search is applied. | # search is applied. | ||||
def traverseTrie1t(root, trie2, setlist, pcurrent=[]): | def traverseTrie1t(root, trie2, setlist, pcurrent=[]): | ||||
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
# print(setlist) | # print(setlist) | ||||
kernel = setlist[0] / setlist[1] | kernel = setlist[0] / setlist[1] | ||||
elif self.__k_func == 'MinMax': # MinMax kernel | |||||
elif self._k_func == 'MinMax': # MinMax kernel | |||||
# traverse all paths in graph1 and search them in graph2. Depth-first | # traverse all paths in graph1 and search them in graph2. Depth-first | ||||
# search is applied. | # search is applied. | ||||
def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): | def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): | ||||
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def _wrapper_kernel_do_trie(self, itr): | def _wrapper_kernel_do_trie(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j]) | |||||
return i, j, self._kernel_do_trie(G_trie[i], G_trie[j]) | |||||
def __kernel_do_naive(self, paths1, paths2): | |||||
def _kernel_do_naive(self, paths1, paths2): | |||||
"""Compute path graph kernels up to depth d between 2 graphs naively. | """Compute path graph kernels up to depth d between 2 graphs naively. | ||||
Parameters | Parameters | ||||
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
""" | """ | ||||
all_paths = list(set(paths1 + paths2)) | all_paths = list(set(paths1 + paths2)) | ||||
if self.__k_func == 'tanimoto': | |||||
if self._k_func == 'tanimoto': | |||||
length_union = len(set(paths1 + paths2)) | length_union = len(set(paths1 + paths2)) | ||||
kernel = (len(set(paths1)) + len(set(paths2)) - | kernel = (len(set(paths1)) + len(set(paths2)) - | ||||
length_union) / length_union | length_union) / length_union | ||||
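A tiny worked example of the Tanimoto computation above, with hypothetical path label sequences:

paths1 = ['a', 'ab', 'abc']          # hypothetical label sequences
paths2 = ['a', 'ab', 'abd', 'b']
union = len(set(paths1 + paths2))                     # |P1 u P2| = 5
inter = len(set(paths1)) + len(set(paths2)) - union   # |P1 n P2| = 2
kernel = inter / union                                # 2 / 5 = 0.4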
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
# kernel_uv = np.dot(vector1, vector2) | # kernel_uv = np.dot(vector1, vector2) | ||||
# kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv) | # kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv) | ||||
elif self.__k_func == 'MinMax': # MinMax kernel | |||||
elif self._k_func == 'MinMax': # MinMax kernel | |||||
path_count1 = Counter(paths1) | path_count1 = Counter(paths1) | ||||
path_count2 = Counter(paths2) | path_count2 = Counter(paths2) | ||||
vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0) | vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0) | ||||
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
kernel = np.sum(np.minimum(vector1, vector2)) / \ | kernel = np.sum(np.minimum(vector1, vector2)) / \ | ||||
np.sum(np.maximum(vector1, vector2)) | np.sum(np.maximum(vector1, vector2)) | ||||
elif self.__k_func is None: # no sub-kernel used; compare paths directly. | |||||
elif self._k_func is None: # no sub-kernel used; compare paths directly. | |||||
path_count1 = Counter(paths1) | path_count1 = Counter(paths1) | ||||
path_count2 = Counter(paths2) | path_count2 = Counter(paths2) | ||||
vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0) | vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0) | ||||
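And a worked example of the MinMax variant, which keeps multiplicities via the count vectors built above (hypothetical sequences again):

from collections import Counter
import numpy as np

paths1 = ['a', 'a', 'ab']                 # hypothetical label sequences
paths2 = ['a', 'ab', 'ab', 'b']
all_paths = sorted(set(paths1 + paths2))  # ['a', 'ab', 'b']
c1, c2 = Counter(paths1), Counter(paths2)
vector1 = [c1.get(p, 0) for p in all_paths]  # [2, 1, 0]
vector2 = [c2.get(p, 0) for p in all_paths]  # [1, 2, 1]
kernel = np.sum(np.minimum(vector1, vector2)) \
    / np.sum(np.maximum(vector1, vector2))   # 2 / 5 = 0.4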
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def _wrapper_kernel_do_naive(self, itr): | def _wrapper_kernel_do_naive(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j]) | |||||
return i, j, self._kernel_do_naive(G_plist[i], G_plist[j]) | |||||
def __find_all_path_as_trie(self, G): | |||||
def _find_all_path_as_trie(self, G): | |||||
# all_path = find_all_paths_until_length(G, length, ds_attrs, | # all_path = find_all_paths_until_length(G, length, ds_attrs, | ||||
# node_label=node_label, | # node_label=node_label, | ||||
# edge_label=edge_label) | # edge_label=edge_label) | ||||
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
# them. Depth-first search is applied. Notice the reverse of each path is | # them. Depth-first search is applied. Notice the reverse of each path is | ||||
# also stored in the trie. | # also stored in the trie. | ||||
def traverseGraph(root, ptrie, G, pcurrent=[]): | def traverseGraph(root, ptrie, G, pcurrent=[]): | ||||
if len(pcurrent) < self.__depth + 1: | |||||
if len(pcurrent) < self._depth + 1: | |||||
for neighbor in G[root]: | for neighbor in G[root]: | ||||
if neighbor not in pcurrent: | if neighbor not in pcurrent: | ||||
pcurrent.append(neighbor) | pcurrent.append(neighbor) | ||||
plstr = self.__paths2labelseqs([pcurrent], G) | |||||
plstr = self._paths2labelseqs([pcurrent], G) | |||||
ptrie.insertWord(plstr[0]) | ptrie.insertWord(plstr[0]) | ||||
traverseGraph(neighbor, ptrie, G, pcurrent) | traverseGraph(neighbor, ptrie, G, pcurrent) | ||||
del pcurrent[-1] | del pcurrent[-1] | ||||
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
ptrie = Trie() | ptrie = Trie() | ||||
path_l = [[n] for n in G.nodes] # paths of length l | path_l = [[n] for n in G.nodes] # paths of length l | ||||
path_l_str = self.__paths2labelseqs(path_l, G) | |||||
path_l_str = self._paths2labelseqs(path_l, G) | |||||
for p in path_l_str: | for p in path_l_str: | ||||
ptrie.insertWord(p) | ptrie.insertWord(p) | ||||
for n in G.nodes: | for n in G.nodes: | ||||
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def _wrapper_find_all_path_as_trie(self, itr_item): | def _wrapper_find_all_path_as_trie(self, itr_item): | ||||
g = itr_item[0] | g = itr_item[0] | ||||
i = itr_item[1] | i = itr_item[1] | ||||
return i, self.__find_all_path_as_trie(g) | |||||
return i, self._find_all_path_as_trie(g) | |||||
# @todo: (can be removed maybe) this method finds paths repetitively; it could be faster. | # @todo: (can be removed maybe) this method finds paths repetitively; it could be faster. | ||||
def __find_all_paths_until_length(self, G, tolabelseqs=True): | |||||
def _find_all_paths_until_length(self, G, tolabelseqs=True): | |||||
"""Find all paths no longer than a certain maximum length in a graph. A | """Find all paths no longer than a certain maximum length in a graph. A | ||||
recursive depth-first search is applied. | recursive depth-first search is applied. | ||||
@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
""" | """ | ||||
# path_l = [tuple([n]) for n in G.nodes] # paths of length l | # path_l = [tuple([n]) for n in G.nodes] # paths of length l | ||||
# all_paths = path_l[:] | # all_paths = path_l[:] | ||||
# for l in range(1, self.__depth + 1): | |||||
# for l in range(1, self._depth + 1): | |||||
# path_l_new = [] | # path_l_new = [] | ||||
# for path in path_l: | # for path in path_l: | ||||
# for neighbor in G[path[-1]]: | # for neighbor in G[path[-1]]: | ||||
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
path_l = [[n] for n in G.nodes] # paths of length l | path_l = [[n] for n in G.nodes] # paths of length l | ||||
all_paths = [p.copy() for p in path_l] | all_paths = [p.copy() for p in path_l] | ||||
for l in range(1, self.__depth + 1): | |||||
for l in range(1, self._depth + 1): | |||||
path_lplus1 = [] | path_lplus1 = [] | ||||
for path in path_l: | for path in path_l: | ||||
for neighbor in G[path[-1]]: | for neighbor in G[path[-1]]: | ||||
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
all_paths += path_lplus1 | all_paths += path_lplus1 | ||||
path_l = [p.copy() for p in path_lplus1] | path_l = [p.copy() for p in path_lplus1] | ||||
# for i in range(0, self.__depth + 1): | |||||
# for i in range(0, self._depth + 1): | |||||
# new_paths = find_all_paths(G, i) | # new_paths = find_all_paths(G, i) | ||||
# if new_paths == []: | # if new_paths == []: | ||||
# break | # break | ||||
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
# consider labels | # consider labels | ||||
# print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) | ||||
# print() | # print() | ||||
return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths) | |||||
return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths) | |||||
def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item): | def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item): | ||||
g = itr_item[0] | g = itr_item[0] | ||||
i = itr_item[1] | i = itr_item[1] | ||||
return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs) | |||||
return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs) | |||||
def __paths2labelseqs(self, plist, G): | |||||
if len(self.__node_labels) > 0: | |||||
if len(self.__edge_labels) > 0: | |||||
def _paths2labelseqs(self, plist, G): | |||||
if len(self._node_labels) > 0: | |||||
if len(self._edge_labels) > 0: | |||||
path_strs = [] | path_strs = [] | ||||
for path in plist: | for path in plist: | ||||
pths_tmp = [] | pths_tmp = [] | ||||
for idx, node in enumerate(path[:-1]): | for idx, node in enumerate(path[:-1]): | ||||
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) | |||||
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels)) | |||||
pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels)) | |||||
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) | |||||
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels)) | |||||
pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels)) | |||||
path_strs.append(tuple(pths_tmp)) | path_strs.append(tuple(pths_tmp)) | ||||
else: | else: | ||||
path_strs = [] | path_strs = [] | ||||
for path in plist: | for path in plist: | ||||
pths_tmp = [] | pths_tmp = [] | ||||
for node in path: | for node in path: | ||||
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) | |||||
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) | |||||
path_strs.append(tuple(pths_tmp)) | path_strs.append(tuple(pths_tmp)) | ||||
return path_strs | return path_strs | ||||
else: | else: | ||||
if len(self.__edge_labels) > 0: | |||||
if len(self._edge_labels) > 0: | |||||
path_strs = [] | path_strs = [] | ||||
for path in plist: | for path in plist: | ||||
if len(path) == 1: | if len(path) == 1: | ||||
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
else: | else: | ||||
pths_tmp = [] | pths_tmp = [] | ||||
for idx, node in enumerate(path[:-1]): | for idx, node in enumerate(path[:-1]): | ||||
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels)) | |||||
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels)) | |||||
path_strs.append(tuple(pths_tmp)) | path_strs.append(tuple(pths_tmp)) | ||||
return path_strs | return path_strs | ||||
else: | else: | ||||
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
# return [tuple([len(path)]) for path in all_paths] | # return [tuple([len(path)]) for path in all_paths] | ||||
def __add_dummy_labels(self, Gn): | |||||
if self.__k_func is not None: | |||||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||||
def _add_dummy_labels(self, Gn): | |||||
if self._k_func is not None: | |||||
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | |||||
for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | ||||
self.__node_labels = [SpecialLabel.DUMMY] | |||||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||||
self._node_labels = [SpecialLabel.DUMMY] | |||||
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): | |||||
for i in range(len(Gn)): | for i in range(len(Gn)): | ||||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | ||||
self.__edge_labels = [SpecialLabel.DUMMY] | |||||
self._edge_labels = [SpecialLabel.DUMMY] |
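The rename repeated throughout these hunks, `self.__attr` to `self._attr`, is not cosmetic: a double leading underscore triggers Python's name mangling, which stores the attribute as `_ClassName__attr` and hides it from subclasses such as the kernel classes inheriting from GraphKernel. A minimal sketch of the failure mode (class names `Base` and `Child` are hypothetical, not from this repository):

import types  # stdlib only; nothing repo-specific is needed

class Base:
    def __init__(self):
        self.__labels = []  # stored as _Base__labels due to name mangling

class Child(Base):
    def get_labels(self):
        return self.__labels  # looked up as _Child__labels, which never exists

try:
    Child().get_labels()
except AttributeError as err:
    print(err)  # 'Child' object has no attribute '_Child__labels'

A single underscore is only a naming convention, so `_depth`, `_node_labels`, and the like stay accessible to subclasses and to helpers that share state across the class hierarchy.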
@@ -76,11 +76,11 @@ class RandomWalkMeta(GraphKernel):

     def _add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]
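`_add_dummy_labels` relies on `networkx.set_node_attributes` (and its edge counterpart) accepting a scalar: when the second argument is not a dict, every node receives that constant value under the given attribute name. A small sketch of that behavior, using the string 'dummy' as a stand-in for `SpecialLabel.DUMMY`:

import networkx as nx

G = nx.path_graph(3)  # an unlabeled three-node graph
nx.set_node_attributes(G, '0', 'dummy')  # every node now carries dummy='0'
print(G.nodes(data=True))
# [(0, {'dummy': '0'}), (1, {'dummy': '0'}), (2, {'dummy': '0'})]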
@@ -26,11 +26,11 @@ class ShortestPath(GraphKernel):

     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__node_attrs = kwargs.get('node_attrs', [])
-        self.__edge_weight = kwargs.get('edge_weight', None)
-        self.__node_kernels = kwargs.get('node_kernels', None)
-        self.__ds_infos = kwargs.get('ds_infos', {})
+        self._node_labels = kwargs.get('node_labels', [])
+        self._node_attrs = kwargs.get('node_attrs', [])
+        self._edge_weight = kwargs.get('edge_weight', None)
+        self._node_kernels = kwargs.get('node_kernels', None)
+        self._ds_infos = kwargs.get('ds_infos', {})

     def _compute_gm_series(self):
@@ -39,7 +39,7 @@ class ShortestPath(GraphKernel):
             iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
         else:
             iterator = self._graphs
-        self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
+        self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -51,7 +51,7 @@ class ShortestPath(GraphKernel):
         else:
             iterator = itr
         for i, j in iterator:
-            kernel = self.__sp_do(self._graphs[i], self._graphs[j])
+            kernel = self._sp_do(self._graphs[i], self._graphs[j])
             gram_matrix[i][j] = kernel
             gram_matrix[j][i] = kernel
@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):

     def _compute_kernel_list_series(self, g1, g_list):
         # get shortest path graphs of g1 and each graph in g_list.
-        g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
+        g1 = getSPGraph(g1, edge_weight=self._edge_weight)
         if self._verbose >= 2:
             iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
         else:
             iterator = g_list
-        g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
+        g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -106,7 +106,7 @@ class ShortestPath(GraphKernel):
         else:
             iterator = range(len(g_list))
         for i in iterator:
-            kernel = self.__sp_do(g1, g_list[i])
+            kernel = self._sp_do(g1, g_list[i])
             kernel_list[i] = kernel

         return kernel_list
@@ -114,7 +114,7 @@ class ShortestPath(GraphKernel):

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         # get shortest path graphs of g1 and each graph in g_list.
-        g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
+        g1 = getSPGraph(g1, edge_weight=self._edge_weight)
         pool = Pool(self._n_jobs)
         get_sp_graphs_fun = self._wrapper_get_sp_graphs
         itr = zip(g_list, range(0, len(g_list)))
@@ -151,55 +151,55 @@ class ShortestPath(GraphKernel):

     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__sp_do(G_g1, G_gl[itr])
+        return itr, self._sp_do(G_g1, G_gl[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
-        g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
-        kernel = self.__sp_do(g1, g2)
+        g1 = getSPGraph(g1, edge_weight=self._edge_weight)
+        g2 = getSPGraph(g2, edge_weight=self._edge_weight)
+        kernel = self._sp_do(g1, g2)
         return kernel

     def _wrapper_get_sp_graphs(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, getSPGraph(g, edge_weight=self.__edge_weight)
+        return i, getSPGraph(g, edge_weight=self._edge_weight)

-    def __sp_do(self, g1, g2):
+    def _sp_do(self, g1, g2):

         kernel = 0

         # compute shortest path matrices first, method borrowed from FCSP.
         vk_dict = {} # shortest path matrices dict
-        if len(self.__node_labels) > 0:
+        if len(self._node_labels) > 0:
             # node symb and non-synb labeled
-            if len(self.__node_attrs) > 0:
-                kn = self.__node_kernels['mix']
+            if len(self._node_attrs) > 0:
+                kn = self._node_kernels['mix']
                 for n1, n2 in product(
                         g1.nodes(data=True), g2.nodes(data=True)):
-                    n1_labels = [n1[1][nl] for nl in self.__node_labels]
-                    n2_labels = [n2[1][nl] for nl in self.__node_labels]
-                    n1_attrs = [n1[1][na] for na in self.__node_attrs]
-                    n2_attrs = [n2[1][na] for na in self.__node_attrs]
+                    n1_labels = [n1[1][nl] for nl in self._node_labels]
+                    n2_labels = [n2[1][nl] for nl in self._node_labels]
+                    n1_attrs = [n1[1][na] for na in self._node_attrs]
+                    n2_attrs = [n2[1][na] for na in self._node_attrs]
                     vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
             # node symb labeled
             else:
-                kn = self.__node_kernels['symb']
+                kn = self._node_kernels['symb']
                 for n1 in g1.nodes(data=True):
                     for n2 in g2.nodes(data=True):
-                        n1_labels = [n1[1][nl] for nl in self.__node_labels]
-                        n2_labels = [n2[1][nl] for nl in self.__node_labels]
+                        n1_labels = [n1[1][nl] for nl in self._node_labels]
+                        n2_labels = [n2[1][nl] for nl in self._node_labels]
                         vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
         else:
             # node non-synb labeled
-            if len(self.__node_attrs) > 0:
-                kn = self.__node_kernels['nsymb']
+            if len(self._node_attrs) > 0:
+                kn = self._node_kernels['nsymb']
                 for n1 in g1.nodes(data=True):
                     for n2 in g2.nodes(data=True):
-                        n1_attrs = [n1[1][na] for na in self.__node_attrs]
-                        n2_attrs = [n2[1][na] for na in self.__node_attrs]
+                        n1_attrs = [n1[1][na] for na in self._node_attrs]
+                        n2_attrs = [n2[1][na] for na in self._node_attrs]
                         vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
             # node unlabeled
             else:
@@ -210,7 +210,7 @@ class ShortestPath(GraphKernel):
                 return kernel

         # compute graph kernels
-        if self.__ds_infos['directed']:
+        if self._ds_infos['directed']:
             for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
                 if e1[2]['cost'] == e2[2]['cost']:
                     nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
@@ -261,4 +261,4 @@ class ShortestPath(GraphKernel):
     def _wrapper_sp_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__sp_do(G_gs[i], G_gs[j])
+        return i, j, self._sp_do(G_gs[i], G_gs[j])
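`_sp_do` follows the fast computation of shortest path kernels (FCSP) scheme: node-kernel values are computed once for every node pair and cached in `vk_dict`, then looked up for each pair of shortest-path edges instead of being recomputed. A toy version of that precomputation, with a delta kernel and hypothetical symbolic labels (the real code draws `kn` from the `node_kernels` dict):

from itertools import product

def delta(a, b):
    return 1.0 if a == b else 0.0

g1_labels = {0: 'C', 1: 'O'}  # node -> label, stand-ins for real graphs
g2_labels = {0: 'C', 1: 'N'}

# cache one kernel value per node pair; later edge comparisons reuse these
vk_dict = {(u, v): delta(lu, lv)
           for (u, lu), (v, lv) in product(g1_labels.items(), g2_labels.items())}
print(vk_dict[(0, 0)], vk_dict[(1, 1)])  # 1.0 0.0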
@@ -66,7 +66,7 @@ class SpectralDecomposition(RandomWalkMeta):
             iterator = itr
         for i, j in iterator:
-            kernel = self.__kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
+            kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
             gram_matrix[i][j] = kernel
             gram_matrix[j][i] = kernel
@@ -162,7 +162,7 @@ class SpectralDecomposition(RandomWalkMeta):
                     iterator = range(len(g_list))
                 for i in iterator:
-                    kernel = self.__kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
+                    kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
                     kernel_list[i] = kernel
             else: # @todo
@@ -190,9 +190,9 @@ class SpectralDecomposition(RandomWalkMeta):
         P_list = []
         D_list = []
         if self._verbose >= 2:
-            iterator = tqdm(range(len(g_list)), desc='spectral decompose', file=sys.stdout)
+            iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
         else:
-            iterator = range(len(g_list))
+            iterator = g_list
         for G in iterator:
             # don't normalize adjacency matrices if q is a uniform vector. Note
             # A actually is the transpose of the adjacency matrix.
@@ -252,7 +252,7 @@ class SpectralDecomposition(RandomWalkMeta):
            if self._p is None: # p is uniform distribution as default.
                q_T1 = 1 / nx.number_of_nodes(g1)
                q_T2 = 1 / nx.number_of_nodes(g2)
-               kernel = self.__kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
+               kernel = self._kernel_do(q_T1, q_T2, P1, P2, D1, D2, self._weight, self._sub_kernel)
            else: # @todo
                pass
        else: # @todo
@@ -261,7 +261,7 @@ class SpectralDecomposition(RandomWalkMeta):
         return kernel

-    def __kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
+    def _kernel_do(self, q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):
         # use uniform distribution if there is no prior knowledge.
         kl = kron(np.dot(q_T1, P1), np.dot(q_T2, P2)).todense()
         # @todo: this is not needed when p = q (kr = kl.T) for undirected graphs.
@@ -280,4 +280,4 @@ class SpectralDecomposition(RandomWalkMeta):
     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel)
+        return i, j, self._kernel_do(G_q_T_list[i], G_q_T_list[j], G_P_list[i], G_P_list[j], G_D_list[i], G_D_list[j], self._weight, self._sub_kernel)
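Besides the renames, the `@@ -190,9` hunk appears to fix a genuine bug: the loop body is `for G in iterator:` and then works on `G` as a graph, but the old iterator yielded indices from `range(len(g_list))`. A stripped-down view of the mismatch (the per-graph work shown here is just an adjacency-matrix example, not the exact body from the repo):

import networkx as nx

g_list = [nx.path_graph(2), nx.path_graph(3)]

# old iterator: for G in range(len(g_list)) makes G an int, so any
# graph operation on G inside the loop would raise.

# fixed iterator: iterate over the graphs themselves.
for G in g_list:
    A = nx.adjacency_matrix(G).todense().transpose()
    print(A.shape)  # (2, 2) then (3, 3)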
@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel):

     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__node_attrs = kwargs.get('node_attrs', [])
-        self.__edge_attrs = kwargs.get('edge_attrs', [])
-        self.__edge_weight = kwargs.get('edge_weight', None)
-        self.__node_kernels = kwargs.get('node_kernels', None)
-        self.__edge_kernels = kwargs.get('edge_kernels', None)
-        self.__compute_method = kwargs.get('compute_method', 'naive')
-        self.__ds_infos = kwargs.get('ds_infos', {})
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._node_attrs = kwargs.get('node_attrs', [])
+        self._edge_attrs = kwargs.get('edge_attrs', [])
+        self._edge_weight = kwargs.get('edge_weight', None)
+        self._node_kernels = kwargs.get('node_kernels', None)
+        self._edge_kernels = kwargs.get('edge_kernels', None)
+        self._compute_method = kwargs.get('compute_method', 'naive')
+        self._ds_infos = kwargs.get('ds_infos', {})

     def _compute_gm_series(self):
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel):
             iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
         else:
             iterator = self._graphs
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for g in iterator:
-                splist.append(self.__get_sps_as_trie(g))
+                splist.append(self._get_sps_as_trie(g))
         else:
             for g in iterator:
-                splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
+                splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -60,14 +60,14 @@ class StructuralSP(GraphKernel):
             iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
         else:
             iterator = itr
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for i, j in iterator:
-                kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
+                kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
         else:
             for i, j in iterator:
-                kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
+                kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
                 # if(kernel > 1):
                 #     print("error here ")
                 gram_matrix[i][j] = kernel
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel):
         else:
             chunksize = 100
         # get shortest path graphs of self._graphs
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             get_sps_fun = self._wrapper_get_sps_trie
         else:
             get_sps_fun = self._wrapper_get_sps_naive
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel):
             global G_spl, G_gs
             G_spl = spl_toshare
             G_gs = gs_toshare
-        if self.__compute_method == 'trie':
-            do_fun = self.__wrapper_ssp_do_trie
+        if self._compute_method == 'trie':
+            do_fun = self._wrapper_ssp_do_trie
         else:
             do_fun = self._wrapper_ssp_do_naive
         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
@@ -119,18 +119,18 @@ class StructuralSP(GraphKernel):

     def _compute_kernel_list_series(self, g1, g_list):
         # get shortest paths of g1 and each graph in g_list.
-        sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
+        sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
         splist = []
         if self._verbose >= 2:
             iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
         else:
             iterator = g_list
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for g in iterator:
-                splist.append(self.__get_sps_as_trie(g))
+                splist.append(self._get_sps_as_trie(g))
         else:
             for g in iterator:
-                splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
+                splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -138,13 +138,13 @@ class StructuralSP(GraphKernel):
             iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
         else:
             iterator = range(len(g_list))
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for i in iterator:
-                kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i])
+                kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
                 kernel_list[i] = kernel
         else:
             for i in iterator:
-                kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i])
+                kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
                 kernel_list[i] = kernel

         return kernel_list
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel):

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         # get shortest paths of g1 and each graph in g_list.
-        sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
+        sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
         splist = [None] * len(g_list)
         pool = Pool(self._n_jobs)
         itr = zip(g_list, range(0, len(g_list)))
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel):
         else:
             chunksize = 100
         # get shortest path graphs of g_list
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             get_sps_fun = self._wrapper_get_sps_trie
         else:
             get_sps_fun = self._wrapper_get_sps_naive
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel):
             G_spl = spl_toshare
             G_g1 = g1_toshare
             G_gl = gl_toshare
-        if self.__compute_method == 'trie':
-            do_fun = self.__wrapper_ssp_do_trie
+        if self._compute_method == 'trie':
+            do_fun = self._wrapper_ssp_do_trie
         else:
             do_fun = self._wrapper_kernel_list_do
         def func_assign(result, var_to_assign):
@@ -199,36 +199,36 @@ class StructuralSP(GraphKernel):

     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
+        return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
-        sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed'])
-        if self.__compute_method == 'trie':
-            kernel = self.__ssp_do_trie(g1, g2, sp1, sp2)
+        sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
+        sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
+        if self._compute_method == 'trie':
+            kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
         else:
-            kernel = self.__ssp_do_naive(g1, g2, sp1, sp2)
+            kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
         return kernel

     def _wrapper_get_sps_naive(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])
+        return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])

-    def __ssp_do_naive(self, g1, g2, spl1, spl2):
+    def _ssp_do_naive(self, g1, g2, spl1, spl2):

         kernel = 0

         # First, compute shortest path matrices, method borrowed from FCSP.
-        vk_dict = self.__get_all_node_kernels(g1, g2)
+        vk_dict = self._get_all_node_kernels(g1, g2)

         # Then, compute kernels between all pairs of edges, which is an idea of
         # extension of FCSP. It suits sparse graphs, which is the most case we
         # went though. For dense graphs, this would be slow.
-        ek_dict = self.__get_all_edge_kernels(g1, g2)
+        ek_dict = self._get_all_edge_kernels(g1, g2)

         # compute graph kernels
         if vk_dict:
@@ -314,27 +314,27 @@ class StructuralSP(GraphKernel):
     def _wrapper_ssp_do_naive(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
+        return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])

-    def __get_all_node_kernels(self, g1, g2):
+    def _get_all_node_kernels(self, g1, g2):
         return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)

-    def __get_all_edge_kernels(self, g1, g2):
+    def _get_all_edge_kernels(self, g1, g2):
         # compute kernels between all pairs of edges, which is an idea of
         # extension of FCSP. It suits sparse graphs, which is the most case we
         # went though. For dense graphs, this would be slow.
         ek_dict = {} # dict of edge kernels
-        if len(self.__edge_labels) > 0:
+        if len(self._edge_labels) > 0:
             # edge symb and non-synb labeled
-            if len(self.__edge_attrs) > 0:
-                ke = self.__edge_kernels['mix']
+            if len(self._edge_attrs) > 0:
+                ke = self._edge_kernels['mix']
                 for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
-                    e1_labels = [e1[2][el] for el in self.__edge_labels]
-                    e2_labels = [e2[2][el] for el in self.__edge_labels]
-                    e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-                    e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+                    e1_labels = [e1[2][el] for el in self._edge_labels]
+                    e2_labels = [e2[2][el] for el in self._edge_labels]
+                    e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+                    e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
                     ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
                     ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                     ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -342,11 +342,11 @@ class StructuralSP(GraphKernel):
                     ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
             # edge symb labeled
             else:
-                ke = self.__edge_kernels['symb']
+                ke = self._edge_kernels['symb']
                 for e1 in g1.edges(data=True):
                     for e2 in g2.edges(data=True):
-                        e1_labels = [e1[2][el] for el in self.__edge_labels]
-                        e2_labels = [e2[2][el] for el in self.__edge_labels]
+                        e1_labels = [e1[2][el] for el in self._edge_labels]
+                        e2_labels = [e2[2][el] for el in self._edge_labels]
                         ek_temp = ke(e1_labels, e2_labels)
                         ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                         ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -354,12 +354,12 @@ class StructuralSP(GraphKernel):
                         ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
         else:
             # edge non-synb labeled
-            if len(self.__edge_attrs) > 0:
-                ke = self.__edge_kernels['nsymb']
+            if len(self._edge_attrs) > 0:
+                ke = self._edge_kernels['nsymb']
                 for e1 in g1.edges(data=True):
                     for e2 in g2.edges(data=True):
-                        e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-                        e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+                        e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+                        e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
                         ek_temp = ke(e1_attrs, e2_attrs)
                         ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                         ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -28,16 +28,16 @@ class Treelet(GraphKernel): | |||||
def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
GraphKernel.__init__(self) | GraphKernel.__init__(self) | ||||
self.__node_labels = kwargs.get('node_labels', []) | |||||
self.__edge_labels = kwargs.get('edge_labels', []) | |||||
self.__sub_kernel = kwargs.get('sub_kernel', None) | |||||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||||
if self.__sub_kernel is None: | |||||
self._node_labels = kwargs.get('node_labels', []) | |||||
self._edge_labels = kwargs.get('edge_labels', []) | |||||
self._sub_kernel = kwargs.get('sub_kernel', None) | |||||
self._ds_infos = kwargs.get('ds_infos', {}) | |||||
if self._sub_kernel is None: | |||||
raise Exception('Sub kernel not set.') | raise Exception('Sub kernel not set.') | ||||
def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
self.__add_dummy_labels(self._graphs) | |||||
self._add_dummy_labels(self._graphs) | |||||
# get all canonical keys of all graphs before computing kernels to save | # get all canonical keys of all graphs before computing kernels to save | ||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
@@ -47,7 +47,7 @@ class Treelet(GraphKernel): | |||||
else: | else: | ||||
iterator = self._graphs | iterator = self._graphs | ||||
for g in iterator: | for g in iterator: | ||||
canonkeys.append(self.__get_canonkeys(g)) | |||||
canonkeys.append(self._get_canonkeys(g)) | |||||
# compute Gram matrix. | # compute Gram matrix. | ||||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | ||||
@@ -59,7 +59,7 @@ class Treelet(GraphKernel): | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
kernel = self.__kernel_do(canonkeys[i], canonkeys[j]) | |||||
kernel = self._kernel_do(canonkeys[i], canonkeys[j]) | |||||
gram_matrix[i][j] = kernel | gram_matrix[i][j] = kernel | ||||
gram_matrix[j][i] = kernel # @todo: no directed graph considered? | gram_matrix[j][i] = kernel # @todo: no directed graph considered? | ||||
@@ -67,7 +67,7 @@ class Treelet(GraphKernel): | |||||
def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
self.__add_dummy_labels(self._graphs) | |||||
self._add_dummy_labels(self._graphs) | |||||
# get all canonical keys of all graphs before computing kernels to save | # get all canonical keys of all graphs before computing kernels to save | ||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
@@ -103,18 +103,18 @@ class Treelet(GraphKernel): | |||||
def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
# get all canonical keys of all graphs before computing kernels to save | # get all canonical keys of all graphs before computing kernels to save | ||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
canonkeys_1 = self.__get_canonkeys(g1) | |||||
canonkeys_1 = self._get_canonkeys(g1) | |||||
canonkeys_list = [] | canonkeys_list = [] | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout) | iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout) | ||||
else: | else: | ||||
iterator = g_list | iterator = g_list | ||||
for g in iterator: | for g in iterator: | ||||
canonkeys_list.append(self.__get_canonkeys(g)) | |||||
canonkeys_list.append(self._get_canonkeys(g)) | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
@@ -123,18 +123,18 @@ class Treelet(GraphKernel): | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i]) | |||||
kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) | |||||
kernel_list[i] = kernel | kernel_list[i] = kernel | ||||
return kernel_list | return kernel_list | ||||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | |||||
self._add_dummy_labels(g_list + [g1]) | |||||
# get all canonical keys of all graphs before computing kernels to save | # get all canonical keys of all graphs before computing kernels to save | ||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
canonkeys_1 = self.__get_canonkeys(g1) | |||||
canonkeys_1 = self._get_canonkeys(g1) | |||||
canonkeys_list = [[] for _ in range(len(g_list))] | canonkeys_list = [[] for _ in range(len(g_list))] | ||||
pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
itr = zip(g_list, range(0, len(g_list))) | itr = zip(g_list, range(0, len(g_list))) | ||||
@@ -173,18 +173,18 @@ class Treelet(GraphKernel): | |||||
def _wrapper_kernel_list_do(self, itr): | def _wrapper_kernel_list_do(self, itr): | ||||
return itr, self.__kernel_do(G_ck_1, G_ck_list[itr]) | |||||
return itr, self._kernel_do(G_ck_1, G_ck_list[itr]) | |||||
def _compute_single_kernel_series(self, g1, g2): | def _compute_single_kernel_series(self, g1, g2): | ||||
self.__add_dummy_labels([g1] + [g2]) | |||||
canonkeys_1 = self.__get_canonkeys(g1) | |||||
canonkeys_2 = self.__get_canonkeys(g2) | |||||
kernel = self.__kernel_do(canonkeys_1, canonkeys_2) | |||||
self._add_dummy_labels([g1] + [g2]) | |||||
canonkeys_1 = self._get_canonkeys(g1) | |||||
canonkeys_2 = self._get_canonkeys(g2) | |||||
kernel = self._kernel_do(canonkeys_1, canonkeys_2) | |||||
return kernel | return kernel | ||||
def __kernel_do(self, canonkey1, canonkey2): | |||||
def _kernel_do(self, canonkey1, canonkey2): | |||||
"""Compute treelet graph kernel between 2 graphs. | """Compute treelet graph kernel between 2 graphs. | ||||
Parameters | Parameters | ||||
@@ -200,17 +200,17 @@ class Treelet(GraphKernel): | |||||
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | ||||
vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | ||||
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | ||||
kernel = self.__sub_kernel(vector1, vector2) | |||||
kernel = self._sub_kernel(vector1, vector2) | |||||
return kernel | return kernel | ||||
def _wrapper_kernel_do(self, itr): | def _wrapper_kernel_do(self, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j]) | |||||
return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j]) | |||||
def __get_canonkeys(self, G): | |||||
def _get_canonkeys(self, G): | |||||
"""Generate canonical keys of all treelets in a graph. | """Generate canonical keys of all treelets in a graph. | ||||
Parameters | Parameters | ||||
@@ -236,7 +236,7 @@ class Treelet(GraphKernel): | |||||
patterns['0'] = list(G.nodes()) | patterns['0'] = list(G.nodes()) | ||||
canonkey['0'] = nx.number_of_nodes(G) | canonkey['0'] = nx.number_of_nodes(G) | ||||
for i in range(1, 6): # for i in range(1, 6): | for i in range(1, 6): # for i in range(1, 6): | ||||
patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed']) | |||||
patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed']) | |||||
canonkey[str(i)] = len(patterns[str(i)]) | canonkey[str(i)] = len(patterns[str(i)]) | ||||
# n-star patterns | # n-star patterns | ||||
@@ -330,11 +330,11 @@ class Treelet(GraphKernel): | |||||
### pattern obtained in the structural analysis section above, which is a | ### pattern obtained in the structural analysis section above, which is a | ||||
### string corresponding to a unique treelet. A dictionary is built to keep | ### string corresponding to a unique treelet. A dictionary is built to keep | ||||
### track of the amount of every treelet. | ### track of the amount of every treelet. | ||||
if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0: | |||||
if len(self._node_labels) > 0 or len(self._edge_labels) > 0: | |||||
canonkey_l = {} # canonical key, a dictionary which keeps track of amount of every treelet. | canonkey_l = {} # canonical key, a dictionary which keeps track of amount of every treelet. | ||||
# linear patterns | # linear patterns | ||||
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels)) | |||||
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels)) | |||||
for key in canonkey_t: | for key in canonkey_t: | ||||
canonkey_l[('0', key)] = canonkey_t[key] | canonkey_l[('0', key)] = canonkey_t[key] | ||||
@@ -343,9 +343,9 @@ class Treelet(GraphKernel): | |||||
for pattern in patterns[str(i)]: | for pattern in patterns[str(i)]: | ||||
canonlist = [] | canonlist = [] | ||||
for idx, node in enumerate(pattern[:-1]): | for idx, node in enumerate(pattern[:-1]): | ||||
canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) | |||||
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels)) | |||||
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels)) | |||||
canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) | |||||
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels)) | |||||
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels)) | |||||
canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | ||||
treelet.append(tuple([str(i)] + canonkey_t)) | treelet.append(tuple([str(i)] + canonkey_t)) | ||||
canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
@@ -356,13 +356,13 @@ class Treelet(GraphKernel): | |||||
for pattern in patterns[str(i) + 'star']: | for pattern in patterns[str(i) + 'star']: | ||||
canonlist = [] | canonlist = [] | ||||
for leaf in pattern[1:]: | for leaf in pattern[1:]: | ||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
canonlist.sort() | canonlist.sort() | ||||
canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | ||||
[tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||||
[tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
+ canonlist) | + canonlist) | ||||
treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
@@ -372,17 +372,17 @@ class Treelet(GraphKernel): | |||||
for pattern in patterns['7']: | for pattern in patterns['7']: | ||||
canonlist = [] | canonlist = [] | ||||
for leaf in pattern[1:3]: | for leaf in pattern[1:3]: | ||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
canonlist.sort() | canonlist.sort() | ||||
canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
canonkey_t = tuple(['7'] | canonkey_t = tuple(['7'] | ||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist | |||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] | |||||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]) | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]) | |||||
treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
@@ -391,38 +391,38 @@ class Treelet(GraphKernel): | |||||
for pattern in patterns['11']: | for pattern in patterns['11']: | ||||
canonlist = [] | canonlist = [] | ||||
for leaf in pattern[1:4]: | for leaf in pattern[1:4]: | ||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
canonlist.sort() | canonlist.sort() | ||||
canonlist = list(chain.from_iterable(canonlist)) | canonlist = list(chain.from_iterable(canonlist)) | ||||
canonkey_t = tuple(['b'] | canonkey_t = tuple(['b'] | ||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist | |||||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)] | |||||
+ [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]) | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)] | |||||
+ [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]) | |||||
treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
# pattern 10 | # pattern 10 | ||||
treelet = [] | treelet = [] | ||||
for pattern in patterns['10']: | for pattern in patterns['10']: | ||||
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels), | |||||
tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)] | |||||
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), | |||||
tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)] | |||||
canonlist = [] | canonlist = [] | ||||
for leaf in pattern[1:3]: | for leaf in pattern[1:3]: | ||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
canonlist.append(tuple((nlabels, elabels))) | canonlist.append(tuple((nlabels, elabels))) | ||||
canonlist.sort() | canonlist.sort() | ||||
canonkey0 = list(chain.from_iterable(canonlist)) | canonkey0 = list(chain.from_iterable(canonlist)) | ||||
canonkey_t = tuple(['a'] | canonkey_t = tuple(['a'] | ||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)] | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] | |||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)] | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
+ canonkey4 + canonkey0) | + canonkey4 + canonkey0) | ||||
treelet.append(canonkey_t) | treelet.append(canonkey_t) | ||||
canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
@@ -432,15 +432,15 @@ class Treelet(GraphKernel): | |||||
for pattern in patterns['12']: | for pattern in patterns['12']: | ||||
canonlist0 = [] | canonlist0 = [] | ||||
for leaf in pattern[1:3]: | for leaf in pattern[1:3]: | ||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||||
canonlist0.append(tuple((nlabels, elabels))) | canonlist0.append(tuple((nlabels, elabels))) | ||||
canonlist0.sort() | canonlist0.sort() | ||||
canonlist0 = list(chain.from_iterable(canonlist0)) | canonlist0 = list(chain.from_iterable(canonlist0)) | ||||
canonlist3 = [] | canonlist3 = [] | ||||
for leaf in pattern[4:6]: | for leaf in pattern[4:6]: | ||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||||
elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels) | |||||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||||
elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels) | |||||
canonlist3.append(tuple((nlabels, elabels))) | canonlist3.append(tuple((nlabels, elabels))) | ||||
canonlist3.sort() | canonlist3.sort() | ||||
canonlist3 = list(chain.from_iterable(canonlist3)) | canonlist3 = list(chain.from_iterable(canonlist3)) | ||||
@@ -448,14 +448,14 @@ class Treelet(GraphKernel): | |||||
# 2 possible key can be generated from 2 nodes with extended label 3, | # 2 possible key can be generated from 2 nodes with extended label 3, | ||||
# select the one with lower lexicographic order. | # select the one with lower lexicographic order. | ||||
canonkey_t1 = tuple(['c'] | canonkey_t1 = tuple(['c'] | ||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0 | |||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0 | |||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||||
+ canonlist3) | + canonlist3) | ||||
canonkey_t2 = tuple(['c'] | canonkey_t2 = tuple(['c'] | ||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3 | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] | |||||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3 | |||||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||||
+ canonlist0) | + canonlist0) | ||||
treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | ||||
canonkey_l.update(Counter(treelet)) | canonkey_l.update(Counter(treelet)) | ||||
@@ -463,24 +463,24 @@ class Treelet(GraphKernel):
  # pattern 9
  treelet = []
  for pattern in patterns['9']:
- canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
- tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
- canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
- tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
- prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
- tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
- prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
- tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+ canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels),
+ tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)]
+ canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
+ tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)]
+ prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels),
+ tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)]
+ prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels),
+ tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
  if prekey2 + canonkey2 < prekey3 + canonkey3:
- canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
- + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+ canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
  + prekey2 + prekey3 + canonkey2 + canonkey3
  else:
- canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
- + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+ canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
  + prekey3 + prekey2 + canonkey3 + canonkey2
  treelet.append(tuple(['9']
- + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
  + canonkey_t))
  canonkey_l.update(Counter(treelet))
@@ -492,15 +492,15 @@ class Treelet(GraphKernel):
  def _wrapper_get_canonkeys(self, itr_item):
  g = itr_item[0]
  i = itr_item[1]
- return i, self.__get_canonkeys(g)
+ return i, self._get_canonkeys(g)

- def __add_dummy_labels(self, Gn):
- if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+ def _add_dummy_labels(self, Gn):
+ if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
  for i in range(len(Gn)):
  nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
- self.__node_labels = [SpecialLabel.DUMMY]
- if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+ self._node_labels = [SpecialLabel.DUMMY]
+ if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
  for i in range(len(Gn)):
  nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
- self.__edge_labels = [SpecialLabel.DUMMY]
+ self._edge_labels = [SpecialLabel.DUMMY]
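_add_dummy_labels gives every node and edge a constant placeholder attribute when the dataset carries none, so the label-based canonical keys still work on unlabeled graphs. A hedged sketch of the same pattern in plain networkx (the attribute name 'dummy' stands in for SpecialLabel.DUMMY):

import networkx as nx

G = nx.path_graph(3)  # a graph with no node or edge labels
# Assign the constant '0' under a placeholder attribute to all nodes and
# edges, mirroring what _add_dummy_labels does above.
nx.set_node_attributes(G, '0', 'dummy')
nx.set_edge_attributes(G, '0', 'dummy')
print(G.nodes[0]['dummy'], G.edges[0, 1]['dummy'])  # '0' '0'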
@@ -25,11 +25,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  def __init__(self, **kwargs):
  GraphKernel.__init__(self)
- self.__node_labels = kwargs.get('node_labels', [])
- self.__edge_labels = kwargs.get('edge_labels', [])
- self.__height = int(kwargs.get('height', 0))
- self.__base_kernel = kwargs.get('base_kernel', 'subtree')
- self.__ds_infos = kwargs.get('ds_infos', {})
+ self._node_labels = kwargs.get('node_labels', [])
+ self._edge_labels = kwargs.get('edge_labels', [])
+ self._height = int(kwargs.get('height', 0))
+ self._base_kernel = kwargs.get('base_kernel', 'subtree')
+ self._ds_infos = kwargs.get('ds_infos', {})

  def _compute_gm_series(self):

@@ -37,23 +37,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  import warnings
  warnings.warn('A part of the computation is parallelized.')
- self.__add_dummy_node_labels(self._graphs)
+ self._add_dummy_node_labels(self._graphs)

  # for WL subtree kernel
- if self.__base_kernel == 'subtree':
- gram_matrix = self.__subtree_kernel_do(self._graphs)
+ if self._base_kernel == 'subtree':
+ gram_matrix = self._subtree_kernel_do(self._graphs)

  # for WL shortest path kernel
- elif self.__base_kernel == 'sp':
- gram_matrix = self.__sp_kernel_do(self._graphs)
+ elif self._base_kernel == 'sp':
+ gram_matrix = self._sp_kernel_do(self._graphs)

  # for WL edge kernel
- elif self.__base_kernel == 'edge':
- gram_matrix = self.__edge_kernel_do(self._graphs)
+ elif self._base_kernel == 'edge':
+ gram_matrix = self._edge_kernel_do(self._graphs)

  # for user defined base kernel
  else:
- gram_matrix = self.__user_kernel_do(self._graphs)
+ gram_matrix = self._user_kernel_do(self._graphs)

  return gram_matrix
@@ -70,23 +70,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  import warnings
  warnings.warn('A part of the computation is parallelized.')
- self.__add_dummy_node_labels(g_list + [g1])
+ self._add_dummy_node_labels(g_list + [g1])

  # for WL subtree kernel
- if self.__base_kernel == 'subtree':
- gram_matrix = self.__subtree_kernel_do(g_list + [g1])
+ if self._base_kernel == 'subtree':
+ gram_matrix = self._subtree_kernel_do(g_list + [g1])

  # for WL shortest path kernel
- elif self.__base_kernel == 'sp':
- gram_matrix = self.__sp_kernel_do(g_list + [g1])
+ elif self._base_kernel == 'sp':
+ gram_matrix = self._sp_kernel_do(g_list + [g1])

  # for WL edge kernel
- elif self.__base_kernel == 'edge':
- gram_matrix = self.__edge_kernel_do(g_list + [g1])
+ elif self._base_kernel == 'edge':
+ gram_matrix = self._edge_kernel_do(g_list + [g1])

  # for user defined base kernel
  else:
- gram_matrix = self.__user_kernel_do(g_list + [g1])
+ gram_matrix = self._user_kernel_do(g_list + [g1])

  return list(gram_matrix[-1][0:-1])
@@ -103,28 +103,28 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better.
- self.__add_dummy_node_labels([g1] + [g2])
+ self._add_dummy_node_labels([g1] + [g2])

  # for WL subtree kernel
- if self.__base_kernel == 'subtree':
- gram_matrix = self.__subtree_kernel_do([g1] + [g2])
+ if self._base_kernel == 'subtree':
+ gram_matrix = self._subtree_kernel_do([g1] + [g2])

  # for WL shortest path kernel
- elif self.__base_kernel == 'sp':
- gram_matrix = self.__sp_kernel_do([g1] + [g2])
+ elif self._base_kernel == 'sp':
+ gram_matrix = self._sp_kernel_do([g1] + [g2])

  # for WL edge kernel
- elif self.__base_kernel == 'edge':
- gram_matrix = self.__edge_kernel_do([g1] + [g2])
+ elif self._base_kernel == 'edge':
+ gram_matrix = self._edge_kernel_do([g1] + [g2])

  # for user defined base kernel
  else:
- gram_matrix = self.__user_kernel_do([g1] + [g2])
+ gram_matrix = self._user_kernel_do([g1] + [g2])

  return gram_matrix[0][1]
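As the @todo remark concedes, a single kernel value between two graphs is currently obtained by building the full Gram matrix of the two-graph list and reading off the off-diagonal entry. A sketch of that call shape, with gram standing in for the _*_kernel_do methods above:

def single_kernel_via_gram(g1, g2, gram):
    # gram returns the pairwise matrix for a list of graphs; for two
    # graphs, entry [0][1] is k(g1, g2). Simple but wasteful, which is
    # what the '@todo: this should be better' comment points at.
    m = gram([g1, g2])
    return m[0][1]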
- def __subtree_kernel_do(self, Gn):
+ def _subtree_kernel_do(self, Gn):
  """Compute Weisfeiler-Lehman kernels between graphs.

  Parameters
@@ -146,17 +146,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  for G in Gn:
  # set all labels into a tuple.
  for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
- G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self.__node_labels)
+ G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
  # get the set of original labels
  labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
  # number of occurrences of each label in G
  all_num_of_each_label.append(dict(Counter(labels_ori)))

  # Compute subtree kernel with the 0th iteration and add it to the final kernel.
- self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
+ self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)

  # iterate each height
- for h in range(1, self.__height + 1):
+ for h in range(1, self._height + 1):
  all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
  num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels in all graphs so far
  # all_labels_ori = set() # all unique original labels in all graphs in this iteration
@@ -199,12 +199,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  all_num_of_each_label.append(dict(Counter(labels_comp)))

  # Compute subtree kernel with h iterations and add it to the final kernel
- self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
+ self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)

  return gram_matrix

- def __compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn):
+ def _compute_gram_itr(self, gram_matrix, all_num_of_each_label, Gn):
  """Compute Gram matrix using the base kernel.
  """
  if self._parallel == 'imap_unordered':
@@ -218,12 +218,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  elif self._parallel is None:
  for i in range(len(gram_matrix)):
  for j in range(i, len(gram_matrix)):
- gram_matrix[i][j] = self.__compute_subtree_kernel(all_num_of_each_label[i],
+ gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
  all_num_of_each_label[j], gram_matrix[i][j])
  gram_matrix[j][i] = gram_matrix[i][j]

- def __compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
+ def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
  """Compute the subtree kernel.
  """
  labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
@@ -240,7 +240,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
  i = itr[0]
  j = itr[1]
- return i, j, self.__compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
+ return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
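_compute_subtree_kernel accumulates, over every label present in either graph, the product of the two occurrence counts, i.e. a dot product of label histograms stored as dicts. A minimal sketch under that reading (an illustration, not the gklearn implementation verbatim):

def subtree_kernel_increment(counts1, counts2, kernel=0):
    # Dot product of two label histograms; a label missing from one
    # graph contributes zero to the sum.
    for label in set(counts1) | set(counts2):
        kernel += counts1.get(label, 0) * counts2.get(label, 0)
    return kernel

k = subtree_kernel_increment({'a': 2, 'b': 1}, {'a': 1, 'c': 4})  # 2*1 = 2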
  def _wl_spkernel_do(Gn, node_label, edge_label, height):

@@ -469,11 +469,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
  return gram_matrix

- def __add_dummy_node_labels(self, Gn):
- if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+ def _add_dummy_node_labels(self, Gn):
+ if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
  for i in range(len(Gn)):
  nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
- self.__node_labels = [SpecialLabel.DUMMY]
+ self._node_labels = [SpecialLabel.DUMMY]

  class WLSubtree(WeisfeilerLehman):
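The sweep from double to single leading underscores matters most for subclassing like WLSubtree(WeisfeilerLehman): Python mangles a __name attribute to _ClassName__name at compile time, so code in a subclass cannot reach the parent's __ attributes under the same spelling, while a single-underscore name is a plain convention that inherits cleanly. A minimal demonstration of the pitfall (class names are illustrative):

class Base:
    def __init__(self):
        self.__hidden = 1   # actually stored as _Base__hidden
        self._shared = 2    # plain attribute, inherited as-is

class Child(Base):
    def read(self):
        # self.__hidden here would look up _Child__hidden and raise
        # AttributeError; the single-underscore attribute just works.
        return self._shared

print(Child().read())         # 2
print(Child()._Base__hidden)  # 1, under the mangled name the base wrote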
@@ -31,7 +31,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
  if save_results:
  # create result files.
  print('creating output files...')
- fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save)
+ fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], dir_save)

  dis_k_dataset_list = []

@@ -166,7 +166,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
  print('\ncomplete.\n')

- def __init_output_file_preimage(ds_name, gkernel, dir_output):
+ def _init_output_file_preimage(ds_name, gkernel, dir_output):
  if not os.path.exists(dir_output):
  os.makedirs(dir_output)
  fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
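The helper creates the output directory if needed and derives per-dataset CSV names. A compact sketch of the same pattern using only the standard library; the summary filename is a guess by symmetry with the detail one, since it is not shown above:

import os

def init_output_files(ds_name, gkernel, dir_output):
    # exist_ok folds the exists()/makedirs() pair above into one call.
    os.makedirs(dir_output, exist_ok=True)
    fn_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
    fn_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'  # assumed name
    return fn_detail, fn_summary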
@@ -33,35 +33,35 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti
  if save_results:
  # create result files.
  print('creating output files...')
- fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
+ fn_output_detail, fn_output_summary = _init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
  else:
  fn_output_detail, fn_output_summary = None, None

  # 2. compute/load Gram matrix a priori.
  print('2. computing/loading Gram matrix...')
- gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
+ gram_matrix_unnorm, time_precompute_gm = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)

  # 3. perform k-nn CV.
  print('3. performing k-nn CV...')
  if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random':
- __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
+ _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
  elif train_examples == 'best-dataset':
- __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
+ _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
  elif train_examples == 'trainset':
- __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
+ _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)

  print('\ncomplete.\n')
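The train_examples branching above could also be table-driven. A sketch of that alternative; the stand-in handlers ignore the real helpers' differing extra arguments (the median variant also takes GED and MGE options), so this illustrates the pattern rather than a drop-in change:

def _cv_median(**kw): return 'median'
def _cv_best_ds(**kw): return 'best-dataset'
def _cv_trainset(**kw): return 'trainset'

DISPATCH = {
    'k-graphs': _cv_median,
    'expert': _cv_median,
    'random': _cv_median,
    'best-dataset': _cv_best_ds,
    'trainset': _cv_trainset,
}

print(DISPATCH['expert']())  # 'median'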
- def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
+ def _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
  Gn = dataset_all.graphs
  y_all = dataset_all.targets
  n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']

  # get shuffles.
- train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
+ train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)

  accuracies = [[], [], []]
  for trial in range(len(train_indices)):

@@ -89,11 +89,11 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
  mge_options['update_order'] = True
  mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
  mpg_options['runtime_precompute_gm'] = 0
- set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
+ set_median, gen_median_uo = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
  mge_options['update_order'] = False
  mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
  mpg_options['runtime_precompute_gm'] = 0
- _, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
+ _, gen_median = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
  medians[0].append(set_median)
  medians[1].append(gen_median)
  medians[2].append(gen_median_uo)

@@ -104,10 +104,10 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
  # compute dis_mat between medians.
  dataset = dataset_all.copy()
  dataset.load_graphs([g.copy() for g in G_app], targets=None)
- gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
+ gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())

  # compute the entire Gram matrix.
- graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
+ graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
  kernels_to_medians = []
  for g in G_app:
  kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())

@@ -161,13 +161,13 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
  f_summary.close()

- def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
+ def _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
  Gn = dataset_all.graphs
  y_all = dataset_all.targets
  n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']

  # get shuffles.
- train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
+ train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)

  accuracies = []
  for trial in range(len(train_indices)):

@@ -204,10 +204,10 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
  # compute dis_mat between medians.
  dataset = dataset_all.copy()
  dataset.load_graphs([g.copy() for g in best_graphs], targets=None)
- gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
+ gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())

  # compute the entire Gram matrix.
- graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
+ graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
  kernels_to_best_graphs = []
  for g in best_graphs:
  kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())

@@ -259,7 +259,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
  f_summary.close()

- def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
+ def _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
  y_all = dataset_all.targets
  n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']

@@ -268,7 +268,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
  dis_mat, _, _, _ = compute_distance_matrix(gram_matrix)

  # get shuffles.
- train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size)
+ train_indices, test_indices, _, _ = _get_shuffles(y_all, n_splits, test_size)

  accuracies = []
  for trial in range(len(train_indices)):

@@ -317,7 +317,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
  f_summary.close()

- def __get_shuffles(y_all, n_splits, test_size):
+ def _get_shuffles(y_all, n_splits, test_size):
  rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
  train_indices = [[] for _ in range(n_splits)]
  test_indices = [[] for _ in range(n_splits)]

@@ -335,7 +335,7 @@ def __get_shuffles(y_all, n_splits, test_size):
  return train_indices, test_indices, train_nums, keys
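Judging by its per-class accumulators, _get_shuffles appears to draw a separate ShuffleSplit within each class and merge the indices, so every shuffle keeps all classes represented. A self-contained sketch of that per-class splitting (an assumption about the elided body, not a verbatim copy):

import numpy as np
from sklearn.model_selection import ShuffleSplit

def per_class_shuffles(y_all, n_splits, test_size):
    y_all = np.asarray(y_all)
    train_indices = [[] for _ in range(n_splits)]
    test_indices = [[] for _ in range(n_splits)]
    for cls in np.unique(y_all):
        idx = np.where(y_all == cls)[0]
        rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
        for split, (tr, te) in enumerate(rs.split(idx)):
            train_indices[split].extend(idx[tr])
            test_indices[split].extend(idx[te])
    return train_indices, test_indices

tr, te = per_class_shuffles([0, 0, 0, 1, 1, 1], n_splits=2, test_size=0.34)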
- def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
+ def _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
  mpg = MedianPreimageGenerator()
  mpg.dataset = dataset.copy()
  mpg.set_options(**mpg_options.copy())

@@ -346,7 +346,7 @@ def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_option
  return mpg.set_median, mpg.gen_median

- def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
+ def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
  if load_gm == 'auto':
  gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
  gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))

@@ -355,10 +355,10 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
  gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
  time_precompute_gm = float(gmfile['run_time'])
  else:
- gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
+ gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
  np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
  elif not load_gm:
- gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
+ gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
  np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
  else:
  gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'

@@ -369,7 +369,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
  return gram_matrix_unnorm, time_precompute_gm
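The load_gm handling above is a cache-on-disk pattern: reuse a saved unnormalized Gram matrix when one exists, otherwise compute and persist it next to its runtime. A minimal sketch of the same idea; compute_fn is a stand-in returning (gram_matrix_unnorm, run_time):

import os
import numpy as np

def load_or_compute_gram(path, compute_fn):
    # np.savez appends '.npz' when missing, hence the separate save path.
    if os.path.isfile(path + '.npz'):
        data = np.load(path + '.npz')
        return data['gram_matrix_unnorm'], float(data['run_time'])
    gram, run_time = compute_fn()
    np.savez(path, gram_matrix_unnorm=gram, run_time=run_time)
    return gram, run_time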
- def __get_graph_kernel(dataset, kernel_options):
+ def _get_graph_kernel(dataset, kernel_options):
  from gklearn.utils.utils import get_graph_kernel_by_name
  graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
  node_labels=dataset.node_labels,

@@ -381,7 +381,7 @@ def __get_graph_kernel(dataset, kernel_options):
  return graph_kernel

- def __compute_gram_matrix_unnorm(dataset, kernel_options):
+ def _compute_gram_matrix_unnorm(dataset, kernel_options):
  from gklearn.utils.utils import get_graph_kernel_by_name
  graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
  node_labels=dataset.node_labels,

@@ -397,7 +397,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
  return gram_matrix_unnorm, run_time

- def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
+ def _init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
  if not os.path.exists(dir_output):
  os.makedirs(dir_output)
  fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv'
@@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
  def __init__(self, dataset=None):
  PreimageGenerator.__init__(self, dataset=dataset)
  ### arguments to set.
- self.__mge = None
- self.__ged_options = {}
- self.__mge_options = {}
- # self.__fit_method = 'k-graphs'
- self.__init_method = 'random'
- self.__init_ecc = None
- self.__parallel = True
- self.__n_jobs = multiprocessing.cpu_count()
- self.__ds_name = None
+ self._mge = None
+ self._ged_options = {}
+ self._mge_options = {}
+ # self._fit_method = 'k-graphs'
+ self._init_method = 'random'
+ self._init_ecc = None
+ self._parallel = True
+ self._n_jobs = multiprocessing.cpu_count()
+ self._ds_name = None
  # for cml.
- self.__time_limit_in_sec = 0
- self.__max_itrs = 100
- self.__max_itrs_without_update = 3
- self.__epsilon_residual = 0.01
- self.__epsilon_ec = 0.1
- self.__allow_zeros = True
- # self.__triangle_rule = True
+ self._time_limit_in_sec = 0
+ self._max_itrs = 100
+ self._max_itrs_without_update = 3
+ self._epsilon_residual = 0.01
+ self._epsilon_ec = 0.1
+ self._allow_zeros = True
+ # self._triangle_rule = True
  ### values to compute.
- self.__runtime_optimize_ec = None
- self.__runtime_generate_preimage = None
- self.__runtime_total = None
- self.__set_median = None
- self.__gen_median = None
- self.__best_from_dataset = None
- self.__sod_set_median = None
- self.__sod_gen_median = None
- self.__k_dis_set_median = None
- self.__k_dis_gen_median = None
- self.__k_dis_dataset = None
- self.__node_label_costs = None
- self.__edge_label_costs = None
+ self._runtime_optimize_ec = None
+ self._runtime_generate_preimage = None
+ self._runtime_total = None
+ self._set_median = None
+ self._gen_median = None
+ self._best_from_dataset = None
+ self._sod_set_median = None
+ self._sod_gen_median = None
+ self._k_dis_set_median = None
+ self._k_dis_gen_median = None
+ self._k_dis_dataset = None
+ self._node_label_costs = None
+ self._edge_label_costs = None
  # for cml.
- self.__itrs = 0
- self.__converged = False
- self.__num_updates_ecs = 0
+ self._itrs = 0
+ self._converged = False
+ self._num_updates_ecs = 0
  ### values that can be set or to be computed.
- self.__edit_cost_constants = []
- self.__gram_matrix_unnorm = None
- self.__runtime_precompute_gm = None
+ self._edit_cost_constants = []
+ self._gram_matrix_unnorm = None
+ self._runtime_precompute_gm = None

  def set_options(self, **kwargs):
  self._kernel_options = kwargs.get('kernel_options', {})
  self._graph_kernel = kwargs.get('graph_kernel', None)
  self._verbose = kwargs.get('verbose', 2)
- self.__ged_options = kwargs.get('ged_options', {})
- self.__mge_options = kwargs.get('mge_options', {})
- # self.__fit_method = kwargs.get('fit_method', 'k-graphs')
- self.__init_method = kwargs.get('init_method', 'random')
- self.__init_ecc = kwargs.get('init_ecc', None)
- self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
- self.__parallel = kwargs.get('parallel', True)
- self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
- self.__ds_name = kwargs.get('ds_name', None)
- self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
- self.__max_itrs = kwargs.get('max_itrs', 100)
- self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
- self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
- self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
- self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
- self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
- self.__allow_zeros = kwargs.get('allow_zeros', True)
- # self.__triangle_rule = kwargs.get('triangle_rule', True)
+ self._ged_options = kwargs.get('ged_options', {})
+ self._mge_options = kwargs.get('mge_options', {})
+ # self._fit_method = kwargs.get('fit_method', 'k-graphs')
+ self._init_method = kwargs.get('init_method', 'random')
+ self._init_ecc = kwargs.get('init_ecc', None)
+ self._edit_cost_constants = kwargs.get('edit_cost_constants', [])
+ self._parallel = kwargs.get('parallel', True)
+ self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+ self._ds_name = kwargs.get('ds_name', None)
+ self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
+ self._max_itrs = kwargs.get('max_itrs', 100)
+ self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
+ self._epsilon_residual = kwargs.get('epsilon_residual', 0.01)
+ self._epsilon_ec = kwargs.get('epsilon_ec', 0.1)
+ self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
+ self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
+ self._allow_zeros = kwargs.get('allow_zeros', True)
+ # self._triangle_rule = kwargs.get('triangle_rule', True)
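set_options mirrors __init__ by pulling each attribute out of kwargs with a default. A sketch of the same idea driven by a single defaults table, which keeps the two lists from drifting apart (illustrative; not the gklearn API):

class OptionsSketch:
    # One defaults dict instead of two parallel attribute lists; unknown
    # keys are silently ignored, matching the kwargs.get() calls above.
    _DEFAULTS = {'parallel': True, 'max_itrs': 100, 'epsilon_ec': 0.1}

    def set_options(self, **kwargs):
        for name, default in self._DEFAULTS.items():
            setattr(self, '_' + name, kwargs.get(name, default))

o = OptionsSketch()
o.set_options(max_itrs=50)
print(o._parallel, o._max_itrs, o._epsilon_ec)  # True 50 0.1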
  def run(self):

@@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
  start = time.time()

  # 1. precompute gram matrix.
- if self.__gram_matrix_unnorm is None:
+ if self._gram_matrix_unnorm is None:
  gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
- self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
+ self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
  end_precompute_gm = time.time()
- self.__runtime_precompute_gm = end_precompute_gm - start
+ self._runtime_precompute_gm = end_precompute_gm - start
  else:
- if self.__runtime_precompute_gm is None:
+ if self._runtime_precompute_gm is None:
  raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
- self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
+ self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
  if self._kernel_options['normalize']:
- self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
+ self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
  else:
- self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
+ self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
  end_precompute_gm = time.time()
- start -= self.__runtime_precompute_gm
+ start -= self._runtime_precompute_gm

- # if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
+ # if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset':
  # start = time.time()
- # self.__runtime_precompute_gm = 0
+ # self._runtime_precompute_gm = 0
  # end_precompute_gm = start

  # 2. optimize edit cost constants.
- self.__optimize_edit_cost_vector()
+ self._optimize_edit_cost_vector()
  end_optimize_ec = time.time()
- self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm
+ self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm

  # 3. compute set median and gen median using optimized edit costs.
  if self._verbose >= 2:
  print('\nstart computing set median and gen median using optimized edit costs...\n')
- self.__gmg_bcu()
+ self._gmg_bcu()
  end_generate_preimage = time.time()
- self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
- self.__runtime_total = end_generate_preimage - start
+ self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec
+ self._runtime_total = end_generate_preimage - start
  if self._verbose >= 2:
  print('medians computed.')
- print('SOD of the set median: ', self.__sod_set_median)
- print('SOD of the generalized median: ', self.__sod_gen_median)
+ print('SOD of the set median: ', self._sod_set_median)
+ print('SOD of the generalized median: ', self._sod_gen_median)

  # 4. compute kernel distances to the true median.
  if self._verbose >= 2:
  print('\nstart computing distances to true median....\n')
- self.__compute_distances_to_true_median()
+ self._compute_distances_to_true_median()

  # 5. print out results.
  if self._verbose:

@@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
  print('================================================================================')
  print('Finished generation of preimages.')
  print('--------------------------------------------------------------------------------')
- print('The optimized edit costs:', self.__edit_cost_constants)
- print('SOD of the set median:', self.__sod_set_median)
- print('SOD of the generalized median:', self.__sod_gen_median)
- print('Distance in kernel space for set median:', self.__k_dis_set_median)
- print('Distance in kernel space for generalized median:', self.__k_dis_gen_median)
- print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
- print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
- print('Time to optimize edit costs:', self.__runtime_optimize_ec)
- print('Time to generate pre-images:', self.__runtime_generate_preimage)
- print('Total time:', self.__runtime_total)
- print('Total number of iterations for optimizing:', self.__itrs)
- print('Total number of updating edit costs:', self.__num_updates_ecs)
- print('Is optimization of edit costs converged:', self.__converged)
+ print('The optimized edit costs:', self._edit_cost_constants)
+ print('SOD of the set median:', self._sod_set_median)
+ print('SOD of the generalized median:', self._sod_gen_median)
+ print('Distance in kernel space for set median:', self._k_dis_set_median)
+ print('Distance in kernel space for generalized median:', self._k_dis_gen_median)
+ print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
+ print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
+ print('Time to optimize edit costs:', self._runtime_optimize_ec)
+ print('Time to generate pre-images:', self._runtime_generate_preimage)
+ print('Total time:', self._runtime_total)
+ print('Total number of iterations for optimizing:', self._itrs)
+ print('Total number of updating edit costs:', self._num_updates_ecs)
+ print('Is optimization of edit costs converged:', self._converged)
  print('================================================================================')
  print()
  def get_results(self):
  results = {}
- results['edit_cost_constants'] = self.__edit_cost_constants
- results['runtime_precompute_gm'] = self.__runtime_precompute_gm
- results['runtime_optimize_ec'] = self.__runtime_optimize_ec
- results['runtime_generate_preimage'] = self.__runtime_generate_preimage
- results['runtime_total'] = self.__runtime_total
- results['sod_set_median'] = self.__sod_set_median
- results['sod_gen_median'] = self.__sod_gen_median
- results['k_dis_set_median'] = self.__k_dis_set_median
- results['k_dis_gen_median'] = self.__k_dis_gen_median
- results['k_dis_dataset'] = self.__k_dis_dataset
- results['itrs'] = self.__itrs
- results['converged'] = self.__converged
- results['num_updates_ecc'] = self.__num_updates_ecs
+ results['edit_cost_constants'] = self._edit_cost_constants
+ results['runtime_precompute_gm'] = self._runtime_precompute_gm
+ results['runtime_optimize_ec'] = self._runtime_optimize_ec
+ results['runtime_generate_preimage'] = self._runtime_generate_preimage
+ results['runtime_total'] = self._runtime_total
+ results['sod_set_median'] = self._sod_set_median
+ results['sod_gen_median'] = self._sod_gen_median
+ results['k_dis_set_median'] = self._k_dis_set_median
+ results['k_dis_gen_median'] = self._k_dis_gen_median
+ results['k_dis_dataset'] = self._k_dis_dataset
+ results['itrs'] = self._itrs
+ results['converged'] = self._converged
+ results['num_updates_ecc'] = self._num_updates_ecs
  results['mge'] = {}
- results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
- results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
- results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
+ results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased()
+ results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased()
+ results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents()
  return results
- def __optimize_edit_cost_vector(self):
+ def _optimize_edit_cost_vector(self):
  """Learn edit cost vector.
  """
  # Initialize label costs randomly.
- if self.__init_method == 'random':
+ if self._init_method == 'random':
  # Initialize label costs.
- self.__initialize_label_costs()
+ self._initialize_label_costs()
  # Optimize edit cost matrices.
- self.__optimize_ecm_by_kernel_distances()
+ self._optimize_ecm_by_kernel_distances()
  # Initialize all label costs with the same value.
- elif self.__init_method == 'uniform': # random
+ elif self._init_method == 'uniform': # random
  pass
- elif self.__fit_method == 'random': # random
- if self.__ged_options['edit_cost'] == 'LETTER':
- self.__edit_cost_constants = random.sample(range(1, 1000), 3)
- self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
- elif self.__ged_options['edit_cost'] == 'LETTER2':
+ elif self._fit_method == 'random': # random
+ if self._ged_options['edit_cost'] == 'LETTER':
+ self._edit_cost_constants = random.sample(range(1, 1000), 3)
+ self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants]
+ elif self._ged_options['edit_cost'] == 'LETTER2':
  random.seed(time.time())
- self.__edit_cost_constants = random.sample(range(1, 1000), 5)
- self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
- elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
- self.__edit_cost_constants = random.sample(range(1, 1000), 6)
- self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
+ self._edit_cost_constants = random.sample(range(1, 1000), 5)
+ self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
+ elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
+ self._edit_cost_constants = random.sample(range(1, 1000), 6)
+ self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
  if self._dataset.node_attrs == []:
- self.__edit_cost_constants[2] = 0
+ self._edit_cost_constants[2] = 0
  if self._dataset.edge_attrs == []:
- self.__edit_cost_constants[5] = 0
+ self._edit_cost_constants[5] = 0
  else:
- self.__edit_cost_constants = random.sample(range(1, 1000), 6)
- self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
+ self._edit_cost_constants = random.sample(range(1, 1000), 6)
+ self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
  if self._verbose >= 2:
- print('edit cost constants used:', self.__edit_cost_constants)
- elif self.__fit_method == 'expert': # expert
- if self.__init_ecc is None:
- if self.__ged_options['edit_cost'] == 'LETTER':
- self.__edit_cost_constants = [0.9, 1.7, 0.75]
- elif self.__ged_options['edit_cost'] == 'LETTER2':
- self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
+ print('edit cost constants used:', self._edit_cost_constants)
+ elif self._fit_method == 'expert': # expert
+ if self._init_ecc is None:
+ if self._ged_options['edit_cost'] == 'LETTER':
+ self._edit_cost_constants = [0.9, 1.7, 0.75]
+ elif self._ged_options['edit_cost'] == 'LETTER2':
+ self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
  else:
- self.__edit_cost_constants = [3, 3, 1, 3, 3, 1]
+ self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
  else:
- self.__edit_cost_constants = self.__init_ecc
- elif self.__fit_method == 'k-graphs':
- if self.__init_ecc is None:
- if self.__ged_options['edit_cost'] == 'LETTER':
- self.__init_ecc = [0.9, 1.7, 0.75]
- elif self.__ged_options['edit_cost'] == 'LETTER2':
- self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
- elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
- self.__init_ecc = [0, 0, 1, 1, 1, 0]
+ self._edit_cost_constants = self._init_ecc
+ elif self._fit_method == 'k-graphs':
+ if self._init_ecc is None:
+ if self._ged_options['edit_cost'] == 'LETTER':
+ self._init_ecc = [0.9, 1.7, 0.75]
+ elif self._ged_options['edit_cost'] == 'LETTER2':
+ self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
+ elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
+ self._init_ecc = [0, 0, 1, 1, 1, 0]
  if self._dataset.node_attrs == []:
- self.__init_ecc[2] = 0
+ self._init_ecc[2] = 0
  if self._dataset.edge_attrs == []:
- self.__init_ecc[5] = 0
+ self._init_ecc[5] = 0
  else:
- self.__init_ecc = [3, 3, 1, 3, 3, 1]
+ self._init_ecc = [3, 3, 1, 3, 3, 1]
  # optimize on the k-graph subset.
- self.__optimize_ecm_by_kernel_distances()
- elif self.__fit_method == 'whole-dataset':
- if self.__init_ecc is None:
- if self.__ged_options['edit_cost'] == 'LETTER':
- self.__init_ecc = [0.9, 1.7, 0.75]
- elif self.__ged_options['edit_cost'] == 'LETTER2':
- self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
+ self._optimize_ecm_by_kernel_distances()
+ elif self._fit_method == 'whole-dataset':
+ if self._init_ecc is None:
+ if self._ged_options['edit_cost'] == 'LETTER':
+ self._init_ecc = [0.9, 1.7, 0.75]
+ elif self._ged_options['edit_cost'] == 'LETTER2':
+ self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
  else:
- self.__init_ecc = [3, 3, 1, 3, 3, 1]
+ self._init_ecc = [3, 3, 1, 3, 3, 1]
  # optimize on the whole set.
- self.__optimize_ecc_by_kernel_distances()
- elif self.__fit_method == 'precomputed':
+ self._optimize_ecc_by_kernel_distances()
+ elif self._fit_method == 'precomputed':
  pass
- def __initialize_label_costs(self):
- self.__initialize_node_label_costs()
- self.__initialize_edge_label_costs()
+ def _initialize_label_costs(self):
+ self._initialize_node_label_costs()
+ self._initialize_edge_label_costs()

- def __initialize_node_label_costs(self):
+ def _initialize_node_label_costs(self):
  # Get list of node labels.
  nls = self._dataset.get_all_node_labels()
  # Generate random costs.
  nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
  rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
  rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
- self.__node_label_costs = rand_costs
+ self._node_label_costs = rand_costs

- def __initialize_edge_label_costs(self):
+ def _initialize_edge_label_costs(self):
  # Get list of edge labels.
  els = self._dataset.get_all_edge_labels()
  # Generate random costs.
  nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els))
  rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el)
  rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
- self.__edge_label_costs = rand_costs
+ self._edge_label_costs = rand_costs
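The nb_nl and nb_el formulas read as one cost per unordered pair of distinct labels (substitutions) plus one insertion and one deletion cost per label, i.e. n(n-1)/2 + 2n entries for n labels. A quick check of that arithmetic:

def num_label_costs(n):
    # n*(n-1)/2 substitution costs between distinct labels,
    # plus an insertion and a deletion cost for each label.
    return n * (n - 1) // 2 + 2 * n

assert num_label_costs(3) == 9  # 3 substitution pairs + 6 ins/del costs
assert num_label_costs(1) == 2  # no substitutions, just ins + del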
def __optimize_ecm_by_kernel_distances(self): | |||||
def _optimize_ecm_by_kernel_distances(self): | |||||
# compute distances in feature space. | # compute distances in feature space. | ||||
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() | dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() | ||||
dis_k_vec = [] | dis_k_vec = [] | ||||
@@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||||
dis_k_vec = np.array(dis_k_vec) | dis_k_vec = np.array(dis_k_vec) | ||||
# Set GEDEnv options. | # Set GEDEnv options. | ||||
# graphs = [self.__clean_graph(g) for g in self._dataset.graphs] | |||||
# self.__edit_cost_constants = self.__init_ecc | |||||
options = self.__ged_options.copy() | |||||
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed. | |||||
# graphs = [self._clean_graph(g) for g in self._dataset.graphs] | |||||
# self._edit_cost_constants = self._init_ecc | |||||
options = self._ged_options.copy() | |||||
options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed. | |||||
options['node_labels'] = self._dataset.node_labels | options['node_labels'] = self._dataset.node_labels | ||||
options['edge_labels'] = self._dataset.edge_labels | options['edge_labels'] = self._dataset.edge_labels | ||||
# options['node_attrs'] = self._dataset.node_attrs | # options['node_attrs'] = self._dataset.node_attrs | ||||
# options['edge_attrs'] = self._dataset.edge_attrs | # options['edge_attrs'] = self._dataset.edge_attrs | ||||
options['node_label_costs'] = self.__node_label_costs | |||||
options['edge_label_costs'] = self.__edge_label_costs | |||||
options['node_label_costs'] = self._node_label_costs | |||||
options['edge_label_costs'] = self._edge_label_costs | |||||
# Learner cost matrices. | # Learner cost matrices. | ||||
# Initialize cost learner. | # Initialize cost learner. | ||||
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo | |||||
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec) | |||||
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo | |||||
cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec) | |||||
# Run cost learner. | # Run cost learner. | ||||
cml.update(dis_k_vec, self._dataset.graphs, options) | cml.update(dis_k_vec, self._dataset.graphs, options) | ||||
# Get results. | # Get results. | ||||
results = cml.get_results() | results = cml.get_results() | ||||
self.__converged = results['converged'] | |||||
self.__itrs = results['itrs'] | |||||
self.__num_updates_ecs = results['num_updates_ecs'] | |||||
self._converged = results['converged'] | |||||
self._itrs = results['itrs'] | |||||
self._num_updates_ecs = results['num_updates_ecs'] | |||||
cost_list = results['cost_list'] | cost_list = results['cost_list'] | ||||
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)] | |||||
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):] | |||||
self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)] | |||||
self._edge_label_costs = cost_list[-1][len(self._node_label_costs):] | |||||
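For orientation, a hedged sketch of the fitting problem the cost learner above is assumed to solve, inferred from its inputs rather than from the CostMatricesLearner source:

# Inputs: kernel-space distances dis_k_vec (one entry per graph pair)
# and the dataset graphs. Assumed objective: non-negative label costs c
# such that graph edit distances reproduce the kernel distances, roughly
#   min over c >= 0 of  sum over pairs (i, j) of
#       (GED(g_i, g_j; c) - d_k(g_i, g_j))**2,
# with d_k taken from dis_k_vec.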
def __gmg_bcu(self): | |||||
def _gmg_bcu(self): | |||||
""" | """ | ||||
The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). | The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). | ||||
@@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||||
# Set up the GED environment. | # Set up the GED environment. | ||||
ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable. | ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable. | ||||
# gedlibpy.restart_env() | # gedlibpy.restart_env() | ||||
ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants) | |||||
graphs = [self.__clean_graph(g) for g in self._dataset.graphs] | |||||
ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants) | |||||
graphs = [self._clean_graph(g) for g in self._dataset.graphs] | |||||
for g in graphs: | for g in graphs: | ||||
ged_env.add_nx_graph(g, '') | ged_env.add_nx_graph(g, '') | ||||
graph_ids = ged_env.get_all_graph_ids() | graph_ids = ged_env.get_all_graph_ids() | ||||
node_labels = ged_env.get_all_node_labels() | node_labels = ged_env.get_all_node_labels() | ||||
edge_labels = ged_env.get_all_edge_labels() | edge_labels = ged_env.get_all_edge_labels() | ||||
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels)) | |||||
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels)) | |||||
node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels)) | |||||
edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels)) | |||||
ged_env.set_label_costs(node_label_costs, edge_label_costs) | ged_env.set_label_costs(node_label_costs, edge_label_costs) | ||||
set_median_id = ged_env.add_graph('set_median') | set_median_id = ged_env.add_graph('set_median') | ||||
gen_median_id = ged_env.add_graph('gen_median') | gen_median_id = ged_env.add_graph('gen_median') | ||||
ged_env.init(init_type=self.__ged_options['init_option']) | |||||
ged_env.init(init_type=self._ged_options['init_option']) | |||||
# Set up the median graph estimator. | # Set up the median graph estimator. | ||||
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||||
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options) | |||||
options = self.__mge_options.copy() | |||||
self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost'])) | |||||
self._mge.set_refine_method(self._ged_options['method'], self._ged_options) | |||||
options = self._mge_options.copy() | |||||
if 'seed' not in options: | if 'seed' not in options: | ||||
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | ||||
options['parallel'] = self.__parallel | |||||
options['parallel'] = self._parallel | |||||
# Select the GED algorithm. | # Select the GED algorithm. | ||||
self.__mge.set_options(mge_options_to_string(options)) | |||||
self.__mge.set_label_names(node_labels=self._dataset.node_labels, | |||||
self._mge.set_options(mge_options_to_string(options)) | |||||
self._mge.set_label_names(node_labels=self._dataset.node_labels, | |||||
edge_labels=self._dataset.edge_labels, | edge_labels=self._dataset.edge_labels, | ||||
node_attrs=self._dataset.node_attrs, | node_attrs=self._dataset.node_attrs, | ||||
edge_attrs=self._dataset.edge_attrs) | edge_attrs=self._dataset.edge_attrs) | ||||
ged_options = self.__ged_options.copy() | |||||
if self.__parallel: | |||||
ged_options = self._ged_options.copy() | |||||
if self._parallel: | |||||
ged_options['threads'] = 1 | ged_options['threads'] = 1 | ||||
self.__mge.set_init_method(ged_options['method'], ged_options) | |||||
self.__mge.set_descent_method(ged_options['method'], ged_options) | |||||
self._mge.set_init_method(ged_options['method'], ged_options) | |||||
self._mge.set_descent_method(ged_options['method'], ged_options) | |||||
# Run the estimator. | # Run the estimator. | ||||
self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||||
self._mge.run(graph_ids, set_median_id, gen_median_id) | |||||
# Get SODs. | # Get SODs. | ||||
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') | |||||
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') | |||||
self._sod_set_median = self._mge.get_sum_of_distances('initialized') | |||||
self._sod_gen_median = self._mge.get_sum_of_distances('converged') | |||||
# Get median graphs. | # Get median graphs. | ||||
self.__set_median = ged_env.get_nx_graph(set_median_id) | |||||
self.__gen_median = ged_env.get_nx_graph(gen_median_id) | |||||
self._set_median = ged_env.get_nx_graph(set_median_id) | |||||
self._gen_median = ged_env.get_nx_graph(gen_median_id) | |||||
def __compute_distances_to_true_median(self): | |||||
def _compute_distances_to_true_median(self): | |||||
# compute distance in kernel space for set median. | # compute distance in kernel space for set median. | ||||
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) | |||||
kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) | |||||
kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options) | |||||
kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options) | |||||
if self._kernel_options['normalize']: | if self._kernel_options['normalize']: | ||||
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||||
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||||
kernel_sm = 1 | kernel_sm = 1 | ||||
# @todo: not correct kernel value | # @todo: not correct kernel value | ||||
gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | ||||
gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) | gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) | ||||
self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||||
self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||||
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | ||||
gram_with_sm, withterm3=False) | gram_with_sm, withterm3=False) | ||||
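The normalize branch above applies the standard cosine normalization of a kernel; a small standalone sketch of the identity being used (textbook formula, not library code):

import numpy as np

def cosine_normalize(k_xy, k_xx, k_yy):
    # k_norm(x, y) = k(x, y) / sqrt(k(x, x) * k(y, y)); every self-kernel
    # becomes 1, which is why kernel_sm is set to 1 above.
    return k_xy / np.sqrt(k_xx * k_yy)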
# compute distance in kernel space for generalized median. | # compute distance in kernel space for generalized median. | ||||
kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) | |||||
kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) | |||||
kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options) | |||||
kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options) | |||||
if self._kernel_options['normalize']: | if self._kernel_options['normalize']: | ||||
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||||
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||||
kernel_gm = 1 | kernel_gm = 1 | ||||
gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | ||||
gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) | gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) | ||||
self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||||
self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||||
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | ||||
gram_with_gm, withterm3=False) | gram_with_gm, withterm3=False) | ||||
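For reference, a sketch of the quantity compute_k_dis is assumed to evaluate, with the signature inferred from the calls above rather than taken from the library source:

import numpy as np

def k_dis_sketch(idx, idx_set, alphas, gram, term3=0, withterm3=True):
    # Distance in kernel space between graph idx and the weighted mean of
    # the graphs in idx_set:
    #   d**2 = k(g, g) - 2 * sum_i a_i * k(g, g_i)
    #          + sum_{i, j} a_i * a_j * k(g_i, g_j)   (last term optional)
    term1 = gram[idx, idx]
    term2 = 2 * sum(a * gram[idx, j] for a, j in zip(alphas, idx_set))
    return np.sqrt(max(term1 - term2 + (term3 if withterm3 else 0), 0))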
@@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||||
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | ||||
gram_with_gm, withterm3=False)) | gram_with_gm, withterm3=False)) | ||||
idx_k_dis_median_set_min = np.argmin(k_dis_median_set) | idx_k_dis_median_set_min = np.argmin(k_dis_median_set) | ||||
self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] | |||||
self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() | |||||
self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] | |||||
self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
print() | print() | ||||
print('distance in kernel space for set median:', self.__k_dis_set_median) | |||||
print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | |||||
print('minimum distance in kernel space over graphs in median set:', self.__k_dis_dataset) | |||||
print('distance in kernel space for set median:', self._k_dis_set_median) | |||||
print('distance in kernel space for generalized median:', self._k_dis_gen_median) | |||||
print('minimum distance in kernel space over graphs in median set:', self._k_dis_dataset) | |||||
print('distance in kernel space for each graph in median set:', k_dis_median_set) | print('distance in kernel space for each graph in median set:', k_dis_median_set) | ||||
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||||
def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated. | |||||
# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||||
def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated. | |||||
""" | """ | ||||
Cleans node and edge labels and attributes of the given graph. | Cleans node and edge labels and attributes of the given graph. | ||||
""" | """ | ||||
@@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||||
@property | @property | ||||
def mge(self): | def mge(self): | ||||
return self.__mge | |||||
return self._mge | |||||
@property | @property | ||||
def ged_options(self): | def ged_options(self): | ||||
return self.__ged_options | |||||
return self._ged_options | |||||
@ged_options.setter | @ged_options.setter | ||||
def ged_options(self, value): | def ged_options(self, value): | ||||
self.__ged_options = value | |||||
self._ged_options = value | |||||
@property | @property | ||||
def mge_options(self): | def mge_options(self): | ||||
return self.__mge_options | |||||
return self._mge_options | |||||
@mge_options.setter | @mge_options.setter | ||||
def mge_options(self, value): | def mge_options(self, value): | ||||
self.__mge_options = value | |||||
self._mge_options = value | |||||
@property | @property | ||||
def fit_method(self): | def fit_method(self): | ||||
return self.__fit_method | |||||
return self._fit_method | |||||
@fit_method.setter | @fit_method.setter | ||||
def fit_method(self, value): | def fit_method(self, value): | ||||
self.__fit_method = value | |||||
self._fit_method = value | |||||
@property | @property | ||||
def init_ecc(self): | def init_ecc(self): | ||||
return self.__init_ecc | |||||
return self._init_ecc | |||||
@init_ecc.setter | @init_ecc.setter | ||||
def init_ecc(self, value): | def init_ecc(self, value): | ||||
self.__init_ecc = value | |||||
self._init_ecc = value | |||||
@property | @property | ||||
def set_median(self): | def set_median(self): | ||||
return self.__set_median | |||||
return self._set_median | |||||
@property | @property | ||||
def gen_median(self): | def gen_median(self): | ||||
return self.__gen_median | |||||
return self._gen_median | |||||
@property | @property | ||||
def best_from_dataset(self): | def best_from_dataset(self): | ||||
return self.__best_from_dataset | |||||
return self._best_from_dataset | |||||
@property | @property | ||||
def gram_matrix_unnorm(self): | def gram_matrix_unnorm(self): | ||||
return self.__gram_matrix_unnorm | |||||
return self._gram_matrix_unnorm | |||||
@gram_matrix_unnorm.setter | @gram_matrix_unnorm.setter | ||||
def gram_matrix_unnorm(self, value): | def gram_matrix_unnorm(self, value): | ||||
self.__gram_matrix_unnorm = value | |||||
self._gram_matrix_unnorm = value |
@@ -26,43 +26,43 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
def __init__(self, dataset=None): | def __init__(self, dataset=None): | ||||
PreimageGenerator.__init__(self, dataset=dataset) | PreimageGenerator.__init__(self, dataset=dataset) | ||||
# arguments to set. | # arguments to set. | ||||
self.__k = 5 # number of nearest neighbors of phi in D_N. | |||||
self.__r_max = 10 # maximum number of iterations. | |||||
self.__l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}. | |||||
self.__alphas = None # weights of linear combinations of points in kernel space. | |||||
self.__parallel = True | |||||
self.__n_jobs = multiprocessing.cpu_count() | |||||
self.__time_limit_in_sec = 0 | |||||
self.__max_itrs = 20 | |||||
self._k = 5 # number of nearest neighbors of phi in D_N. | |||||
self._r_max = 10 # maximum number of iterations. | |||||
self._l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}. | |||||
self._alphas = None # weights of linear combinations of points in kernel space. | |||||
self._parallel = True | |||||
self._n_jobs = multiprocessing.cpu_count() | |||||
self._time_limit_in_sec = 0 | |||||
self._max_itrs = 20 | |||||
# values to compute. | # values to compute. | ||||
self.__runtime_generate_preimage = None | |||||
self.__runtime_total = None | |||||
self.__preimage = None | |||||
self.__best_from_dataset = None | |||||
self.__k_dis_preimage = None | |||||
self.__k_dis_dataset = None | |||||
self.__itrs = 0 | |||||
self.__converged = False # @todo | |||||
self.__num_updates = 0 | |||||
self._runtime_generate_preimage = None | |||||
self._runtime_total = None | |||||
self._preimage = None | |||||
self._best_from_dataset = None | |||||
self._k_dis_preimage = None | |||||
self._k_dis_dataset = None | |||||
self._itrs = 0 | |||||
self._converged = False # @todo | |||||
self._num_updates = 0 | |||||
# values that can be set or computed. | # values that can be set or computed. | ||||
self.__gram_matrix_unnorm = None | |||||
self.__runtime_precompute_gm = None | |||||
self._gram_matrix_unnorm = None | |||||
self._runtime_precompute_gm = None | |||||
def set_options(self, **kwargs): | def set_options(self, **kwargs): | ||||
self._kernel_options = kwargs.get('kernel_options', {}) | self._kernel_options = kwargs.get('kernel_options', {}) | ||||
self._graph_kernel = kwargs.get('graph_kernel', None) | self._graph_kernel = kwargs.get('graph_kernel', None) | ||||
self._verbose = kwargs.get('verbose', 2) | self._verbose = kwargs.get('verbose', 2) | ||||
self.__k = kwargs.get('k', 5) | |||||
self.__r_max = kwargs.get('r_max', 10) | |||||
self.__l = kwargs.get('l', 500) | |||||
self.__alphas = kwargs.get('alphas', None) | |||||
self.__parallel = kwargs.get('parallel', True) | |||||
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||||
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||||
self.__max_itrs = kwargs.get('max_itrs', 20) | |||||
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||||
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||||
self._k = kwargs.get('k', 5) | |||||
self._r_max = kwargs.get('r_max', 10) | |||||
self._l = kwargs.get('l', 500) | |||||
self._alphas = kwargs.get('alphas', None) | |||||
self._parallel = kwargs.get('parallel', True) | |||||
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||||
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||||
self._max_itrs = kwargs.get('max_itrs', 20) | |||||
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||||
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||||
def run(self): | def run(self): | ||||
@@ -78,65 +78,65 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
start = time.time() | start = time.time() | ||||
# 1. precompute gram matrix. | # 1. precompute gram matrix. | ||||
if self.__gram_matrix_unnorm is None: | |||||
if self._gram_matrix_unnorm is None: | |||||
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | ||||
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||||
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||||
end_precompute_gm = time.time() | end_precompute_gm = time.time() | ||||
self.__runtime_precompute_gm = end_precompute_gm - start | |||||
self._runtime_precompute_gm = end_precompute_gm - start | |||||
else: | else: | ||||
if self.__runtime_precompute_gm is None: | |||||
if self._runtime_precompute_gm is None: | |||||
raise Exception('Parameter "runtime_precompute_gm" must be given when using a precomputed Gram matrix.') | raise Exception('Parameter "runtime_precompute_gm" must be given when using a precomputed Gram matrix.') | ||||
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm | |||||
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm | |||||
if self._kernel_options['normalize']: | if self._kernel_options['normalize']: | ||||
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||||
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) | |||||
else: | else: | ||||
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) | |||||
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) | |||||
end_precompute_gm = time.time() | end_precompute_gm = time.time() | ||||
start -= self.__runtime_precompute_gm | |||||
start -= self._runtime_precompute_gm | |||||
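A note on the timing bookkeeping above: when a precomputed Gram matrix is supplied, subtracting runtime_precompute_gm from start folds the externally measured precompute time back into the total, so cached and uncached runs report comparable runtime_total values.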
# 2. compute k nearest neighbors of phi in D_N. | # 2. compute k nearest neighbors of phi in D_N. | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
print('\nstart computing k nearest neighbors of phi in D_N...\n') | print('\nstart computing k nearest neighbors of phi in D_N...\n') | ||||
D_N = self._dataset.graphs | D_N = self._dataset.graphs | ||||
if self.__alphas is None: | |||||
self.__alphas = [1 / len(D_N)] * len(D_N) | |||||
if self._alphas is None: | |||||
self._alphas = [1 / len(D_N)] * len(D_N) | |||||
k_dis_list = [] # distance between g_star and each graph. | k_dis_list = [] # distance between g_star and each graph. | ||||
term3 = 0 | term3 = 0 | ||||
for i1, a1 in enumerate(self.__alphas): | |||||
for i2, a2 in enumerate(self.__alphas): | |||||
for i1, a1 in enumerate(self._alphas): | |||||
for i2, a2 in enumerate(self._alphas): | |||||
term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] | term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] | ||||
for idx in range(len(D_N)): | for idx in range(len(D_N)): | ||||
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) | |||||
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self._alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) | |||||
# sort. | # sort. | ||||
sort_idx = np.argsort(k_dis_list) | sort_idx = np.argsort(k_dis_list) | ||||
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances. | |||||
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self._k]] # the k shortest distances. | |||||
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | ||||
g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N | g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N | ||||
self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple. | |||||
self.__k_dis_dataset = dis_gs[0] | |||||
self._best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple. | |||||
self._k_dis_dataset = dis_gs[0] | |||||
if self.__k_dis_dataset == 0: # get the exact pre-image. | |||||
if self._k_dis_dataset == 0: # get the exact pre-image. | |||||
end_generate_preimage = time.time() | end_generate_preimage = time.time() | ||||
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||||
self.__runtime_total = end_generate_preimage - start | |||||
self.__preimage = self.__best_from_dataset.copy() | |||||
self.__k_dis_preimage = self.__k_dis_dataset | |||||
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||||
self._runtime_total = end_generate_preimage - start | |||||
self._preimage = self._best_from_dataset.copy() | |||||
self._k_dis_preimage = self._k_dis_dataset | |||||
if self._verbose: | if self._verbose: | ||||
print() | print() | ||||
print('=============================================================================') | print('=============================================================================') | ||||
print('The exact pre-image is found from the input dataset.') | print('The exact pre-image is found from the input dataset.') | ||||
print('-----------------------------------------------------------------------------') | print('-----------------------------------------------------------------------------') | ||||
print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset) | |||||
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||||
print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||||
print('Total time:', self.__runtime_total) | |||||
print('Distance in kernel space for the best graph from dataset and for preimage:', self._k_dis_dataset) | |||||
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) | |||||
print('Time to generate pre-images:', self._runtime_generate_preimage) | |||||
print('Total time:', self._runtime_total) | |||||
print('=============================================================================') | print('=============================================================================') | ||||
print() | print() | ||||
return | return | ||||
dhat = dis_gs[0] # the nearest distance | dhat = dis_gs[0] # the nearest distance | ||||
Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors | |||||
Gk = [D_N[ig].copy() for ig in sort_idx[0:self._k]] # the k nearest neighbors | |||||
Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] | Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] | ||||
# 3. start iterations. | # 3. start iterations. | ||||
@@ -146,12 +146,12 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
dihat_list = [] | dihat_list = [] | ||||
r = 0 | r = 0 | ||||
dis_of_each_itr = [dhat] | dis_of_each_itr = [dhat] | ||||
if self.__parallel: | |||||
if self._parallel: | |||||
self._kernel_options['parallel'] = None | self._kernel_options['parallel'] = None | ||||
self.__itrs = 0 | |||||
self.__num_updates = 0 | |||||
timer = Timer(self.__time_limit_in_sec) | |||||
while not self.__termination_criterion_met(timer, self.__itrs, r): | |||||
self._itrs = 0 | |||||
self._num_updates = 0 | |||||
timer = Timer(self._time_limit_in_sec) | |||||
while not self._termination_criterion_met(timer, self._itrs, r): | |||||
print('\n- r =', r) | print('\n- r =', r) | ||||
found = False | found = False | ||||
dis_bests = dis_gs + dihat_list | dis_bests = dis_gs + dihat_list | ||||
@@ -173,7 +173,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
nb_modif = 1 | nb_modif = 1 | ||||
for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): | for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): | ||||
nb_modif *= nb / (fdgs_max - idx) | nb_modif *= nb / (fdgs_max - idx) | ||||
while fdgs_max < nb_vpairs_min and nb_modif < self.__l: | |||||
while fdgs_max < nb_vpairs_min and nb_modif < self._l: | |||||
fdgs_max += 1 | fdgs_max += 1 | ||||
nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max | nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max | ||||
nb_increase = int(fdgs_max - fdgs_max_old) | nb_increase = int(fdgs_max - fdgs_max_old) | ||||
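The loop above grows fdgs_max until enough distinct edge modifications exist; a small worked check of the combinatorics, assuming (consistently with the update rule) that the running products track binomial coefficients:

from math import comb

# The products compute C(nb_vpairs_min, fdgs_max), the number of distinct
# sets of vertex pairs to flip. With nb_vpairs_min = 20 and l = 500
# requested graphs, C(20, 2) = 190 falls short while C(20, 3) = 1140
# suffices, so fdgs_max settles at 3.
assert comb(20, 2) == 190 and comb(20, 3) == 1140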
@@ -184,7 +184,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
for ig, gs in enumerate(Gs_nearest + gihat_list): | for ig, gs in enumerate(Gs_nearest + gihat_list): | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | ||||
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||||
gnew, dhat, found = self._generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||||
if found: | if found: | ||||
r = 0 | r = 0 | ||||
@@ -194,51 +194,51 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
r += 1 | r += 1 | ||||
dis_of_each_itr.append(dhat) | dis_of_each_itr.append(dhat) | ||||
self.__itrs += 1 | |||||
self._itrs += 1 | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
print('Total number of iterations is', self.__itrs, '.') | |||||
print('The preimage is updated', self.__num_updates, 'times.') | |||||
print('Total number of iterations is', self._itrs, '.') | |||||
print('The preimage is updated', self._num_updates, 'times.') | |||||
print('The shortest distances for previous iterations are', dis_of_each_itr, '.') | print('The shortest distances for previous iterations are', dis_of_each_itr, '.') | ||||
# get results and print. | # get results and print. | ||||
end_generate_preimage = time.time() | end_generate_preimage = time.time() | ||||
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||||
self.__runtime_total = end_generate_preimage - start | |||||
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||||
self.__k_dis_preimage = dhat | |||||
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||||
self._runtime_total = end_generate_preimage - start | |||||
self._preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||||
self._k_dis_preimage = dhat | |||||
if self._verbose: | if self._verbose: | ||||
print() | print() | ||||
print('=============================================================================') | print('=============================================================================') | ||||
print('Finished generation of preimages.') | print('Finished generation of preimages.') | ||||
print('-----------------------------------------------------------------------------') | print('-----------------------------------------------------------------------------') | ||||
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset) | |||||
print('Distance in kernel space for the preimage:', self.__k_dis_preimage) | |||||
print('Total number of iterations for optimizing:', self.__itrs) | |||||
print('Total number of preimage updates:', self.__num_updates) | |||||
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||||
print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||||
print('Total time:', self.__runtime_total) | |||||
print('Distance in kernel space for the best graph from dataset:', self._k_dis_dataset) | |||||
print('Distance in kernel space for the preimage:', self._k_dis_preimage) | |||||
print('Total number of iterations for optimizing:', self._itrs) | |||||
print('Total number of preimage updates:', self._num_updates) | |||||
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) | |||||
print('Time to generate pre-images:', self._runtime_generate_preimage) | |||||
print('Total time:', self._runtime_total) | |||||
print('=============================================================================') | print('=============================================================================') | ||||
print() | print() | ||||
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): | |||||
if self.__parallel: | |||||
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) | |||||
def _generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): | |||||
if self._parallel: | |||||
gnew, dhat, found = self._generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) | |||||
else: | else: | ||||
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) | |||||
gnew, dhat, found = self._generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) | |||||
return gnew, dhat, found | return gnew, dhat, found | ||||
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): | |||||
def _generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): | |||||
gnew = None | gnew = None | ||||
updated = False | updated = False | ||||
for trial in range(0, self.__l): | |||||
for trial in range(0, self._l): | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
print('--- trial', trial + 1, 'out of', self.__l) | |||||
print('--- trial', trial + 1, 'out of', self._l) | |||||
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) | |||||
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial) | |||||
# get the better graph preimage. | # get the better graph preimage. | ||||
if dnew <= dhat: # @todo: the new distance is smaller or also equal? | if dnew <= dhat: # @todo: the new distance is smaller or also equal? | ||||
@@ -257,14 +257,14 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
found = True # found better or equally good graph. | found = True # found better or equally good graph. | ||||
if updated: | if updated: | ||||
self.__num_updates += 1 | |||||
self._num_updates += 1 | |||||
return gnew, dhat, found | return gnew, dhat, found | ||||
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): | |||||
def _generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): | |||||
gnew = None | gnew = None | ||||
len_itr = self.__l | |||||
len_itr = self._l | |||||
gnew_list = [None] * len_itr | gnew_list = [None] * len_itr | ||||
dnew_list = [None] * len_itr | dnew_list = [None] * len_itr | ||||
itr = range(0, len_itr) | itr = range(0, len_itr) | ||||
@@ -295,7 +295,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
print('I am smaller!') | print('I am smaller!') | ||||
print('index (as in D_k U {gihat}) =', str(ig)) | print('index (as in D_k U {gihat}) =', str(ig)) | ||||
print('distance:', dhat, '->', dnew, '\n') | print('distance:', dhat, '->', dnew, '\n') | ||||
self.__num_updates += 1 | |||||
self._num_updates += 1 | |||||
else: | else: | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
print('I am equal!') | print('I am equal!') | ||||
@@ -308,11 +308,11 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
def _generate_graph_parallel(self, g_init, fdgs, term3, itr): | def _generate_graph_parallel(self, g_init, fdgs, term3, itr): | ||||
trial = itr | trial = itr | ||||
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) | |||||
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial) | |||||
return trial, gtemp, dnew | return trial, gtemp, dnew | ||||
def __do_trial(self, g_init, fdgs, term3, trial): | |||||
def _do_trial(self, g_init, fdgs, term3, trial): | |||||
# add and delete edges. | # add and delete edges. | ||||
gtemp = g_init.copy() | gtemp = g_init.copy() | ||||
seed = (trial + int(time.time())) % (2 ** 32 - 1) | seed = (trial + int(time.time())) % (2 ** 32 - 1) | ||||
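A note on the seed arithmetic above: numpy RNG seeds must lie in [0, 2**32 - 1], hence the modulus, and offsetting by trial keeps concurrent trials from reusing a single time-based seed (assuming the seed feeds numpy's generator, which the bound suggests).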
@@ -339,51 +339,51 @@ class RandomPreimageGenerator(PreimageGenerator): | |||||
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) | kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) | ||||
kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) | kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) | ||||
if self._kernel_options['normalize']: | if self._kernel_options['normalize']: | ||||
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||||
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||||
kernel_gtmp = 1 | kernel_gtmp = 1 | ||||
# @todo: not correct kernel value | # @todo: not correct kernel value | ||||
gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | ||||
gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) | gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) | ||||
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||||
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self._alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||||
return gtemp, dnew | return gtemp, dnew | ||||
def get_results(self): | def get_results(self): | ||||
results = {} | results = {} | ||||
results['runtime_precompute_gm'] = self.__runtime_precompute_gm | |||||
results['runtime_generate_preimage'] = self.__runtime_generate_preimage | |||||
results['runtime_total'] = self.__runtime_total | |||||
results['k_dis_dataset'] = self.__k_dis_dataset | |||||
results['k_dis_preimage'] = self.__k_dis_preimage | |||||
results['itrs'] = self.__itrs | |||||
results['num_updates'] = self.__num_updates | |||||
results['runtime_precompute_gm'] = self._runtime_precompute_gm | |||||
results['runtime_generate_preimage'] = self._runtime_generate_preimage | |||||
results['runtime_total'] = self._runtime_total | |||||
results['k_dis_dataset'] = self._k_dis_dataset | |||||
results['k_dis_preimage'] = self._k_dis_preimage | |||||
results['itrs'] = self._itrs | |||||
results['num_updates'] = self._num_updates | |||||
return results | return results | ||||
def __termination_criterion_met(self, timer, itr, r): | |||||
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): | |||||
# if self.__state == AlgorithmState.TERMINATED: | |||||
# self.__state = AlgorithmState.INITIALIZED | |||||
def _termination_criterion_met(self, timer, itr, r): | |||||
if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False): | |||||
# if self._state == AlgorithmState.TERMINATED: | |||||
# self._state = AlgorithmState.INITIALIZED | |||||
return True | return True | ||||
return (r >= self.__r_max if self.__r_max >= 0 else False) | |||||
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) | |||||
return (r >= self._r_max if self._r_max >= 0 else False) | |||||
# return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False) | |||||
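Restated as standalone logic, the criterion above stops on hard limits first and otherwise treats r_max consecutive non-improving rounds as convergence; a sketch mirroring the code:

def termination_sketch(expired, itr, max_itrs, r, r_max):
    # Hard stops: timer expiry or the total-iteration cap; a negative cap
    # disables the corresponding check.
    if expired or (itr >= max_itrs if max_itrs >= 0 else False):
        return True
    # Soft stop: r_max consecutive rounds without improvement.
    return r >= r_max if r_max >= 0 else False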
@property | @property | ||||
def preimage(self): | def preimage(self): | ||||
return self.__preimage | |||||
return self._preimage | |||||
@property | @property | ||||
def best_from_dataset(self): | def best_from_dataset(self): | ||||
return self.__best_from_dataset | |||||
return self._best_from_dataset | |||||
@property | @property | ||||
def gram_matrix_unnorm(self): | def gram_matrix_unnorm(self): | ||||
return self.__gram_matrix_unnorm | |||||
return self._gram_matrix_unnorm | |||||
@gram_matrix_unnorm.setter | @gram_matrix_unnorm.setter | ||||
def gram_matrix_unnorm(self, value): | def gram_matrix_unnorm(self, value): | ||||
self.__gram_matrix_unnorm = value | |||||
self._gram_matrix_unnorm = value |
@@ -35,13 +35,13 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt | |||||
if save_results: | if save_results: | ||||
# create result files. | # create result files. | ||||
print('creating output files...') | print('creating output files...') | ||||
fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||||
fn_output_detail, fn_output_summary = _init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||||
else: | else: | ||||
fn_output_detail, fn_output_summary = None, None | fn_output_detail, fn_output_summary = None, None | ||||
# 2. compute/load Gram matrix a priori. | # 2. compute/load Gram matrix a priori. | ||||
print('2. computing/loading Gram matrix...') | print('2. computing/loading Gram matrix...') | ||||
gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets) | |||||
gram_matrix_unnorm_list, time_precompute_gm_list = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets) | |||||
sod_sm_list = [] | sod_sm_list = [] | ||||
sod_gm_list = [] | sod_gm_list = [] | ||||
@@ -82,7 +82,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt | |||||
# 3. get the best graph and remove it from median set. | # 3. get the best graph and remove it from median set. | ||||
print('3. getting and removing the best graph...') | print('3. getting and removing the best graph...') | ||||
gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset] | gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset] | ||||
best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy())) | |||||
best_index, best_dis, best_graph = _get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy())) | |||||
median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index] | median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index] | ||||
num_graphs -= 1 | num_graphs -= 1 | ||||
if num_graphs == 1: | if num_graphs == 1: | ||||
@@ -294,7 +294,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt | |||||
print('\ncomplete.\n') | print('\ncomplete.\n') | ||||
def __get_best_graph(Gn, gram_matrix): | |||||
def _get_best_graph(Gn, gram_matrix): | |||||
k_dis_list = [] | k_dis_list = [] | ||||
for idx in range(len(Gn)): | for idx in range(len(Gn)): | ||||
k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False)) | k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False)) | ||||
@@ -313,7 +313,7 @@ def get_relations(sign): | |||||
return 'worse' | return 'worse' | ||||
def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||||
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||||
if load_gm == 'auto': | if load_gm == 'auto': | ||||
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | ||||
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | ||||
@@ -325,7 +325,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||||
gram_matrix_unnorm_list = [] | gram_matrix_unnorm_list = [] | ||||
time_precompute_gm_list = [] | time_precompute_gm_list = [] | ||||
for dataset in datasets: | for dataset in datasets: | ||||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options) | |||||
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options) | |||||
gram_matrix_unnorm_list.append(gram_matrix_unnorm) | gram_matrix_unnorm_list.append(gram_matrix_unnorm) | ||||
time_precompute_gm_list.append(time_precompute_gm) | time_precompute_gm_list.append(time_precompute_gm) | ||||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | ||||
@@ -333,7 +333,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||||
gram_matrix_unnorm_list = [] | gram_matrix_unnorm_list = [] | ||||
time_precompute_gm_list = [] | time_precompute_gm_list = [] | ||||
for dataset in datasets: | for dataset in datasets: | ||||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options) | |||||
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options) | |||||
gram_matrix_unnorm_list.append(gram_matrix_unnorm) | gram_matrix_unnorm_list.append(gram_matrix_unnorm) | ||||
time_precompute_gm_list.append(time_precompute_gm) | time_precompute_gm_list.append(time_precompute_gm) | ||||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | ||||
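As written above, the cache is a single .npz file per dataset/kernel pair holding gram_matrix_unnorm_list and run_time_list; the 'auto' branch presumably reloads exactly those two arrays whenever the file already exists on disk.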
@@ -346,7 +346,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||||
return gram_matrix_unnorm_list, time_precompute_gm_list | return gram_matrix_unnorm_list, time_precompute_gm_list | ||||
def __get_graph_kernel(dataset, kernel_options): | |||||
def _get_graph_kernel(dataset, kernel_options): | |||||
from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | ||||
node_labels=dataset.node_labels, | node_labels=dataset.node_labels, | ||||
@@ -358,7 +358,7 @@ def __get_graph_kernel(dataset, kernel_options): | |||||
return graph_kernel | return graph_kernel | ||||
def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||||
def _compute_gram_matrix_unnorm(dataset, kernel_options): | |||||
from gklearn.utils.utils import get_graph_kernel_by_name | from gklearn.utils.utils import get_graph_kernel_by_name | ||||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | ||||
node_labels=dataset.node_labels, | node_labels=dataset.node_labels, | ||||
@@ -374,7 +374,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||||
return gram_matrix_unnorm, run_time | return gram_matrix_unnorm, run_time | ||||
def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||||
def _init_output_file(ds_name, gkernel, fit_method, dir_output): | |||||
if not os.path.exists(dir_output): | if not os.path.exists(dir_output): | ||||
os.makedirs(dir_output) | os.makedirs(dir_output) | ||||
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | ||||
@@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
if save_results: | if save_results: | ||||
# create result files. | # create result files. | ||||
print('creating output files...') | print('creating output files...') | ||||
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||||
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||||
sod_sm_list = [] | sod_sm_list = [] | ||||
sod_gm_list = [] | sod_gm_list = [] | ||||
@@ -307,7 +307,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||||
print('\ncomplete.\n') | print('\ncomplete.\n') | ||||
def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output): | |||||
def _init_output_file_preimage(ds_name, gkernel, fit_method, dir_output): | |||||
if not os.path.exists(dir_output): | if not os.path.exists(dir_output): | ||||
os.makedirs(dir_output) | os.makedirs(dir_output) | ||||
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | # fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | ||||
@@ -109,45 +109,183 @@ def test_Marginalized(ds_name, parallel, remove_totters): | |||||
assert False, exception | assert False, exception | ||||
@pytest.mark.parametrize('ds_name', ['Acyclic']) | |||||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||||
def test_SylvesterEquation(ds_name, parallel): | |||||
"""Test sylvester equation kernel. | |||||
""" | |||||
from gklearn.kernels import SylvesterEquation | |||||
dataset = chooseDataset(ds_name) | |||||
try: | |||||
graph_kernel = SylvesterEquation( | |||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
weight=1e-3, | |||||
p=None, | |||||
q=None, | |||||
edge_weight=None) | |||||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
except Exception as exception: | |||||
assert False, exception | |||||
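A general background note on the weight=1e-3 used in these random-walk tests (a property of the kernel family, not something introduced by this patch): the geometric series behind such kernels converges only when the weight stays below one over the largest eigenvalue of the direct-product graph's adjacency matrix, so the tests favor a conservatively small value.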
@pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS']) | |||||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||||
def test_ConjugateGradient(ds_name, parallel): | |||||
"""Test conjugate gradient kernel. | |||||
""" | |||||
from gklearn.kernels import ConjugateGradient | |||||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||||
import functools | |||||
dataset = chooseDataset(ds_name) | |||||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||||
try: | |||||
graph_kernel = ConjugateGradient( | |||||
node_labels=dataset.node_labels, | |||||
node_attrs=dataset.node_attrs, | |||||
edge_labels=dataset.edge_labels, | |||||
edge_attrs=dataset.edge_attrs, | |||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
weight=1e-3, | |||||
p=None, | |||||
q=None, | |||||
edge_weight=None, | |||||
node_kernels=sub_kernels, | |||||
edge_kernels=sub_kernels) | |||||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
except Exception as exception: | |||||
assert False, exception | |||||
@pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS']) | |||||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||||
def test_FixedPoint(ds_name, parallel): | |||||
"""Test fixed point kernel. | |||||
""" | |||||
from gklearn.kernels import FixedPoint | |||||
from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | |||||
import functools | |||||
dataset = chooseDataset(ds_name) | |||||
mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | |||||
sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||||
try: | |||||
graph_kernel = FixedPoint( | |||||
node_labels=dataset.node_labels, | |||||
node_attrs=dataset.node_attrs, | |||||
edge_labels=dataset.edge_labels, | |||||
edge_attrs=dataset.edge_attrs, | |||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
weight=1e-3, | |||||
p=None, | |||||
q=None, | |||||
edge_weight=None, | |||||
node_kernels=sub_kernels, | |||||
edge_kernels=sub_kernels) | |||||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
except Exception as exception: | |||||
assert False, exception | |||||
@pytest.mark.parametrize('ds_name', ['Acyclic']) | |||||
@pytest.mark.parametrize('sub_kernel', ['exp', 'geo']) | |||||
@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||||
def test_SpectralDecomposition(ds_name, sub_kernel, parallel): | |||||
"""Test spectral decomposition kernel. | |||||
""" | |||||
from gklearn.kernels import SpectralDecomposition | |||||
dataset = chooseDataset(ds_name) | |||||
try: | |||||
graph_kernel = SpectralDecomposition( | |||||
ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
weight=1e-3, | |||||
p=None, | |||||
q=None, | |||||
edge_weight=None, | |||||
sub_kernel=sub_kernel) | |||||
gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||||
parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
except Exception as exception: | |||||
assert False, exception | |||||
# @pytest.mark.parametrize( | # @pytest.mark.parametrize( | ||||
# 'compute_method,ds_name,sub_kernel', | # 'compute_method,ds_name,sub_kernel', | ||||
# [ | # [ | ||||
# # ('sylvester', 'Alkane', None), | |||||
# # ('conjugate', 'Alkane', None), | |||||
# # ('conjugate', 'AIDS', None), | |||||
# # ('fp', 'Alkane', None), | |||||
# # ('fp', 'AIDS', None), | |||||
# ('sylvester', 'Alkane', None), | |||||
# ('conjugate', 'Alkane', None), | |||||
# ('conjugate', 'AIDS', None), | |||||
# ('fp', 'Alkane', None), | |||||
# ('fp', 'AIDS', None), | |||||
# ('spectral', 'Alkane', 'exp'), | # ('spectral', 'Alkane', 'exp'), | ||||
# ('spectral', 'Alkane', 'geo'), | # ('spectral', 'Alkane', 'geo'), | ||||
# ] | # ] | ||||
# ) | # ) | ||||
# #@pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||||
# def test_randomwalkkernel(ds_name, compute_method, sub_kernel): | |||||
# """Test random walk kernel kernel. | |||||
# @pytest.mark.parametrize('parallel', ['imap_unordered', None]) | |||||
# def test_RandomWalk(ds_name, compute_method, sub_kernel, parallel): | |||||
# """Test random walk kernel. | |||||
# """ | # """ | ||||
# from gklearn.kernels.randomWalkKernel import randomwalkkernel | |||||
# from gklearn.kernels import RandomWalk | |||||
# from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct | ||||
# import functools | # import functools | ||||
# Gn, y = chooseDataset(ds_name) | |||||
# | |||||
# dataset = chooseDataset(ds_name) | |||||
# mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) | ||||
# sub_kernels = [{'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}] | |||||
# try: | |||||
# Kmatrix, run_time, idx = randomwalkkernel(Gn, | |||||
# compute_method=compute_method, | |||||
# weight=1e-3, | |||||
# p=None, | |||||
# q=None, | |||||
# edge_weight=None, | |||||
# node_kernels=sub_kernels, | |||||
# edge_kernels=sub_kernels, | |||||
# node_label='atom', | |||||
# edge_label='bond_type', | |||||
# sub_kernel=sub_kernel, | |||||
# # parallel=parallel, | |||||
# n_jobs=multiprocessing.cpu_count(), | |||||
# verbose=True) | |||||
# sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} | |||||
# # try: | |||||
# graph_kernel = RandomWalk(node_labels=dataset.node_labels, | |||||
# node_attrs=dataset.node_attrs, | |||||
# edge_labels=dataset.edge_labels, | |||||
# edge_attrs=dataset.edge_attrs, | |||||
# ds_infos=dataset.get_dataset_infos(keys=['directed']), | |||||
# compute_method=compute_method, | |||||
# weight=1e-3, | |||||
# p=None, | |||||
# q=None, | |||||
# edge_weight=None, | |||||
# node_kernels=sub_kernels, | |||||
# edge_kernels=sub_kernels, | |||||
# sub_kernel=sub_kernel) | |||||
# gram_matrix, run_time = graph_kernel.compute(dataset.graphs, | |||||
# parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
# kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], | |||||
# parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
# kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], | |||||
# parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) | |||||
# except Exception as exception: | # except Exception as exception: | ||||
# assert False, exception | # assert False, exception | ||||
@@ -296,4 +434,9 @@ def test_WLSubtree(ds_name, parallel): | |||||
if __name__ == "__main__": | if __name__ == "__main__": | ||||
# test_spkernel('Alkane', 'imap_unordered') | # test_spkernel('Alkane', 'imap_unordered') | ||||
test_StructuralSP('Fingerprint_edge', 'imap_unordered') | |||||
# test_StructuralSP('Fingerprint_edge', 'imap_unordered') | |||||
test_WLSubtree('Acyclic', 'imap_unordered') | |||||
# test_RandomWalk('Acyclic', 'sylvester', None, 'imap_unordered') | |||||
# test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered') | |||||
# test_RandomWalk('Acyclic', 'fp', None, None) | |||||
# test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered') |
@@ -16,54 +16,54 @@ class Dataset(object): | |||||
def __init__(self, filename=None, filename_targets=None, **kwargs): | def __init__(self, filename=None, filename_targets=None, **kwargs): | ||||
if filename is None: | if filename is None: | ||||
self.__graphs = None | |||||
self.__targets = None | |||||
self.__node_labels = None | |||||
self.__edge_labels = None | |||||
self.__node_attrs = None | |||||
self.__edge_attrs = None | |||||
self._graphs = None | |||||
self._targets = None | |||||
self._node_labels = None | |||||
self._edge_labels = None | |||||
self._node_attrs = None | |||||
self._edge_attrs = None | |||||
else: | else: | ||||
self.load_dataset(filename, filename_targets=filename_targets, **kwargs) | self.load_dataset(filename, filename_targets=filename_targets, **kwargs) | ||||
self.__substructures = None | |||||
self.__node_label_dim = None | |||||
self.__edge_label_dim = None | |||||
self.__directed = None | |||||
self.__dataset_size = None | |||||
self.__total_node_num = None | |||||
self.__ave_node_num = None | |||||
self.__min_node_num = None | |||||
self.__max_node_num = None | |||||
self.__total_edge_num = None | |||||
self.__ave_edge_num = None | |||||
self.__min_edge_num = None | |||||
self.__max_edge_num = None | |||||
self.__ave_node_degree = None | |||||
self.__min_node_degree = None | |||||
self.__max_node_degree = None | |||||
self.__ave_fill_factor = None | |||||
self.__min_fill_factor = None | |||||
self.__max_fill_factor = None | |||||
self.__node_label_nums = None | |||||
self.__edge_label_nums = None | |||||
self.__node_attr_dim = None | |||||
self.__edge_attr_dim = None | |||||
self.__class_number = None | |||||
self._substructures = None | |||||
self._node_label_dim = None | |||||
self._edge_label_dim = None | |||||
self._directed = None | |||||
self._dataset_size = None | |||||
self._total_node_num = None | |||||
self._ave_node_num = None | |||||
self._min_node_num = None | |||||
self._max_node_num = None | |||||
self._total_edge_num = None | |||||
self._ave_edge_num = None | |||||
self._min_edge_num = None | |||||
self._max_edge_num = None | |||||
self._ave_node_degree = None | |||||
self._min_node_degree = None | |||||
self._max_node_degree = None | |||||
self._ave_fill_factor = None | |||||
self._min_fill_factor = None | |||||
self._max_fill_factor = None | |||||
self._node_label_nums = None | |||||
self._edge_label_nums = None | |||||
self._node_attr_dim = None | |||||
self._edge_attr_dim = None | |||||
self._class_number = None | |||||
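The sweep above from double- to single-underscore attributes sidesteps Python's name mangling: inside `class Dataset`, `self.__graphs` is stored as `_Dataset__graphs`, so subclasses (and pickling or debugging code) cannot reach it under the written name, while a single underscore is only a privacy convention. A minimal illustration with generic names, not from this repo:

class Base:
    def __init__(self):
        self.__hidden = 1   # mangled to _Base__hidden at compile time
        self._shared = 2    # plain convention; subclasses see it as written

class Child(Base):
    def probe(self):
        try:
            return self.__hidden    # looks up _Child__hidden -> AttributeError
        except AttributeError:
            return self._shared     # single underscore resolves normally

print(Child().probe())  # prints 2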
def load_dataset(self, filename, filename_targets=None, **kwargs): | def load_dataset(self, filename, filename_targets=None, **kwargs): | ||||
self.__graphs, self.__targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) | |||||
self.__node_labels = label_names['node_labels'] | |||||
self.__node_attrs = label_names['node_attrs'] | |||||
self.__edge_labels = label_names['edge_labels'] | |||||
self.__edge_attrs = label_names['edge_attrs'] | |||||
self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) | |||||
self._node_labels = label_names['node_labels'] | |||||
self._node_attrs = label_names['node_attrs'] | |||||
self._edge_labels = label_names['edge_labels'] | |||||
self._edge_attrs = label_names['edge_attrs'] | |||||
self.clean_labels() | self.clean_labels() | ||||
def load_graphs(self, graphs, targets=None): | def load_graphs(self, graphs, targets=None): | ||||
# this has to be followed by set_labels(). | # this has to be followed by set_labels(). | ||||
self.__graphs = graphs | |||||
self.__targets = targets | |||||
self._graphs = graphs | |||||
self._targets = targets | |||||
# self.set_labels_attrs() # @todo | # self.set_labels_attrs() # @todo | ||||
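As the inline comment notes, `load_graphs` stores only the graphs and targets, so it must be followed by `set_labels`. A hedged sketch with placeholder label names; the `Dataset` import path is assumed:

import networkx as nx
from gklearn.utils import Dataset  # assumed import path for the class above

g = nx.Graph()
g.add_node(0, atom='C')
g.add_node(1, atom='O')
g.add_edge(0, 1, bond_type='1')

ds = Dataset()
ds.load_graphs([g], targets=[1.0])
ds.set_labels(node_labels=['atom'], edge_labels=['bond_type'])  # required after load_graphs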
@@ -71,108 +71,108 @@ class Dataset(object): | |||||
current_path = os.path.dirname(os.path.realpath(__file__)) + '/' | current_path = os.path.dirname(os.path.realpath(__file__)) + '/' | ||||
if ds_name == 'Acyclic': | if ds_name == 'Acyclic': | ||||
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' | ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'AIDS': | elif ds_name == 'AIDS': | ||||
ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt' | ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Alkane': | elif ds_name == 'Alkane': | ||||
ds_file = current_path + '../../datasets/Alkane/dataset.ds' | ds_file = current_path + '../../datasets/Alkane/dataset.ds' | ||||
fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt' | fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file, filename_targets=fn_targets) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file, filename_targets=fn_targets) | |||||
elif ds_name == 'COIL-DEL': | elif ds_name == 'COIL-DEL': | ||||
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' | ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'COIL-RAG': | elif ds_name == 'COIL-RAG': | ||||
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt' | ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'COLORS-3': | elif ds_name == 'COLORS-3': | ||||
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' | ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Cuneiform': | elif ds_name == 'Cuneiform': | ||||
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'DD': | elif ds_name == 'DD': | ||||
ds_file = current_path + '../../datasets/DD/DD_A.txt' | ds_file = current_path + '../../datasets/DD/DD_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'ENZYMES': | elif ds_name == 'ENZYMES': | ||||
ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Fingerprint': | elif ds_name == 'Fingerprint': | ||||
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'FRANKENSTEIN': | elif ds_name == 'FRANKENSTEIN': | ||||
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt' | ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Letter-high': # node non-symb | elif ds_name == 'Letter-high': # node non-symb | ||||
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt' | ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Letter-low': # node non-symb | elif ds_name == 'Letter-low': # node non-symb | ||||
ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt' | ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Letter-med': # node non-symb | elif ds_name == 'Letter-med': # node non-symb | ||||
ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt' | ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'MAO': | elif ds_name == 'MAO': | ||||
ds_file = current_path + '../../datasets/MAO/dataset.ds' | ds_file = current_path + '../../datasets/MAO/dataset.ds' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Monoterpenoides': | elif ds_name == 'Monoterpenoides': | ||||
ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds' | ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'MUTAG': | elif ds_name == 'MUTAG': | ||||
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'NCI1': | elif ds_name == 'NCI1': | ||||
ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt' | ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'NCI109': | elif ds_name == 'NCI109': | ||||
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt' | ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'PAH': | elif ds_name == 'PAH': | ||||
ds_file = current_path + '../../datasets/PAH/dataset.ds' | ds_file = current_path + '../../datasets/PAH/dataset.ds' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'SYNTHETIC': | elif ds_name == 'SYNTHETIC': | ||||
pass | pass | ||||
elif ds_name == 'SYNTHETICnew': | elif ds_name == 'SYNTHETICnew': | ||||
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | ||||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||||
elif ds_name == 'Synthie': | elif ds_name == 'Synthie': | ||||
pass | pass | ||||
else: | else: | ||||
raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | raise Exception('The dataset name "', ds_name, '" is not pre-defined.') | ||||
self.__node_labels = label_names['node_labels'] | |||||
self.__node_attrs = label_names['node_attrs'] | |||||
self.__edge_labels = label_names['edge_labels'] | |||||
self.__edge_attrs = label_names['edge_attrs'] | |||||
self._node_labels = label_names['node_labels'] | |||||
self._node_attrs = label_names['node_attrs'] | |||||
self._edge_labels = label_names['edge_labels'] | |||||
self._edge_attrs = label_names['edge_attrs'] | |||||
self.clean_labels() | self.clean_labels() | ||||
def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | ||||
self.__node_labels = node_labels | |||||
self.__node_attrs = node_attrs | |||||
self.__edge_labels = edge_labels | |||||
self.__edge_attrs = edge_attrs | |||||
self._node_labels = node_labels | |||||
self._node_attrs = node_attrs | |||||
self._edge_labels = edge_labels | |||||
self._edge_attrs = edge_attrs | |||||
def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None): | def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None): | ||||
# @todo: remove labels which have only one possible value. | # @todo: remove labels which have only one possible value. | ||||
if node_labels is None: | if node_labels is None: | ||||
self.__node_labels = self.__graphs[0].graph['node_labels'] | |||||
self._node_labels = self._graphs[0].graph['node_labels'] | |||||
# # graphs are considered node unlabeled if all nodes have the same label. | # # graphs are considered node unlabeled if all nodes have the same label. | ||||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | # infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | ||||
if node_attrs is None: | if node_attrs is None: | ||||
self.__node_attrs = self.__graphs[0].graph['node_attrs'] | |||||
self._node_attrs = self._graphs[0].graph['node_attrs'] | |||||
# for G in Gn: | # for G in Gn: | ||||
# for n in G.nodes(data=True): | # for n in G.nodes(data=True): | ||||
# if 'attributes' in n[1]: | # if 'attributes' in n[1]: | ||||
# return len(n[1]['attributes']) | # return len(n[1]['attributes']) | ||||
# return 0 | # return 0 | ||||
if edge_labels is None: | if edge_labels is None: | ||||
self.__edge_labels = self.__graphs[0].graph['edge_labels'] | |||||
self._edge_labels = self._graphs[0].graph['edge_labels'] | |||||
# # graphs are considered edge unlabeled if all edges have the same label. | # # graphs are considered edge unlabeled if all edges have the same label. | ||||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | # infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | ||||
if edge_attrs is None: | if edge_attrs is None: | ||||
self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | |||||
self._edge_attrs = self._graphs[0].graph['edge_attrs'] | |||||
# for G in Gn: | # for G in Gn: | ||||
# if nx.number_of_edges(G) > 0: | # if nx.number_of_edges(G) > 0: | ||||
# for e in G.edges(data=True): | # for e in G.edges(data=True): | ||||
@@ -291,145 +291,145 @@ class Dataset(object): | |||||
# dataset size | # dataset size | ||||
if 'dataset_size' in keys: | if 'dataset_size' in keys: | ||||
if self.__dataset_size is None: | |||||
self.__dataset_size = self.__get_dataset_size() | |||||
infos['dataset_size'] = self.__dataset_size | |||||
if self._dataset_size is None: | |||||
self._dataset_size = self._get_dataset_size() | |||||
infos['dataset_size'] = self._dataset_size | |||||
# graph node number | # graph node number | ||||
if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']): | if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']): | ||||
all_node_nums = self.__get_all_node_nums() | |||||
all_node_nums = self._get_all_node_nums() | |||||
if 'total_node_num' in keys: | if 'total_node_num' in keys: | ||||
if self.__total_node_num is None: | |||||
self.__total_node_num = self.__get_total_node_num(all_node_nums) | |||||
infos['total_node_num'] = self.__total_node_num | |||||
if self._total_node_num is None: | |||||
self._total_node_num = self._get_total_node_num(all_node_nums) | |||||
infos['total_node_num'] = self._total_node_num | |||||
if 'ave_node_num' in keys: | if 'ave_node_num' in keys: | ||||
if self.__ave_node_num is None: | |||||
self.__ave_node_num = self.__get_ave_node_num(all_node_nums) | |||||
infos['ave_node_num'] = self.__ave_node_num | |||||
if self._ave_node_num is None: | |||||
self._ave_node_num = self._get_ave_node_num(all_node_nums) | |||||
infos['ave_node_num'] = self._ave_node_num | |||||
if 'min_node_num' in keys: | if 'min_node_num' in keys: | ||||
if self.__min_node_num is None: | |||||
self.__min_node_num = self.__get_min_node_num(all_node_nums) | |||||
infos['min_node_num'] = self.__min_node_num | |||||
if self._min_node_num is None: | |||||
self._min_node_num = self._get_min_node_num(all_node_nums) | |||||
infos['min_node_num'] = self._min_node_num | |||||
if 'max_node_num' in keys: | if 'max_node_num' in keys: | ||||
if self.__max_node_num is None: | |||||
self.__max_node_num = self.__get_max_node_num(all_node_nums) | |||||
infos['max_node_num'] = self.__max_node_num | |||||
if self._max_node_num is None: | |||||
self._max_node_num = self._get_max_node_num(all_node_nums) | |||||
infos['max_node_num'] = self._max_node_num | |||||
# graph edge number | # graph edge number | ||||
if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']): | if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']): | ||||
all_edge_nums = self.__get_all_edge_nums() | |||||
all_edge_nums = self._get_all_edge_nums() | |||||
if 'total_edge_num' in keys: | if 'total_edge_num' in keys: | ||||
if self.__total_edge_num is None: | |||||
self.__total_edge_num = self.__get_total_edge_num(all_edge_nums) | |||||
infos['total_edge_num'] = self.__total_edge_num | |||||
if self._total_edge_num is None: | |||||
self._total_edge_num = self._get_total_edge_num(all_edge_nums) | |||||
infos['total_edge_num'] = self._total_edge_num | |||||
if 'ave_edge_num' in keys: | if 'ave_edge_num' in keys: | ||||
if self.__ave_edge_num is None: | |||||
self.__ave_edge_num = self.__get_ave_edge_num(all_edge_nums) | |||||
infos['ave_edge_num'] = self.__ave_edge_num | |||||
if self._ave_edge_num is None: | |||||
self._ave_edge_num = self._get_ave_edge_num(all_edge_nums) | |||||
infos['ave_edge_num'] = self._ave_edge_num | |||||
if 'max_edge_num' in keys: | if 'max_edge_num' in keys: | ||||
if self.__max_edge_num is None: | |||||
self.__max_edge_num = self.__get_max_edge_num(all_edge_nums) | |||||
infos['max_edge_num'] = self.__max_edge_num | |||||
if self._max_edge_num is None: | |||||
self._max_edge_num = self._get_max_edge_num(all_edge_nums) | |||||
infos['max_edge_num'] = self._max_edge_num | |||||
if 'min_edge_num' in keys: | if 'min_edge_num' in keys: | ||||
if self.__min_edge_num is None: | |||||
self.__min_edge_num = self.__get_min_edge_num(all_edge_nums) | |||||
infos['min_edge_num'] = self.__min_edge_num | |||||
if self._min_edge_num is None: | |||||
self._min_edge_num = self._get_min_edge_num(all_edge_nums) | |||||
infos['min_edge_num'] = self._min_edge_num | |||||
# label number | # label number | ||||
if 'node_label_dim' in keys: | if 'node_label_dim' in keys: | ||||
if self.__node_label_dim is None: | |||||
self.__node_label_dim = self.__get_node_label_dim() | |||||
infos['node_label_dim'] = self.__node_label_dim | |||||
if self._node_label_dim is None: | |||||
self._node_label_dim = self._get_node_label_dim() | |||||
infos['node_label_dim'] = self._node_label_dim | |||||
if 'node_label_nums' in keys: | if 'node_label_nums' in keys: | ||||
if self.__node_label_nums is None: | |||||
self.__node_label_nums = {} | |||||
for node_label in self.__node_labels: | |||||
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||||
infos['node_label_nums'] = self.__node_label_nums | |||||
if self._node_label_nums is None: | |||||
self._node_label_nums = {} | |||||
for node_label in self._node_labels: | |||||
self._node_label_nums[node_label] = self._get_node_label_num(node_label) | |||||
infos['node_label_nums'] = self._node_label_nums | |||||
if 'edge_label_dim' in keys: | if 'edge_label_dim' in keys: | ||||
if self.__edge_label_dim is None: | |||||
self.__edge_label_dim = self.__get_edge_label_dim() | |||||
infos['edge_label_dim'] = self.__edge_label_dim | |||||
if self._edge_label_dim is None: | |||||
self._edge_label_dim = self._get_edge_label_dim() | |||||
infos['edge_label_dim'] = self._edge_label_dim | |||||
if 'edge_label_nums' in keys: | if 'edge_label_nums' in keys: | ||||
if self.__edge_label_nums is None: | |||||
self.__edge_label_nums = {} | |||||
for edge_label in self.__edge_labels: | |||||
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||||
infos['edge_label_nums'] = self.__edge_label_nums | |||||
if self._edge_label_nums is None: | |||||
self._edge_label_nums = {} | |||||
for edge_label in self._edge_labels: | |||||
self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label) | |||||
infos['edge_label_nums'] = self._edge_label_nums | |||||
if 'directed' in keys or 'substructures' in keys: | if 'directed' in keys or 'substructures' in keys: | ||||
if self.__directed is None: | |||||
self.__directed = self.__is_directed() | |||||
infos['directed'] = self.__directed | |||||
if self._directed is None: | |||||
self._directed = self._is_directed() | |||||
infos['directed'] = self._directed | |||||
# node degree | # node degree | ||||
if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']): | if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']): | ||||
all_node_degrees = self.__get_all_node_degrees() | |||||
all_node_degrees = self._get_all_node_degrees() | |||||
if 'ave_node_degree' in keys: | if 'ave_node_degree' in keys: | ||||
if self.__ave_node_degree is None: | |||||
self.__ave_node_degree = self.__get_ave_node_degree(all_node_degrees) | |||||
infos['ave_node_degree'] = self.__ave_node_degree | |||||
if self._ave_node_degree is None: | |||||
self._ave_node_degree = self._get_ave_node_degree(all_node_degrees) | |||||
infos['ave_node_degree'] = self._ave_node_degree | |||||
if 'max_node_degree' in keys: | if 'max_node_degree' in keys: | ||||
if self.__max_node_degree is None: | |||||
self.__max_node_degree = self.__get_max_node_degree(all_node_degrees) | |||||
infos['max_node_degree'] = self.__max_node_degree | |||||
if self._max_node_degree is None: | |||||
self._max_node_degree = self._get_max_node_degree(all_node_degrees) | |||||
infos['max_node_degree'] = self._max_node_degree | |||||
if 'min_node_degree' in keys: | if 'min_node_degree' in keys: | ||||
if self.__min_node_degree is None: | |||||
self.__min_node_degree = self.__get_min_node_degree(all_node_degrees) | |||||
infos['min_node_degree'] = self.__min_node_degree | |||||
if self._min_node_degree is None: | |||||
self._min_node_degree = self._get_min_node_degree(all_node_degrees) | |||||
infos['min_node_degree'] = self._min_node_degree | |||||
# fill factor | # fill factor | ||||
if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']): | if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']): | ||||
all_fill_factors = self.__get_all_fill_factors() | |||||
all_fill_factors = self._get_all_fill_factors() | |||||
if 'ave_fill_factor' in keys: | if 'ave_fill_factor' in keys: | ||||
if self.__ave_fill_factor is None: | |||||
self.__ave_fill_factor = self.__get_ave_fill_factor(all_fill_factors) | |||||
infos['ave_fill_factor'] = self.__ave_fill_factor | |||||
if self._ave_fill_factor is None: | |||||
self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors) | |||||
infos['ave_fill_factor'] = self._ave_fill_factor | |||||
if 'max_fill_factor' in keys: | if 'max_fill_factor' in keys: | ||||
if self.__max_fill_factor is None: | |||||
self.__max_fill_factor = self.__get_max_fill_factor(all_fill_factors) | |||||
infos['max_fill_factor'] = self.__max_fill_factor | |||||
if self._max_fill_factor is None: | |||||
self._max_fill_factor = self._get_max_fill_factor(all_fill_factors) | |||||
infos['max_fill_factor'] = self._max_fill_factor | |||||
if 'min_fill_factor' in keys: | if 'min_fill_factor' in keys: | ||||
if self.__min_fill_factor is None: | |||||
self.__min_fill_factor = self.__get_min_fill_factor(all_fill_factors) | |||||
infos['min_fill_factor'] = self.__min_fill_factor | |||||
if self._min_fill_factor is None: | |||||
self._min_fill_factor = self._get_min_fill_factor(all_fill_factors) | |||||
infos['min_fill_factor'] = self._min_fill_factor | |||||
if 'substructures' in keys: | if 'substructures' in keys: | ||||
if self.__substructures is None: | |||||
self.__substructures = self.__get_substructures() | |||||
infos['substructures'] = self.__substructures | |||||
if self._substructures is None: | |||||
self._substructures = self._get_substructures() | |||||
infos['substructures'] = self._substructures | |||||
if 'class_number' in keys: | if 'class_number' in keys: | ||||
if self.__class_number is None: | |||||
self.__class_number = self.__get_class_number() | |||||
infos['class_number'] = self.__class_number | |||||
if self._class_number is None: | |||||
self._class_number = self._get_class_number() | |||||
infos['class_number'] = self._class_number | |||||
if 'node_attr_dim' in keys: | if 'node_attr_dim' in keys: | ||||
if self.__node_attr_dim is None: | |||||
self.__node_attr_dim = self.__get_node_attr_dim() | |||||
infos['node_attr_dim'] = self.__node_attr_dim | |||||
if self._node_attr_dim is None: | |||||
self._node_attr_dim = self._get_node_attr_dim() | |||||
infos['node_attr_dim'] = self._node_attr_dim | |||||
if 'edge_attr_dim' in keys: | if 'edge_attr_dim' in keys: | ||||
if self.__edge_attr_dim is None: | |||||
self.__edge_attr_dim = self.__get_edge_attr_dim() | |||||
infos['edge_attr_dim'] = self.__edge_attr_dim | |||||
if self._edge_attr_dim is None: | |||||
self._edge_attr_dim = self._get_edge_attr_dim() | |||||
infos['edge_attr_dim'] = self._edge_attr_dim | |||||
# entropy of degree distribution. | # entropy of degree distribution. | ||||
@@ -438,14 +438,14 @@ class Dataset(object): | |||||
base = params['all_degree_entropy']['base'] | base = params['all_degree_entropy']['base'] | ||||
else: | else: | ||||
base = None | base = None | ||||
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) | |||||
infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base) | |||||
if 'ave_degree_entropy' in keys: | if 'ave_degree_entropy' in keys: | ||||
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): | if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): | ||||
base = params['ave_degree_entropy']['base'] | base = params['ave_degree_entropy']['base'] | ||||
else: | else: | ||||
base = None | base = None | ||||
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) | |||||
infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base)) | |||||
return infos | return infos | ||||
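Every statistic above is computed on first request and cached in the matching `_`-prefixed member, so repeated calls are cheap. A hedged usage sketch; the dataset path is a placeholder, the import path is assumed, and the `params` shape follows what the method body reads:

from gklearn.utils import Dataset  # assumed import path

ds = Dataset('path/to/MUTAG_A.txt')  # __init__ with a filename calls load_dataset
infos = ds.get_dataset_infos(
    keys=['dataset_size', 'ave_node_num', 'node_label_nums', 'ave_degree_entropy'],
    params={'ave_degree_entropy': {'base': 2}})
print(infos)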
@@ -457,12 +457,12 @@ class Dataset(object): | |||||
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | ||||
node_labels = [item for item in node_labels if item in self.__node_labels] | |||||
edge_labels = [item for item in edge_labels if item in self.__edge_labels] | |||||
node_attrs = [item for item in node_attrs if item in self.__node_attrs] | |||||
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs] | |||||
node_labels = [item for item in node_labels if item in self._node_labels] | |||||
edge_labels = [item for item in edge_labels if item in self._edge_labels] | |||||
node_attrs = [item for item in node_attrs if item in self._node_attrs] | |||||
edge_attrs = [item for item in edge_attrs if item in self._edge_attrs] | |||||
for g in self.__graphs: | |||||
for g in self._graphs: | |||||
for nd in g.nodes(): | for nd in g.nodes(): | ||||
for nl in node_labels: | for nl in node_labels: | ||||
del g.nodes[nd][nl] | del g.nodes[nd][nl] | ||||
@@ -474,99 +474,99 @@ class Dataset(object): | |||||
for ea in edge_attrs: | for ea in edge_attrs: | ||||
del g.edges[ed][ea] | del g.edges[ed][ea] | ||||
if len(node_labels) > 0: | if len(node_labels) > 0: | ||||
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||||
self._node_labels = [nl for nl in self._node_labels if nl not in node_labels] | |||||
if len(edge_labels) > 0: | if len(edge_labels) > 0: | ||||
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||||
self._edge_labels = [el for el in self._edge_labels if el not in edge_labels] | |||||
if len(node_attrs) > 0: | if len(node_attrs) > 0: | ||||
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||||
self._node_attrs = [na for na in self._node_attrs if na not in node_attrs] | |||||
if len(edge_attrs) > 0: | if len(edge_attrs) > 0: | ||||
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||||
self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs] | |||||
def clean_labels(self): | def clean_labels(self): | ||||
labels = [] | labels = [] | ||||
for name in self.__node_labels: | |||||
for name in self._node_labels: | |||||
label = set() | label = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
label = label | set(nx.get_node_attributes(G, name).values()) | label = label | set(nx.get_node_attributes(G, name).values()) | ||||
if len(label) > 1: | if len(label) > 1: | ||||
labels.append(name) | labels.append(name) | ||||
break | break | ||||
if len(label) < 2: | if len(label) < 2: | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
for nd in G.nodes(): | for nd in G.nodes(): | ||||
del G.nodes[nd][name] | del G.nodes[nd][name] | ||||
self.__node_labels = labels | |||||
self._node_labels = labels | |||||
labels = [] | labels = [] | ||||
for name in self.__edge_labels: | |||||
for name in self._edge_labels: | |||||
label = set() | label = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
label = label | set(nx.get_edge_attributes(G, name).values()) | label = label | set(nx.get_edge_attributes(G, name).values()) | ||||
if len(label) > 1: | if len(label) > 1: | ||||
labels.append(name) | labels.append(name) | ||||
break | break | ||||
if len(label) < 2: | if len(label) < 2: | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
for ed in G.edges(): | for ed in G.edges(): | ||||
del G.edges[ed][name] | del G.edges[ed][name] | ||||
self.__edge_labels = labels | |||||
self._edge_labels = labels | |||||
labels = [] | labels = [] | ||||
for name in self.__node_attrs: | |||||
for name in self._node_attrs: | |||||
label = set() | label = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
label = label | set(nx.get_node_attributes(G, name).values()) | label = label | set(nx.get_node_attributes(G, name).values()) | ||||
if len(label) > 1: | if len(label) > 1: | ||||
labels.append(name) | labels.append(name) | ||||
break | break | ||||
if len(label) < 2: | if len(label) < 2: | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
for nd in G.nodes(): | for nd in G.nodes(): | ||||
del G.nodes[nd][name] | del G.nodes[nd][name] | ||||
self.__node_attrs = labels | |||||
self._node_attrs = labels | |||||
labels = [] | labels = [] | ||||
for name in self.__edge_attrs: | |||||
for name in self._edge_attrs: | |||||
label = set() | label = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
label = label | set(nx.get_edge_attributes(G, name).values()) | label = label | set(nx.get_edge_attributes(G, name).values()) | ||||
if len(label) > 1: | if len(label) > 1: | ||||
labels.append(name) | labels.append(name) | ||||
break | break | ||||
if len(label) < 2: | if len(label) < 2: | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
for ed in G.edges(): | for ed in G.edges(): | ||||
del G.edges[ed][name] | del G.edges[ed][name] | ||||
self.__edge_attrs = labels | |||||
self._edge_attrs = labels | |||||
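`clean_labels` keeps only the label and attribute names that take at least two distinct values across the whole dataset, and deletes constant ones from every graph. A sketch with generic data; the `Dataset` import path is assumed:

import networkx as nx
from gklearn.utils import Dataset  # assumed import path

g1 = nx.Graph(); g1.add_node(0, atom='C', charge=0)
g2 = nx.Graph(); g2.add_node(0, atom='O', charge=0)

ds = Dataset()
ds.load_graphs([g1, g2])
ds.set_labels(node_labels=['atom', 'charge'])
ds.clean_labels()
print(ds.node_labels)   # ['atom'] -- 'charge' is constant everywhere, so it is dropped
print(g1.nodes[0])      # {'atom': 'C'}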
def cut_graphs(self, range_): | def cut_graphs(self, range_): | ||||
self.__graphs = [self.__graphs[i] for i in range_] | |||||
if self.__targets is not None: | |||||
self.__targets = [self.__targets[i] for i in range_] | |||||
self._graphs = [self._graphs[i] for i in range_] | |||||
if self._targets is not None: | |||||
self._targets = [self._targets[i] for i in range_] | |||||
self.clean_labels() | self.clean_labels() | ||||
def trim_dataset(self, edge_required=False): | def trim_dataset(self, edge_required=False): | ||||
if edge_required: | if edge_required: | ||||
trimmed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)] | |||||
trimmed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)] | |||||
else: | else: | ||||
trimmed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if nx.number_of_nodes(g) != 0] | |||||
trimmed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if nx.number_of_nodes(g) != 0] | |||||
idx = [p[0] for p in trimmed_pairs] | idx = [p[0] for p in trimmed_pairs] | ||||
self.__graphs = [p[1] for p in trimmed_pairs] | |||||
self.__targets = [self.__targets[i] for i in idx] | |||||
self._graphs = [p[1] for p in trimmed_pairs] | |||||
self._targets = [self._targets[i] for i in idx] | |||||
self.clean_labels() | self.clean_labels() | ||||
def copy(self): | def copy(self): | ||||
dataset = Dataset() | dataset = Dataset() | ||||
graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None | |||||
target = self.__targets.copy() if self.__targets is not None else None | |||||
node_labels = self.__node_labels.copy() if self.__node_labels is not None else None | |||||
node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None | |||||
edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None | |||||
edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None | |||||
graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None | |||||
target = self._targets.copy() if self._targets is not None else None | |||||
node_labels = self._node_labels.copy() if self._node_labels is not None else None | |||||
node_attrs = self._node_attrs.copy() if self._node_attrs is not None else None | |||||
edge_labels = self._edge_labels.copy() if self._edge_labels is not None else None | |||||
edge_attrs = self._edge_attrs.copy() if self._edge_attrs is not None else None | |||||
dataset.load_graphs(graphs, target) | dataset.load_graphs(graphs, target) | ||||
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) | dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) | ||||
# @todo: clean_labels and add other class members? | # @todo: clean_labels and add other class members? | ||||
@@ -575,7 +575,7 @@ class Dataset(object): | |||||
def get_all_node_labels(self): | def get_all_node_labels(self): | ||||
node_labels = [] | node_labels = [] | ||||
for g in self.__graphs: | |||||
for g in self._graphs: | |||||
for n in g.nodes(): | for n in g.nodes(): | ||||
nl = tuple(g.nodes[n].items()) | nl = tuple(g.nodes[n].items()) | ||||
if nl not in node_labels: | if nl not in node_labels: | ||||
@@ -585,7 +585,7 @@ class Dataset(object): | |||||
def get_all_edge_labels(self): | def get_all_edge_labels(self): | ||||
edge_labels = [] | edge_labels = [] | ||||
for g in self.__graphs: | |||||
for g in self._graphs: | |||||
for e in g.edges(): | for e in g.edges(): | ||||
el = tuple(g.edges[e].items()) | el = tuple(g.edges[e].items()) | ||||
if el not in edge_labels: | if el not in edge_labels: | ||||
@@ -593,93 +593,93 @@ class Dataset(object): | |||||
return edge_labels | return edge_labels | ||||
def __get_dataset_size(self): | |||||
return len(self.__graphs) | |||||
def _get_dataset_size(self): | |||||
return len(self._graphs) | |||||
def __get_all_node_nums(self): | |||||
return [nx.number_of_nodes(G) for G in self.__graphs] | |||||
def _get_all_node_nums(self): | |||||
return [nx.number_of_nodes(G) for G in self._graphs] | |||||
def __get_total_node_num(self, all_node_nums): | |||||
def _get_total_node_num(self, all_node_nums): | |||||
return np.sum(all_node_nums) | return np.sum(all_node_nums) | ||||
def __get_ave_node_num(self, all_node_nums): | |||||
def _get_ave_node_num(self, all_node_nums): | |||||
return np.mean(all_node_nums) | return np.mean(all_node_nums) | ||||
def __get_min_node_num(self, all_node_nums): | |||||
def _get_min_node_num(self, all_node_nums): | |||||
return np.amin(all_node_nums) | return np.amin(all_node_nums) | ||||
def __get_max_node_num(self, all_node_nums): | |||||
def _get_max_node_num(self, all_node_nums): | |||||
return np.amax(all_node_nums) | return np.amax(all_node_nums) | ||||
def __get_all_edge_nums(self): | |||||
return [nx.number_of_edges(G) for G in self.__graphs] | |||||
def _get_all_edge_nums(self): | |||||
return [nx.number_of_edges(G) for G in self._graphs] | |||||
def __get_total_edge_num(self, all_edge_nums): | |||||
def _get_total_edge_num(self, all_edge_nums): | |||||
return np.sum(all_edge_nums) | return np.sum(all_edge_nums) | ||||
def __get_ave_edge_num(self, all_edge_nums): | |||||
def _get_ave_edge_num(self, all_edge_nums): | |||||
return np.mean(all_edge_nums) | return np.mean(all_edge_nums) | ||||
def __get_min_edge_num(self, all_edge_nums): | |||||
def _get_min_edge_num(self, all_edge_nums): | |||||
return np.amin(all_edge_nums) | return np.amin(all_edge_nums) | ||||
def __get_max_edge_num(self, all_edge_nums): | |||||
def _get_max_edge_num(self, all_edge_nums): | |||||
return np.amax(all_edge_nums) | return np.amax(all_edge_nums) | ||||
def __get_node_label_dim(self): | |||||
return len(self.__node_labels) | |||||
def _get_node_label_dim(self): | |||||
return len(self._node_labels) | |||||
def __get_node_label_num(self, node_label): | |||||
def _get_node_label_num(self, node_label): | |||||
nl = set() | nl = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
nl = nl | set(nx.get_node_attributes(G, node_label).values()) | nl = nl | set(nx.get_node_attributes(G, node_label).values()) | ||||
return len(nl) | return len(nl) | ||||
def __get_edge_label_dim(self): | |||||
return len(self.__edge_labels) | |||||
def _get_edge_label_dim(self): | |||||
return len(self._edge_labels) | |||||
def __get_edge_label_num(self, edge_label): | |||||
def _get_edge_label_num(self, edge_label): | |||||
el = set() | el = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
el = el | set(nx.get_edge_attributes(G, edge_label).values()) | el = el | set(nx.get_edge_attributes(G, edge_label).values()) | ||||
return len(el) | return len(el) | ||||
def __is_directed(self): | |||||
return nx.is_directed(self.__graphs[0]) | |||||
def _is_directed(self): | |||||
return nx.is_directed(self._graphs[0]) | |||||
def __get_all_node_degrees(self): | |||||
return [np.mean(list(dict(G.degree()).values())) for G in self.__graphs] | |||||
def _get_all_node_degrees(self): | |||||
return [np.mean(list(dict(G.degree()).values())) for G in self._graphs] | |||||
def __get_ave_node_degree(self, all_node_degrees): | |||||
def _get_ave_node_degree(self, all_node_degrees): | |||||
return np.mean(all_node_degrees) | return np.mean(all_node_degrees) | ||||
def __get_max_node_degree(self, all_node_degrees): | |||||
def _get_max_node_degree(self, all_node_degrees): | |||||
return np.amax(all_node_degrees) | return np.amax(all_node_degrees) | ||||
def __get_min_node_degree(self, all_node_degrees): | |||||
def _get_min_node_degree(self, all_node_degrees): | |||||
return np.amin(all_node_degrees) | return np.amin(all_node_degrees) | ||||
def __get_all_fill_factors(self): | |||||
def _get_all_fill_factors(self): | |||||
"""Get fill factor, the number of non-zero entries in the adjacency matrix. | """Get fill factor, the number of non-zero entries in the adjacency matrix. | ||||
Returns | Returns | ||||
@@ -687,24 +687,24 @@ class Dataset(object): | |||||
list[float] | list[float] | ||||
List of fill factors for all graphs. | List of fill factors for all graphs. | ||||
""" | """ | ||||
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self.__graphs] | |||||
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs] | |||||
def __get_ave_fill_factor(self, all_fill_factors): | |||||
def _get_ave_fill_factor(self, all_fill_factors): | |||||
return np.mean(all_fill_factors) | return np.mean(all_fill_factors) | ||||
def __get_max_fill_factor(self, all_fill_factors): | |||||
def _get_max_fill_factor(self, all_fill_factors): | |||||
return np.amax(all_fill_factors) | return np.amax(all_fill_factors) | ||||
def __get_min_fill_factor(self, all_fill_factors): | |||||
def _get_min_fill_factor(self, all_fill_factors): | |||||
return np.amin(all_fill_factors) | return np.amin(all_fill_factors) | ||||
def __get_substructures(self): | |||||
def _get_substructures(self): | |||||
subs = set() | subs = set() | ||||
for G in self.__graphs: | |||||
for G in self._graphs: | |||||
degrees = list(dict(G.degree()).values()) | degrees = list(dict(G.degree()).values()) | ||||
if any(i == 2 for i in degrees): | if any(i == 2 for i in degrees): | ||||
subs.add('linear') | subs.add('linear') | ||||
@@ -713,8 +713,8 @@ class Dataset(object): | |||||
if 'linear' in subs and 'non linear' in subs: | if 'linear' in subs and 'non linear' in subs: | ||||
break | break | ||||
if self.__directed: | |||||
for G in self.__graphs: | |||||
if self._directed: | |||||
for G in self._graphs: | |||||
if len(list(nx.find_cycle(G))) > 0: | if len(list(nx.find_cycle(G))) > 0: | ||||
subs.add('cyclic') | subs.add('cyclic') | ||||
break | break | ||||
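One caveat in the cyclicity check above: `nx.find_cycle` raises `networkx.exception.NetworkXNoCycle` when no cycle exists, rather than returning an empty list, so the `len(...) > 0` guard never sees the acyclic case. A defensive sketch, not the committed code:

import networkx as nx

def has_cycle(G):
    # find_cycle returns a non-empty edge list when a cycle exists,
    # and raises NetworkXNoCycle otherwise.
    try:
        nx.find_cycle(G)
        return True
    except nx.exception.NetworkXNoCycle:
        return False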
@@ -737,19 +737,19 @@ class Dataset(object): | |||||
return subs | return subs | ||||
def __get_class_number(self): | |||||
return len(set(self.__targets)) | |||||
def _get_class_number(self): | |||||
return len(set(self._targets)) | |||||
def __get_node_attr_dim(self): | |||||
return len(self.__node_attrs) | |||||
def _get_node_attr_dim(self): | |||||
return len(self._node_attrs) | |||||
def __get_edge_attr_dim(self): | |||||
return len(self.__edge_attrs) | |||||
def _get_edge_attr_dim(self): | |||||
return len(self._edge_attrs) | |||||
def __compute_all_degree_entropy(self, base=None): | |||||
def _compute_all_degree_entropy(self, base=None): | |||||
"""Compute the entropy of degree distribution of each graph. | """Compute the entropy of degree distribution of each graph. | ||||
Parameters | Parameters | ||||
@@ -765,7 +765,7 @@ class Dataset(object): | |||||
from gklearn.utils.stats import entropy | from gklearn.utils.stats import entropy | ||||
degree_entropy = [] | degree_entropy = [] | ||||
for g in self.__graphs: | |||||
for g in self._graphs: | |||||
degrees = list(dict(g.degree()).values()) | degrees = list(dict(g.degree()).values()) | ||||
en = entropy(degrees, base=base) | en = entropy(degrees, base=base) | ||||
degree_entropy.append(en) | degree_entropy.append(en) | ||||
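The helper delegates to `gklearn.utils.stats.entropy`, whose exact behavior is not shown here; one plausible reading is the Shannon entropy of each graph's degree distribution. A self-contained sketch of that computation using scipy, which normalizes counts to probabilities:

import networkx as nx
from scipy.stats import entropy

def degree_entropy(g, base=None):
    # Shannon entropy of the empirical degree distribution of one graph.
    degrees = [d for _, d in g.degree()]
    counts = [degrees.count(v) for v in set(degrees)]
    return entropy(counts, base=base)

print(degree_entropy(nx.path_graph(4), base=2))  # degrees 1,1,2,2 -> 1.0 bit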
@@ -774,32 +774,32 @@ class Dataset(object): | |||||
@property | @property | ||||
def graphs(self): | def graphs(self): | ||||
return self.__graphs | |||||
return self._graphs | |||||
@property | @property | ||||
def targets(self): | def targets(self): | ||||
return self.__targets | |||||
return self._targets | |||||
@property | @property | ||||
def node_labels(self): | def node_labels(self): | ||||
return self.__node_labels | |||||
return self._node_labels | |||||
@property | @property | ||||
def edge_labels(self): | def edge_labels(self): | ||||
return self.__edge_labels | |||||
return self._edge_labels | |||||
@property | @property | ||||
def node_attrs(self): | def node_attrs(self): | ||||
return self.__node_attrs | |||||
return self._node_attrs | |||||
@property | @property | ||||
def edge_attrs(self): | def edge_attrs(self): | ||||
return self.__edge_attrs | |||||
return self._edge_attrs | |||||
def split_dataset_by_target(dataset): | def split_dataset_by_target(dataset): | ||||
@@ -692,7 +692,7 @@ def load_from_ds(filename, filename_targets): | |||||
# remove the '#'s in file names | # remove the '#'s in file names | ||||
g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | ||||
data.append(g) | data.append(g) | ||||
__append_label_names(label_names, l_names) | |||||
_append_label_names(label_names, l_names) | |||||
y.append(float(tmp[1])) | y.append(float(tmp[1])) | ||||
else: # targets in a separate file | else: # targets in a separate file | ||||
for i in range(0, len(content)): | for i in range(0, len(content)): | ||||
@@ -700,7 +700,7 @@ def load_from_ds(filename, filename_targets): | |||||
# remove the '#'s in file names | # remove the '#'s in file names | ||||
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) | g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) | ||||
data.append(g) | data.append(g) | ||||
__append_label_names(label_names, l_names) | |||||
_append_label_names(label_names, l_names) | |||||
with open(filename_targets) as fnt: | with open(filename_targets) as fnt: | ||||
content_y = fnt.read().splitlines() | content_y = fnt.read().splitlines() | ||||
@@ -745,13 +745,13 @@ def load_from_xml(filename, dir_dataset=None): | |||||
mol_class = graph.attrib['class'] | mol_class = graph.attrib['class'] | ||||
g, l_names = load_gxl(dir_dataset + '/' + mol_filename) | g, l_names = load_gxl(dir_dataset + '/' + mol_filename) | ||||
data.append(g) | data.append(g) | ||||
__append_label_names(label_names, l_names) | |||||
_append_label_names(label_names, l_names) | |||||
y.append(mol_class) | y.append(mol_class) | ||||
return data, y, label_names | return data, y, label_names | ||||
def __append_label_names(label_names, new_names): | |||||
def _append_label_names(label_names, new_names): | |||||
for key, val in label_names.items(): | for key, val in label_names.items(): | ||||
label_names[key] += [name for name in new_names[key] if name not in val] | label_names[key] += [name for name in new_names[key] if name not in val] | ||||
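`_append_label_names` merges each newly loaded file's label names into the running dict, skipping duplicates while preserving first-seen order. A quick illustration with generic data:

label_names = {'node_labels': ['atom'], 'edge_labels': []}
new_names = {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond_type']}

for key, val in label_names.items():
    label_names[key] += [name for name in new_names[key] if name not in val]

print(label_names)
# {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond_type']}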
@@ -73,7 +73,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t | |||||
y_all = dataset.targets | y_all = dataset.targets | ||||
# compute kernel distances. | # compute kernel distances. | ||||
dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset) | |||||
dis_mat = _compute_kernel_distances(dataset, kernel_options, trainset=trainset) | |||||
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) | rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) | ||||
@@ -121,7 +121,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t | |||||
return results | return results | ||||
def __compute_kernel_distances(dataset, kernel_options, trainset=None): | |||||
def _compute_kernel_distances(dataset, kernel_options, trainset=None): | |||||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | ||||
node_labels=dataset.node_labels, | node_labels=dataset.node_labels, | ||||
edge_labels=dataset.edge_labels, | edge_labels=dataset.edge_labels, | ||||
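`_compute_kernel_distances` is cut off here, but the standard way to turn a Gram matrix into pairwise distances, which a helper like this presumably uses in some form, is the kernel-induced metric d(x, y) = sqrt(k(x, x) + k(y, y) - 2*k(x, y)). A hedged sketch:

import numpy as np

def kernel_distance_matrix(K):
    # Pairwise distances induced by a (PSD) Gram matrix K.
    diag = np.diag(K)
    sq = diag[:, None] + diag[None, :] - 2 * K
    return np.sqrt(np.maximum(sq, 0))  # clip tiny negatives from round-off

K = np.array([[1.0, 0.5], [0.5, 1.0]])
print(kernel_distance_matrix(K))  # off-diagonal entries: sqrt(1 + 1 - 1) = 1.0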
@@ -23,8 +23,8 @@ class Timer(object): | |||||
time_limit_in_sec : float | time_limit_in_sec : float | ||||
The time limit in seconds; a non-positive value means no limit. | The time limit in seconds; a non-positive value means no limit. | ||||
""" | """ | ||||
self.__time_limit_in_sec = time_limit_in_sec | |||||
self.__start_time = time.time() | |||||
self._time_limit_in_sec = time_limit_in_sec | |||||
self._start_time = time.time() | |||||
def expired(self): | def expired(self): | ||||
@@ -34,7 +34,7 @@ class Timer(object): | |||||
------ | ------ | ||||
Boolean true if the time limit has expired and false otherwise. | Boolean true if the time limit has expired and false otherwise. | ||||
""" | """ | ||||
if self.__time_limit_in_sec > 0: | |||||
runtime = time.time() - self.__start_time | |||||
return runtime >= self.__time_limit_in_sec | |||||
if self._time_limit_in_sec > 0: | |||||
runtime = time.time() - self._start_time | |||||
return runtime >= self._time_limit_in_sec | |||||
return False | return False |
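A hedged usage sketch of the `Timer` above (import path assumed); per `expired()`, a non-positive limit never expires:

import time
from gklearn.utils import Timer  # assumed import path

timer = Timer(0.5)              # half-second budget
while not timer.expired():
    time.sleep(0.1)             # stand-in for one unit of work
print('time limit reached')

unlimited = Timer(0)            # non-positive limit: expired() is always False
assert not unlimited.expired()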