@@ -26,18 +26,18 @@ class CommonWalk(GraphKernel):
 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__weight = kwargs.get('weight', 1)
-		self.__compute_method = kwargs.get('compute_method', None)
-		self.__ds_infos = kwargs.get('ds_infos', {})
-		self.__compute_method = self.__compute_method.lower()
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._weight = kwargs.get('weight', 1)
+		self._compute_method = kwargs.get('compute_method', None)
+		self._ds_infos = kwargs.get('ds_infos', {})
+		self._compute_method = self._compute_method.lower()

 	def _compute_gm_series(self):
-		self.__check_graphs(self._graphs)
-		self.__add_dummy_labels(self._graphs)
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(self._graphs)
+		self._add_dummy_labels(self._graphs)
+		if not self._ds_infos['directed']:  # convert
 			self._graphs = [G.to_directed() for G in self._graphs]

 		# compute Gram matrix.
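Note: the rename from `self.__x` to `self._x` that every hunk below repeats is not cosmetic. Double-underscore names are mangled per defining class, so a base class and its subclasses each end up with their own copy of a "private" attribute and silently stop sharing state. A minimal sketch of the failure mode (hypothetical classes, not from this repo):

```python
class Base:
	def __init__(self):
		self.__value = 1  # name-mangled to self._Base__value

	def show(self):
		return self.__value  # always reads self._Base__value


class Child(Base):
	def __init__(self):
		super().__init__()
		self.__value = 2  # mangled to self._Child__value: a second, separate attribute


c = Child()
print(c.show())  # prints 1, not 2: the subclass never overrode anything
print(c._Base__value, c._Child__value)  # 1 2
```

With a single leading underscore both classes read and write the same attribute, which is what the renamed code relies on.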
@@ -51,15 +51,15 @@ class CommonWalk(GraphKernel):
 			iterator = itr
 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			for i, j in iterator:
-				kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
+				kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			for i, j in iterator:
-				kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
+				kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
@@ -67,9 +67,9 @@ class CommonWalk(GraphKernel):
 	def _compute_gm_imap_unordered(self):
-		self.__check_graphs(self._graphs)
-		self.__add_dummy_labels(self._graphs)
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(self._graphs)
+		self._add_dummy_labels(self._graphs)
+		if not self._ds_infos['directed']:  # convert
 			self._graphs = [G.to_directed() for G in self._graphs]

 		# compute Gram matrix.
@@ -80,10 +80,10 @@ class CommonWalk(GraphKernel):
#			G_gn = gn_toshare
 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			do_fun = self._wrapper_kernel_do_exp
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			do_fun = self._wrapper_kernel_do_geo

 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
@@ -93,9 +93,9 @@ class CommonWalk(GraphKernel):
 	def _compute_kernel_list_series(self, g1, g_list):
-		self.__check_graphs(g_list + [g1])
-		self.__add_dummy_labels(g_list + [g1])
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])
+		if not self._ds_infos['directed']:  # convert
 			g1 = g1.to_directed()
 			g_list = [G.to_directed() for G in g_list]
@@ -107,23 +107,23 @@ class CommonWalk(GraphKernel):
 			iterator = range(len(g_list))
 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			for i in iterator:
-				kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
+				kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
 				kernel_list[i] = kernel
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			for i in iterator:
-				kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
+				kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
 				kernel_list[i] = kernel

 		return kernel_list

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self.__check_graphs(g_list + [g1])
-		self.__add_dummy_labels(g_list + [g1])
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])
+		if not self._ds_infos['directed']:  # convert
 			g1 = g1.to_directed()
 			g_list = [G.to_directed() for G in g_list]
@@ -136,10 +136,10 @@ class CommonWalk(GraphKernel):
#			G_g_list = g_list_toshare
 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			do_fun = self._wrapper_kernel_list_do_exp
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			do_fun = self._wrapper_kernel_list_do_geo

 		def func_assign(result, var_to_assign):
@@ -154,31 +154,31 @@ class CommonWalk(GraphKernel):
 	def _wrapper_kernel_list_do_exp(self, itr):
-		return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)
+		return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)

 	def _wrapper_kernel_list_do_geo(self, itr):
-		return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
+		return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)

 	def _compute_single_kernel_series(self, g1, g2):
-		self.__check_graphs([g1] + [g2])
-		self.__add_dummy_labels([g1] + [g2])
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs([g1] + [g2])
+		self._add_dummy_labels([g1] + [g2])
+		if not self._ds_infos['directed']:  # convert
 			g1 = g1.to_directed()
 			g2 = g2.to_directed()

 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
-			kernel = self.__kernel_do_exp(g1, g2, self.__weight)
+		if self._compute_method == 'exp':
+			kernel = self._kernel_do_exp(g1, g2, self._weight)
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
-			kernel = self.__kernel_do_geo(g1, g2, self.__weight)
+		elif self._compute_method == 'geo':
+			kernel = self._kernel_do_geo(g1, g2, self._weight)

 		return kernel

-	def __kernel_do_exp(self, g1, g2, beta):
+	def _kernel_do_exp(self, g1, g2, beta):
 		"""Compute common walk graph kernel between 2 graphs using exponential
 		series.
@@ -195,7 +195,7 @@ class CommonWalk(GraphKernel):
 			The common walk kernel between 2 graphs.
 		"""
 		# get tensor product / direct product
-		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+		gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
 		# return 0 if the direct product graph has fewer than 2 nodes.
 		if nx.number_of_nodes(gp) < 2:
 			return 0
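For context on what `_kernel_do_exp` computes: the exponential common walk kernel sums beta^k / k! times the number of length-k common walks, which equals the grand sum of the matrix exponential of the direct product graph's adjacency matrix. A minimal standalone sketch (simplified; the method above additionally handles labels via `direct_product_graph` and the fewer-than-2-node early exit, and may use a different factorization internally):

```python
import networkx as nx
import numpy as np
from scipy.linalg import expm

def common_walk_exp(gp, beta):
	"""Exponential-series common walk kernel on a direct product graph gp."""
	A = np.asarray(nx.adjacency_matrix(gp).todense(), dtype=float)
	# sum_k (beta^k / k!) * (#walks of length k) == 1^T expm(beta * A) 1
	return float(expm(beta * A).sum())
```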
@@ -227,10 +227,10 @@ class CommonWalk(GraphKernel):
 	def _wrapper_kernel_do_exp(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
+		return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)

-	def __kernel_do_geo(self, g1, g2, gamma):
+	def _kernel_do_geo(self, g1, g2, gamma):
 		"""Compute common walk graph kernel between 2 graphs using geometric
 		series.
@@ -247,7 +247,7 @@ class CommonWalk(GraphKernel):
 			The common walk kernel between 2 graphs.
 		"""
 		# get tensor product / direct product
-		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+		gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
 		# return 0 if the direct product graph has fewer than 2 nodes.
 		if nx.number_of_nodes(gp) < 2:
 			return 0
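`_kernel_do_geo` is the geometric-series variant: it sums gamma^k times the length-k walk counts, which converges to the grand sum of (I - gamma A)^{-1} provided gamma is below the reciprocal of the spectral radius of A. A sketch under that convergence assumption:

```python
import networkx as nx
import numpy as np

def common_walk_geo(gp, gamma):
	"""Geometric-series common walk kernel on a direct product graph gp."""
	A = np.asarray(nx.adjacency_matrix(gp).todense(), dtype=float)
	n = A.shape[0]
	# sum_k gamma^k A^k == (I - gamma A)^{-1}, valid while gamma * rho(A) < 1
	return float(np.linalg.inv(np.identity(n) - gamma * A).sum())
```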
@@ -262,24 +262,24 @@ class CommonWalk(GraphKernel):
 	def _wrapper_kernel_do_geo(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
+		return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)

-	def __check_graphs(self, Gn):
+	def _check_graphs(self, Gn):
 		for g in Gn:
 			if nx.number_of_nodes(g) == 1:
 				raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')

-	def __add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+	def _add_dummy_labels(self, Gn):
+		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__node_labels = [SpecialLabel.DUMMY]
-		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			self._node_labels = [SpecialLabel.DUMMY]
+		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__edge_labels = [SpecialLabel.DUMMY]
+			self._edge_labels = [SpecialLabel.DUMMY]


 def _init_worker_gm(gn_toshare):
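`_add_dummy_labels` gives unlabeled graphs a constant label so that the delta kernels match every pair and the kernel degenerates to plain walk counting. A sketch of the effect, with a plain string standing in for `SpecialLabel.DUMMY`:

```python
import networkx as nx

DUMMY = 'dummy'  # stand-in for SpecialLabel.DUMMY

g = nx.path_graph(3)                   # no node labels at all
nx.set_node_attributes(g, '0', DUMMY)  # every node now carries the label '0'
print(list(g.nodes(data=True)))
# [(0, {'dummy': '0'}), (1, {'dummy': '0'}), (2, {'dummy': '0'})]
# deltakernel('0', '0') == 1 for every pair, so labels no longer filter walks.
```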
@@ -37,7 +37,7 @@ class GraphKernel(object):
 				raise Exception('The graph list given is empty. No computation was performed.')
 			else:
 				self._graphs = [g.copy() for g in graphs[0]]
-				self._gram_matrix = self.__compute_gram_matrix()
+				self._gram_matrix = self._compute_gram_matrix()
 				self._gram_matrix_unnorm = np.copy(self._gram_matrix)
 				if self._normalize:
 					self._gram_matrix = self.normalize_gm(self._gram_matrix)
@@ -45,17 +45,17 @@ class GraphKernel(object):
 		elif len(graphs) == 2:
 			if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
-				kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy())
+				kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy())
 				return kernel, self._run_time
 			elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
 				g1 = graphs[0].copy()
 				g_list = [g.copy() for g in graphs[1]]
-				kernel_list = self.__compute_kernel_list(g1, g_list)
+				kernel_list = self._compute_kernel_list(g1, g_list)
 				return kernel_list, self._run_time
 			elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
 				g1 = graphs[1].copy()
 				g_list = [g.copy() for g in graphs[0]]
-				kernel_list = self.__compute_kernel_list(g1, g_list)
+				kernel_list = self._compute_kernel_list(g1, g_list)
 				return kernel_list, self._run_time
 			else:
 				raise Exception('Cannot detect graphs.')
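These branches dispatch on the call shape, so a single entry point covers a whole dataset, one graph against many, and a single pair. An illustrative sketch of the three shapes, assuming (as in gklearn) the branches live in `GraphKernel.compute(*graphs, **kwargs)`; label names are made up:

```python
cw = CommonWalk(node_labels=['atom'], edge_labels=['bond_type'],
	weight=0.01, compute_method='geo', ds_infos={'directed': False})

gram, run_time = cw.compute(graph_list)         # Gram matrix over a list
kernels, run_time = cw.compute(g1, graph_list)  # one graph vs. a list
k, run_time = cw.compute(g1, g2)                # a single pair
```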
@@ -99,7 +99,7 @@ class GraphKernel(object):
 		return dis_mat, dis_max, dis_min, dis_mean

-	def __compute_gram_matrix(self):
+	def _compute_gram_matrix(self):
 		start_time = time.time()

 		if self._parallel == 'imap_unordered':
@@ -125,7 +125,7 @@ class GraphKernel(object):
 		pass

-	def __compute_kernel_list(self, g1, g_list):
+	def _compute_kernel_list(self, g1, g_list):
 		start_time = time.time()

 		if self._parallel == 'imap_unordered':
@@ -151,7 +151,7 @@ class GraphKernel(object):
 		pass

-	def __compute_single_kernel(self, g1, g2):
+	def _compute_single_kernel(self, g1, g2):
 		start_time = time.time()

 		kernel = self._compute_single_kernel_series(g1, g2)
@@ -33,25 +33,25 @@ class Marginalized(GraphKernel):
 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__p_quit = kwargs.get('p_quit', 0.5)
-		self.__n_iteration = kwargs.get('n_iteration', 10)
-		self.__remove_totters = kwargs.get('remove_totters', False)
-		self.__ds_infos = kwargs.get('ds_infos', {})
-		self.__n_iteration = int(self.__n_iteration)
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._p_quit = kwargs.get('p_quit', 0.5)
+		self._n_iteration = kwargs.get('n_iteration', 10)
+		self._remove_totters = kwargs.get('remove_totters', False)
+		self._ds_infos = kwargs.get('ds_infos', {})
+		self._n_iteration = int(self._n_iteration)
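For reference, a hypothetical instantiation with the kwargs read above (label names are illustrative):

```python
mk = Marginalized(node_labels=['atom'], edge_labels=['bond_type'],
	p_quit=0.3,            # stop probability of the random walks
	n_iteration=20,        # fixed-point sweeps for R_inf
	remove_totters=False,  # optionally apply untotterTransformation first
	ds_infos={'directed': False})
```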
 	def _compute_gm_series(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

-		if self.__remove_totters:
+		if self._remove_totters:
 			if self._verbose >= 2:
 				iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
 			else:
 				iterator = self._graphs
 			# @todo: this may not work.
-			self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+			self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -63,7 +63,7 @@ class Marginalized(GraphKernel):
 		else:
 			iterator = itr
 		for i, j in iterator:
-			kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
+			kernel = self._kernel_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
 			gram_matrix[j][i] = kernel  # @todo: no directed graph considered?
@@ -71,9 +71,9 @@ class Marginalized(GraphKernel):
 	def _compute_gm_imap_unordered(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

-		if self.__remove_totters:
+		if self._remove_totters:
 			pool = Pool(self._n_jobs)
 			itr = range(0, len(self._graphs))
 			if len(self._graphs) < 100 * self._n_jobs:
@@ -105,16 +105,16 @@ class Marginalized(GraphKernel):
 	def _compute_kernel_list_series(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

-		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+		if self._remove_totters:
+			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
 			if self._verbose >= 2:
 				iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
 			else:
 				iterator = g_list
 			# @todo: this may not work.
-			g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+			g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -123,17 +123,17 @@ class Marginalized(GraphKernel):
 		else:
 			iterator = range(len(g_list))
 		for i in iterator:
-			kernel = self.__kernel_do(g1, g_list[i])
+			kernel = self._kernel_do(g1, g_list[i])
 			kernel_list[i] = kernel

 		return kernel_list

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

-		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+		if self._remove_totters:
+			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
 			pool = Pool(self._n_jobs)
 			itr = range(0, len(g_list))
 			if len(g_list) < 100 * self._n_jobs:
@@ -171,19 +171,19 @@ class Marginalized(GraphKernel):
 	def _wrapper_kernel_list_do(self, itr):
-		return itr, self.__kernel_do(G_g1, G_g_list[itr])
+		return itr, self._kernel_do(G_g1, G_g_list[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		self.__add_dummy_labels([g1] + [g2])
-		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
-			g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
-		kernel = self.__kernel_do(g1, g2)
+		self._add_dummy_labels([g1] + [g2])
+		if self._remove_totters:
+			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
+			g2 = untotterTransformation(g2, self._node_labels, self._edge_labels)
+		kernel = self._kernel_do(g1, g2)
 		return kernel

-	def __kernel_do(self, g1, g2):
+	def _kernel_do(self, g1, g2):
 		"""Compute marginalized graph kernel between 2 graphs.

 		Parameters
@@ -205,7 +205,7 @@ class Marginalized(GraphKernel):
 		p_init_G1 = 1 / num_nodes_G1
 		p_init_G2 = 1 / num_nodes_G2

-		q = self.__p_quit * self.__p_quit
+		q = self._p_quit * self._p_quit
 		r1 = q

#		# initial R_inf
@@ -260,36 +260,36 @@ class Marginalized(GraphKernel):
 					if len(g2[node2]) > 0:
 						R_inf[(node1, node2)] = r1
 					else:
-						R_inf[(node1, node2)] = self.__p_quit
+						R_inf[(node1, node2)] = self._p_quit
 				else:
 					if len(g2[node2]) > 0:
-						R_inf[(node1, node2)] = self.__p_quit
+						R_inf[(node1, node2)] = self._p_quit
 					else:
 						R_inf[(node1, node2)] = 1

 		# compute all transition probabilities first.
 		t_dict = {}
-		if self.__n_iteration > 1:
+		if self._n_iteration > 1:
 			for node1 in g1.nodes():
 				neighbor_n1 = g1[node1]
 				# the transition probability distribution in the random walks
 				# generating step (uniform distribution over the vertices adjacent
 				# to the current vertex)
 				if len(neighbor_n1) > 0:
-					p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
+					p_trans_n1 = (1 - self._p_quit) / len(neighbor_n1)
 					for node2 in g2.nodes():
 						neighbor_n2 = g2[node2]
 						if len(neighbor_n2) > 0:
-							p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
+							p_trans_n2 = (1 - self._p_quit) / len(neighbor_n2)
 							for neighbor1 in neighbor_n1:
 								for neighbor2 in neighbor_n2:
 									t_dict[(node1, node2, neighbor1, neighbor2)] = \
 										p_trans_n1 * p_trans_n2 * \
-										deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
-										deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
+										deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \
+										deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels))

 		# Compute R_inf with a simple iterative method
-		for i in range(2, self.__n_iteration + 1):
+		for i in range(2, self._n_iteration + 1):
 			R_inf_old = R_inf.copy()

 			# Compute R_inf for each pair of nodes
@@ -311,7 +311,7 @@ class Marginalized(GraphKernel):
 		# add elements of R_inf up and compute kernel.
 		for (n1, n2), value in R_inf.items():
-			s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
+			s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels))
 			kernel += s * value  # ref [1] equation (6)

 		return kernel
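To summarize the structure of `_kernel_do`: `R_inf[(u, v)]` estimates the expected contribution of matching random walks started at u in g1 and v in g2, and each sweep of the loop above folds the quit probabilities and the label-matched neighbor transitions back into it. A schematic, runnable version of one sweep, assuming `R_old` and `t_dict` as built above:

```python
def r_inf_sweep(R_old, g1, g2, t_dict, q):
	"""One fixed-point sweep for R_inf (schematic version of the loop above)."""
	R_new = {}
	for (u, v) in R_old:
		acc = q  # both walks stop here: p_quit * p_quit
		for u2 in g1[u]:
			for v2 in g2[v]:
				# label-matched transitions carry over the previous estimate
				acc += t_dict.get((u, v, u2, v2), 0) * R_old[(u2, v2)]
		R_new[(u, v)] = acc
	return R_new
```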
@@ -320,19 +320,19 @@ class Marginalized(GraphKernel):
 	def _wrapper_kernel_do(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do(G_gn[i], G_gn[j])
+		return i, j, self._kernel_do(G_gn[i], G_gn[j])

 	def _wrapper_untotter(self, i):
-		return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+		return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels)  # @todo: this may not work.

-	def __add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+	def _add_dummy_labels(self, Gn):
+		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__node_labels = [SpecialLabel.DUMMY]
-		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			self._node_labels = [SpecialLabel.DUMMY]
+		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__edge_labels = [SpecialLabel.DUMMY]
+			self._edge_labels = [SpecialLabel.DUMMY]
@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__depth = int(kwargs.get('depth', 10))
-		self.__k_func = kwargs.get('k_func', 'MinMax')
-		self.__compute_method = kwargs.get('compute_method', 'trie')
-		self.__ds_infos = kwargs.get('ds_infos', {})
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._depth = int(kwargs.get('depth', 10))
+		self._k_func = kwargs.get('k_func', 'MinMax')
+		self._compute_method = kwargs.get('compute_method', 'trie')
+		self._ds_infos = kwargs.get('ds_infos', {})
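A hypothetical instantiation showing what the kwargs above control (label names are illustrative):

```python
pk = PathUpToH(node_labels=['atom'], edge_labels=['bond_type'],
	depth=5,                # maximum path length h
	k_func='MinMax',        # or 'tanimoto', or None for kernel-less comparison
	compute_method='trie',  # any other value uses plain path lists
	ds_infos={'directed': False})
```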
 	def _compute_gm_series(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

 		from itertools import combinations_with_replacement
 		itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-		if self.__compute_method == 'trie':
-			all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
+		if self._compute_method == 'trie':
+			all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
 			for i, j in iterator_kernel:
-				kernel = self.__kernel_do_trie(all_paths[i], all_paths[j])
+				kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 		else:
-			all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
+			all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
 			for i, j in iterator_kernel:
-				kernel = self.__kernel_do_naive(all_paths[i], all_paths[j])
+				kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _compute_gm_imap_unordered(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		else:
 			chunksize = 100
 		all_paths = [[] for _ in range(len(self._graphs))]
-		if self.__compute_method == 'trie' and self.__k_func is not None:
+		if self._compute_method == 'trie' and self._k_func is not None:
 			get_ps_fun = self._wrapper_find_all_path_as_trie
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
+		elif self._compute_method != 'trie' and self._k_func is not None:
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
 		else:
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-		if self.__compute_method == 'trie' and self.__k_func is not None:
+		if self._compute_method == 'trie' and self._k_func is not None:
 			def init_worker(trie_toshare):
 				global G_trie
 				G_trie = trie_toshare
 			do_fun = self._wrapper_kernel_do_trie
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
+		elif self._compute_method != 'trie' and self._k_func is not None:
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
-			do_fun = self.__wrapper_kernel_do_kernelless  # @todo: what is this?
+			do_fun = self._wrapper_kernel_do_kernelless  # @todo: what is this?
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
 					glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)
@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _compute_kernel_list_series(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

 		if self._verbose >= 2:
 			iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		kernel_list = [None] * len(g_list)

-		if self.__compute_method == 'trie':
-			paths_g1 = self.__find_all_path_as_trie(g1)
-			paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps]
+		if self._compute_method == 'trie':
+			paths_g1 = self._find_all_path_as_trie(g1)
+			paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
 			for i in iterator_kernel:
-				kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i])
+				kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
 				kernel_list[i] = kernel
 		else:
-			paths_g1 = self.__find_all_paths_until_length(g1)
-			paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps]
+			paths_g1 = self._find_all_paths_until_length(g1)
+			paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
 			for i in iterator_kernel:
-				kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i])
+				kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
 				kernel_list[i] = kernel

 		return kernel_list

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		else:
 			chunksize = 100
 		paths_g_list = [[] for _ in range(len(g_list))]
-		if self.__compute_method == 'trie' and self.__k_func is not None:
-			paths_g1 = self.__find_all_path_as_trie(g1)
+		if self._compute_method == 'trie' and self._k_func is not None:
+			paths_g1 = self._find_all_path_as_trie(g1)
 			get_ps_fun = self._wrapper_find_all_path_as_trie
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
-			paths_g1 = self.__find_all_paths_until_length(g1)
+		elif self._compute_method != 'trie' and self._k_func is not None:
+			paths_g1 = self._find_all_paths_until_length(g1)
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
 		else:
-			paths_g1 = self.__find_all_paths_until_length(g1)
+			paths_g1 = self._find_all_paths_until_length(g1)
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		if self._verbose >= 2:
 			iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_list_do(self, itr):
-		if self.__compute_method == 'trie' and self.__k_func is not None:
-			return itr, self.__kernel_do_trie(G_p1, G_plist[itr])
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
-			return itr, self.__kernel_do_naive(G_p1, G_plist[itr])
+		if self._compute_method == 'trie' and self._k_func is not None:
+			return itr, self._kernel_do_trie(G_p1, G_plist[itr])
+		elif self._compute_method != 'trie' and self._k_func is not None:
+			return itr, self._kernel_do_naive(G_p1, G_plist[itr])
 		else:
-			return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr])
+			return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		self.__add_dummy_labels([g1] + [g2])
-		if self.__compute_method == 'trie':
-			paths_g1 = self.__find_all_path_as_trie(g1)
-			paths_g2 = self.__find_all_path_as_trie(g2)
-			kernel = self.__kernel_do_trie(paths_g1, paths_g2)
+		self._add_dummy_labels([g1] + [g2])
+		if self._compute_method == 'trie':
+			paths_g1 = self._find_all_path_as_trie(g1)
+			paths_g2 = self._find_all_path_as_trie(g2)
+			kernel = self._kernel_do_trie(paths_g1, paths_g2)
 		else:
-			paths_g1 = self.__find_all_paths_until_length(g1)
-			paths_g2 = self.__find_all_paths_until_length(g2)
-			kernel = self.__kernel_do_naive(paths_g1, paths_g2)
+			paths_g1 = self._find_all_paths_until_length(g1)
+			paths_g2 = self._find_all_paths_until_length(g2)
+			kernel = self._kernel_do_naive(paths_g1, paths_g2)
 		return kernel

-	def __kernel_do_trie(self, trie1, trie2):
+	def _kernel_do_trie(self, trie1, trie2):
 		"""Compute path graph kernels up to depth d between 2 graphs using trie.

 		Parameters
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		kernel : float
 			Path kernel up to h between 2 graphs.
 		"""
-		if self.__k_func == 'tanimoto':
+		if self._k_func == 'tanimoto':
 			# traverse all paths in graph1 and search them in graph2. Depth-first
 			# search is applied.
 			def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
#			print(setlist)
 			kernel = setlist[0] / setlist[1]

-		elif self.__k_func == 'MinMax':  # MinMax kernel
+		elif self._k_func == 'MinMax':  # MinMax kernel
 			# traverse all paths in graph1 and search them in graph2. Depth-first
 			# search is applied.
 			def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_do_trie(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j])
+		return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])

-	def __kernel_do_naive(self, paths1, paths2):
+	def _kernel_do_naive(self, paths1, paths2):
 		"""Compute path graph kernels up to depth d between 2 graphs naively.

 		Parameters
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		"""
 		all_paths = list(set(paths1 + paths2))

-		if self.__k_func == 'tanimoto':
+		if self._k_func == 'tanimoto':
 			length_union = len(set(paths1 + paths2))
 			kernel = (len(set(paths1)) + len(set(paths2)) -
 					  length_union) / length_union
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
#			kernel_uv = np.dot(vector1, vector2)
#			kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)

-		elif self.__k_func == 'MinMax':  # MinMax kernel
+		elif self._k_func == 'MinMax':  # MinMax kernel
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			kernel = np.sum(np.minimum(vector1, vector2)) / \
 					 np.sum(np.maximum(vector1, vector2))

-		elif self.__k_func is None:  # no sub-kernel used; compare paths directly.
+		elif self._k_func is None:  # no sub-kernel used; compare paths directly.
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_do_naive(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j])
+		return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])
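On explicit path lists the two sub-kernels reduce to set and multiset comparisons, which the naive branch implements with `Counter`. A standalone sketch of both:

```python
from collections import Counter

def tanimoto(paths1, paths2):
	"""|intersection| / |union| of the path sets, as in the branch above."""
	s1, s2 = set(paths1), set(paths2)
	return len(s1 & s2) / len(s1 | s2)

def minmax(paths1, paths2):
	"""Sum of per-path minimum counts over sum of maximum counts."""
	c1, c2 = Counter(paths1), Counter(paths2)
	keys = set(c1) | set(c2)
	return sum(min(c1[k], c2[k]) for k in keys) / \
		sum(max(c1[k], c2[k]) for k in keys)
```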
-	def __find_all_path_as_trie(self, G):
+	def _find_all_path_as_trie(self, G):
#		all_path = find_all_paths_until_length(G, length, ds_attrs,
#											   node_label=node_label,
#											   edge_label=edge_label)
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# them. Depth-first search is applied. Notice the reverse of each path is
 		# also stored to the trie.
 		def traverseGraph(root, ptrie, G, pcurrent=[]):
-			if len(pcurrent) < self.__depth + 1:
+			if len(pcurrent) < self._depth + 1:
 				for neighbor in G[root]:
 					if neighbor not in pcurrent:
 						pcurrent.append(neighbor)
-						plstr = self.__paths2labelseqs([pcurrent], G)
+						plstr = self._paths2labelseqs([pcurrent], G)
 						ptrie.insertWord(plstr[0])
 						traverseGraph(neighbor, ptrie, G, pcurrent)
 						del pcurrent[-1]
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		ptrie = Trie()
 		path_l = [[n] for n in G.nodes]  # paths of length l
-		path_l_str = self.__paths2labelseqs(path_l, G)
+		path_l_str = self._paths2labelseqs(path_l, G)
 		for p in path_l_str:
 			ptrie.insertWord(p)

 		for n in G.nodes:
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_find_all_path_as_trie(self, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, self.__find_all_path_as_trie(g)
+		return i, self._find_all_path_as_trie(g)

 	# @todo: (can be removed maybe) this method finds paths repetitively; it could be faster.
-	def __find_all_paths_until_length(self, G, tolabelseqs=True):
+	def _find_all_paths_until_length(self, G, tolabelseqs=True):
 		"""Find all paths no longer than a certain maximum length in a graph. A
 		recursive depth-first search is applied.
@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		"""
#		path_l = [tuple([n]) for n in G.nodes]  # paths of length l
#		all_paths = path_l[:]
-#		for l in range(1, self.__depth + 1):
+#		for l in range(1, self._depth + 1):
#			path_l_new = []
#			for path in path_l:
#				for neighbor in G[path[-1]]:
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		path_l = [[n] for n in G.nodes]  # paths of length l
 		all_paths = [p.copy() for p in path_l]
-		for l in range(1, self.__depth + 1):
+		for l in range(1, self._depth + 1):
 			path_lplus1 = []
 			for path in path_l:
 				for neighbor in G[path[-1]]:
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			all_paths += path_lplus1
 			path_l = [p.copy() for p in path_lplus1]

-#		for i in range(0, self.__depth + 1):
+#		for i in range(0, self._depth + 1):
#			new_paths = find_all_paths(G, i)
#			if new_paths == []:
#				break
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# consider labels
#		print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
#		print()
-		return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
+		return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
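`_find_all_paths_until_length` grows paths level by level: start from every single node, then repeatedly extend each frontier path by one unvisited neighbor, keeping everything found along the way. A condensed standalone version of the same loop:

```python
def all_paths_up_to(G, depth):
	"""All simple paths of at most `depth` edges in a NetworkX graph G."""
	paths = [[n] for n in G.nodes]  # length-0 paths
	frontier = [p[:] for p in paths]
	for _ in range(depth):
		nxt = []
		for path in frontier:
			for nb in G[path[-1]]:
				if nb not in path:  # no revisits: simple paths only
					nxt.append(path + [nb])
		paths += nxt
		frontier = nxt
	return paths
```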
 	def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
+		return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)

-	def __paths2labelseqs(self, plist, G):
-		if len(self.__node_labels) > 0:
-			if len(self.__edge_labels) > 0:
+	def _paths2labelseqs(self, plist, G):
+		if len(self._node_labels) > 0:
+			if len(self._edge_labels) > 0:
 				path_strs = []
 				for path in plist:
 					pths_tmp = []
 					for idx, node in enumerate(path[:-1]):
-						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
-						pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
-					pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels))
+						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
+						pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
+					pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
 					path_strs.append(tuple(pths_tmp))
 			else:
 				path_strs = []
 				for path in plist:
 					pths_tmp = []
 					for node in path:
-						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
+						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
 					path_strs.append(tuple(pths_tmp))
 			return path_strs
 		else:
-			if len(self.__edge_labels) > 0:
+			if len(self._edge_labels) > 0:
 				path_strs = []
 				for path in plist:
 					if len(path) == 1:
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 					else:
 						pths_tmp = []
 						for idx, node in enumerate(path[:-1]):
-							pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
+							pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
 						path_strs.append(tuple(pths_tmp))
 				return path_strs
 			else:
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
#			return [tuple([len(path)]) for path in all_paths]

-	def __add_dummy_labels(self, Gn):
-		if self.__k_func is not None:
-			if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+	def _add_dummy_labels(self, Gn):
+		if self._k_func is not None:
+			if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 				for i in range(len(Gn)):
 					nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-				self.__node_labels = [SpecialLabel.DUMMY]
-			if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+				self._node_labels = [SpecialLabel.DUMMY]
+			if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 				for i in range(len(Gn)):
 					nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-				self.__edge_labels = [SpecialLabel.DUMMY]
+				self._edge_labels = [SpecialLabel.DUMMY]
@@ -76,11 +76,11 @@ class RandomWalkMeta(GraphKernel):
 	def _add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__node_labels = [SpecialLabel.DUMMY]
-		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			self._node_labels = [SpecialLabel.DUMMY]
+		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__edge_labels = [SpecialLabel.DUMMY]
+			self._edge_labels = [SpecialLabel.DUMMY]
@@ -26,11 +26,11 @@ class ShortestPath(GraphKernel):
 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__node_attrs = kwargs.get('node_attrs', [])
-		self.__edge_weight = kwargs.get('edge_weight', None)
-		self.__node_kernels = kwargs.get('node_kernels', None)
-		self.__ds_infos = kwargs.get('ds_infos', {})
+		self._node_labels = kwargs.get('node_labels', [])
+		self._node_attrs = kwargs.get('node_attrs', [])
+		self._edge_weight = kwargs.get('edge_weight', None)
+		self._node_kernels = kwargs.get('node_kernels', None)
+		self._ds_infos = kwargs.get('ds_infos', {})

 	def _compute_gm_series(self):
@@ -39,7 +39,7 @@ class ShortestPath(GraphKernel):
 			iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = self._graphs
-		self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
+		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -51,7 +51,7 @@ class ShortestPath(GraphKernel):
 		else:
 			iterator = itr
 		for i, j in iterator:
-			kernel = self.__sp_do(self._graphs[i], self._graphs[j])
+			kernel = self._sp_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
 			gram_matrix[j][i] = kernel
@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):
 	def _compute_kernel_list_series(self, g1, g_list):
 		# get shortest path graphs of g1 and each graph in g_list.
-		g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
+		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
 		if self._verbose >= 2:
 			iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = g_list
-		g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
+		g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -106,7 +106,7 @@ class ShortestPath(GraphKernel):
 		else:
 			iterator = range(len(g_list))
 		for i in iterator:
-			kernel = self.__sp_do(g1, g_list[i])
+			kernel = self._sp_do(g1, g_list[i])
 			kernel_list[i] = kernel

 		return kernel_list
@@ -114,7 +114,7 @@ class ShortestPath(GraphKernel):
 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
 		# get shortest path graphs of g1 and each graph in g_list.
-		g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
+		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
 		pool = Pool(self._n_jobs)
 		get_sp_graphs_fun = self._wrapper_get_sp_graphs
 		itr = zip(g_list, range(0, len(g_list)))
@@ -151,55 +151,55 @@ class ShortestPath(GraphKernel):
 	def _wrapper_kernel_list_do(self, itr):
-		return itr, self.__sp_do(G_g1, G_gl[itr])
+		return itr, self._sp_do(G_g1, G_gl[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
-		g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
-		kernel = self.__sp_do(g1, g2)
+		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
+		g2 = getSPGraph(g2, edge_weight=self._edge_weight)
+		kernel = self._sp_do(g1, g2)
 		return kernel

 	def _wrapper_get_sp_graphs(self, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, getSPGraph(g, edge_weight=self.__edge_weight)
+		return i, getSPGraph(g, edge_weight=self._edge_weight)

-	def __sp_do(self, g1, g2):
+	def _sp_do(self, g1, g2):
 		kernel = 0

 		# compute shortest path matrices first, method borrowed from FCSP.
 		vk_dict = {}  # shortest path matrices dict
-		if len(self.__node_labels) > 0:
+		if len(self._node_labels) > 0:
 			# node symb and non-symb labeled
-			if len(self.__node_attrs) > 0:
-				kn = self.__node_kernels['mix']
+			if len(self._node_attrs) > 0:
+				kn = self._node_kernels['mix']
 				for n1, n2 in product(
 						g1.nodes(data=True), g2.nodes(data=True)):
-					n1_labels = [n1[1][nl] for nl in self.__node_labels]
-					n2_labels = [n2[1][nl] for nl in self.__node_labels]
-					n1_attrs = [n1[1][na] for na in self.__node_attrs]
-					n2_attrs = [n2[1][na] for na in self.__node_attrs]
+					n1_labels = [n1[1][nl] for nl in self._node_labels]
+					n2_labels = [n2[1][nl] for nl in self._node_labels]
+					n1_attrs = [n1[1][na] for na in self._node_attrs]
+					n2_attrs = [n2[1][na] for na in self._node_attrs]
 					vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
 			# node symb labeled
 			else:
-				kn = self.__node_kernels['symb']
+				kn = self._node_kernels['symb']
 				for n1 in g1.nodes(data=True):
 					for n2 in g2.nodes(data=True):
-						n1_labels = [n1[1][nl] for nl in self.__node_labels]
-						n2_labels = [n2[1][nl] for nl in self.__node_labels]
+						n1_labels = [n1[1][nl] for nl in self._node_labels]
+						n2_labels = [n2[1][nl] for nl in self._node_labels]
 						vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
 		else:
 			# node non-symb labeled
-			if len(self.__node_attrs) > 0:
-				kn = self.__node_kernels['nsymb']
+			if len(self._node_attrs) > 0:
+				kn = self._node_kernels['nsymb']
 				for n1 in g1.nodes(data=True):
 					for n2 in g2.nodes(data=True):
-						n1_attrs = [n1[1][na] for na in self.__node_attrs]
-						n2_attrs = [n2[1][na] for na in self.__node_attrs]
+						n1_attrs = [n1[1][na] for na in self._node_attrs]
+						n2_attrs = [n2[1][na] for na in self._node_attrs]
 						vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
 			# node unlabeled
 			else:
@@ -210,7 +210,7 @@ class ShortestPath(GraphKernel):
 				return kernel

 		# compute graph kernels
-		if self.__ds_infos['directed']:
+		if self._ds_infos['directed']:
 			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
 				if e1[2]['cost'] == e2[2]['cost']:
 					nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
@@ -261,4 +261,4 @@ class ShortestPath(GraphKernel):
 	def _wrapper_sp_do(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__sp_do(G_gs[i], G_gs[j])
+		return i, j, self._sp_do(G_gs[i], G_gs[j])
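The first phase of `_sp_do` (the FCSP trick) computes every pairwise vertex kernel once into `vk_dict`, so the edge loop afterwards only does dictionary lookups. A sketch of the precomputation for the attribute-only case, assuming `kn` is the `'nsymb'` callable taking two attribute vectors:

```python
from itertools import product

def precompute_vertex_kernels(g1, g2, kn, node_attrs):
	"""vk_dict[(u, v)] = kn(attrs(u), attrs(v)) for all node pairs (FCSP style)."""
	vk_dict = {}
	for (u, du), (v, dv) in product(g1.nodes(data=True), g2.nodes(data=True)):
		u_attrs = [du[a] for a in node_attrs]
		v_attrs = [dv[a] for a in node_attrs]
		vk_dict[(u, v)] = kn(u_attrs, v_attrs)
	return vk_dict
```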
@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel): | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
self.__node_labels = kwargs.get('node_labels', []) | |||
self.__edge_labels = kwargs.get('edge_labels', []) | |||
self.__node_attrs = kwargs.get('node_attrs', []) | |||
self.__edge_attrs = kwargs.get('edge_attrs', []) | |||
self.__edge_weight = kwargs.get('edge_weight', None) | |||
self.__node_kernels = kwargs.get('node_kernels', None) | |||
self.__edge_kernels = kwargs.get('edge_kernels', None) | |||
self.__compute_method = kwargs.get('compute_method', 'naive') | |||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||
self._node_labels = kwargs.get('node_labels', []) | |||
self._edge_labels = kwargs.get('edge_labels', []) | |||
self._node_attrs = kwargs.get('node_attrs', []) | |||
self._edge_attrs = kwargs.get('edge_attrs', []) | |||
self._edge_weight = kwargs.get('edge_weight', None) | |||
self._node_kernels = kwargs.get('node_kernels', None) | |||
self._edge_kernels = kwargs.get('edge_kernels', None) | |||
self._compute_method = kwargs.get('compute_method', 'naive') | |||
self._ds_infos = kwargs.get('ds_infos', {}) | |||
def _compute_gm_series(self): | |||
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel): | |||
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout) | |||
else: | |||
iterator = self._graphs | |||
if self.__compute_method == 'trie': | |||
if self._compute_method == 'trie': | |||
for g in iterator: | |||
splist.append(self.__get_sps_as_trie(g)) | |||
splist.append(self._get_sps_as_trie(g)) | |||
else: | |||
for g in iterator: | |||
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])) | |||
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])) | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
@@ -60,14 +60,14 @@ class StructuralSP(GraphKernel): | |||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = itr | |||
if self.__compute_method == 'trie': | |||
if self._compute_method == 'trie': | |||
for i, j in iterator: | |||
kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) | |||
kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel | |||
else: | |||
for i, j in iterator: | |||
kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j]) | |||
kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j]) | |||
# if(kernel > 1): | |||
# print("error here ") | |||
gram_matrix[i][j] = kernel | |||
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel): | |||
else: | |||
chunksize = 100 | |||
# get shortest path graphs of self._graphs | |||
if self.__compute_method == 'trie': | |||
if self._compute_method == 'trie': | |||
get_sps_fun = self._wrapper_get_sps_trie | |||
else: | |||
get_sps_fun = self._wrapper_get_sps_naive | |||
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel): | |||
global G_spl, G_gs | |||
G_spl = spl_toshare | |||
G_gs = gs_toshare | |||
if self.__compute_method == 'trie': | |||
do_fun = self.__wrapper_ssp_do_trie | |||
if self._compute_method == 'trie': | |||
do_fun = self._wrapper_ssp_do_trie | |||
else: | |||
do_fun = self._wrapper_ssp_do_naive | |||
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, | |||
@@ -119,18 +119,18 @@ class StructuralSP(GraphKernel): | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
# get shortest paths of g1 and each graph in g_list. | |||
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed']) | |||
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | |||
splist = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout) | |||
else: | |||
iterator = g_list | |||
if self.__compute_method == 'trie': | |||
if self._compute_method == 'trie': | |||
for g in iterator: | |||
splist.append(self.__get_sps_as_trie(g)) | |||
splist.append(self._get_sps_as_trie(g)) | |||
else: | |||
for g in iterator: | |||
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])) | |||
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])) | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
@@ -138,13 +138,13 @@ class StructuralSP(GraphKernel): | |||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||
else: | |||
iterator = range(len(g_list)) | |||
if self.__compute_method == 'trie': | |||
if self._compute_method == 'trie': | |||
for i in iterator: | |||
kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i]) | |||
kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i]) | |||
kernel_list[i] = kernel | |||
else: | |||
for i in iterator: | |||
kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i]) | |||
kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i]) | |||
kernel_list[i] = kernel | |||
return kernel_list | |||
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel): | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
# get shortest paths of g1 and each graph in g_list. | |||
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed']) | |||
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | |||
splist = [None] * len(g_list) | |||
pool = Pool(self._n_jobs) | |||
itr = zip(g_list, range(0, len(g_list))) | |||
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel): | |||
else: | |||
chunksize = 100 | |||
# get shortest path graphs of g_list | |||
if self.__compute_method == 'trie': | |||
if self._compute_method == 'trie': | |||
get_sps_fun = self._wrapper_get_sps_trie | |||
else: | |||
get_sps_fun = self._wrapper_get_sps_naive | |||
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel): | |||
G_spl = spl_toshare | |||
G_g1 = g1_toshare | |||
G_gl = gl_toshare | |||
if self.__compute_method == 'trie': | |||
do_fun = self.__wrapper_ssp_do_trie | |||
if self._compute_method == 'trie': | |||
do_fun = self._wrapper_ssp_do_trie | |||
else: | |||
do_fun = self._wrapper_kernel_list_do | |||
def func_assign(result, var_to_assign): | |||
@@ -199,36 +199,36 @@ class StructuralSP(GraphKernel): | |||
def _wrapper_kernel_list_do(self, itr): | |||
return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr]) | |||
return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr]) | |||
def _compute_single_kernel_series(self, g1, g2): | |||
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed']) | |||
sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed']) | |||
if self.__compute_method == 'trie': | |||
kernel = self.__ssp_do_trie(g1, g2, sp1, sp2) | |||
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) | |||
sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed']) | |||
if self._compute_method == 'trie': | |||
kernel = self._ssp_do_trie(g1, g2, sp1, sp2) | |||
else: | |||
kernel = self.__ssp_do_naive(g1, g2, sp1, sp2) | |||
kernel = self._ssp_do_naive(g1, g2, sp1, sp2) | |||
return kernel | |||
def _wrapper_get_sps_naive(self, itr_item): | |||
g = itr_item[0] | |||
i = itr_item[1] | |||
return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']) | |||
return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']) | |||
def __ssp_do_naive(self, g1, g2, spl1, spl2): | |||
def _ssp_do_naive(self, g1, g2, spl1, spl2): | |||
kernel = 0 | |||
# First, compute shortest path matrices, method borrowed from FCSP. | |||
vk_dict = self.__get_all_node_kernels(g1, g2) | |||
vk_dict = self._get_all_node_kernels(g1, g2) | |||
# Then, compute kernels between all pairs of edges, which is an idea of | |||
# extension of FCSP. It suits sparse graphs, which is the most case we | |||
# went though. For dense graphs, this would be slow. | |||
ek_dict = self.__get_all_edge_kernels(g1, g2) | |||
ek_dict = self._get_all_edge_kernels(g1, g2) | |||
# compute graph kernels | |||
if vk_dict: | |||
@@ -314,27 +314,27 @@ class StructuralSP(GraphKernel): | |||
def _wrapper_ssp_do_naive(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j]) | |||
return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j]) | |||
def __get_all_node_kernels(self, g1, g2): | |||
def _get_all_node_kernels(self, g1, g2): | |||
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) | |||
def __get_all_edge_kernels(self, g1, g2): | |||
def _get_all_edge_kernels(self, g1, g2): | |||
# compute kernels between all pairs of edges, which is an idea of | |||
# extension of FCSP. It suits sparse graphs, which is the most case we | |||
# went though. For dense graphs, this would be slow. | |||
ek_dict = {} # dict of edge kernels | |||
if len(self.__edge_labels) > 0: | |||
if len(self._edge_labels) > 0: | |||
# edge symb and non-synb labeled | |||
if len(self.__edge_attrs) > 0: | |||
ke = self.__edge_kernels['mix'] | |||
if len(self._edge_attrs) > 0: | |||
ke = self._edge_kernels['mix'] | |||
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)): | |||
e1_labels = [e1[2][el] for el in self.__edge_labels] | |||
e2_labels = [e2[2][el] for el in self.__edge_labels] | |||
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs] | |||
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs] | |||
e1_labels = [e1[2][el] for el in self._edge_labels] | |||
e2_labels = [e2[2][el] for el in self._edge_labels] | |||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | |||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | |||
ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs) | |||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||
@@ -342,11 +342,11 @@ class StructuralSP(GraphKernel): | |||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||
# edge symb labeled | |||
else: | |||
ke = self.__edge_kernels['symb'] | |||
ke = self._edge_kernels['symb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
e1_labels = [e1[2][el] for el in self.__edge_labels] | |||
e2_labels = [e2[2][el] for el in self.__edge_labels] | |||
e1_labels = [e1[2][el] for el in self._edge_labels] | |||
e2_labels = [e2[2][el] for el in self._edge_labels] | |||
ek_temp = ke(e1_labels, e2_labels) | |||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||
@@ -354,12 +354,12 @@ class StructuralSP(GraphKernel): | |||
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp | |||
else: | |||
# edge non-symb labeled | |||
if len(self.__edge_attrs) > 0: | |||
ke = self.__edge_kernels['nsymb'] | |||
if len(self._edge_attrs) > 0: | |||
ke = self._edge_kernels['nsymb'] | |||
for e1 in g1.edges(data=True): | |||
for e2 in g2.edges(data=True): | |||
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs] | |||
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs] | |||
e1_attrs = [e1[2][ea] for ea in self._edge_attrs] | |||
e2_attrs = [e2[2][ea] for ea in self._edge_attrs] | |||
ek_temp = ke(e1_attrs, e2_attrs) | |||
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp | |||
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp | |||
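A minimal standalone sketch of the edge-kernel dictionary built above, assuming a toy kernel on edge labels rather than the library's configured ke callables: each pair of undirected edges is evaluated once and stored under both orientations of both edges, so later lookups never have to normalize edge direction.

    from itertools import product

    def build_edge_kernel_dict(edges1, edges2, ke):
        # edges1, edges2: lists of (u, v, label); ke: kernel on two labels.
        ek_dict = {}
        for (u1, v1, l1), (u2, v2, l2) in product(edges1, edges2):
            ek_temp = ke(l1, l2)
            for a in ((u1, v1), (v1, u1)):        # both orientations of edge 1
                for b in ((u2, v2), (v2, u2)):    # both orientations of edge 2
                    ek_dict[(a, b)] = ek_temp
        return ek_dict

    d = build_edge_kernel_dict([(0, 1, 'a')], [(2, 3, 'a')],
                               lambda x, y: 1.0 if x == y else 0.0)
    print(d[((1, 0), (3, 2))])  # 1.0, found regardless of edge orientation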
@@ -28,16 +28,16 @@ class Treelet(GraphKernel): | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
self.__node_labels = kwargs.get('node_labels', []) | |||
self.__edge_labels = kwargs.get('edge_labels', []) | |||
self.__sub_kernel = kwargs.get('sub_kernel', None) | |||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||
if self.__sub_kernel is None: | |||
self._node_labels = kwargs.get('node_labels', []) | |||
self._edge_labels = kwargs.get('edge_labels', []) | |||
self._sub_kernel = kwargs.get('sub_kernel', None) | |||
self._ds_infos = kwargs.get('ds_infos', {}) | |||
if self._sub_kernel is None: | |||
raise Exception('Sub kernel not set.') | |||
def _compute_gm_series(self): | |||
self.__add_dummy_labels(self._graphs) | |||
self._add_dummy_labels(self._graphs) | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets. | |||
@@ -47,7 +47,7 @@ class Treelet(GraphKernel): | |||
else: | |||
iterator = self._graphs | |||
for g in iterator: | |||
canonkeys.append(self.__get_canonkeys(g)) | |||
canonkeys.append(self._get_canonkeys(g)) | |||
# compute Gram matrix. | |||
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) | |||
@@ -59,7 +59,7 @@ class Treelet(GraphKernel): | |||
else: | |||
iterator = itr | |||
for i, j in iterator: | |||
kernel = self.__kernel_do(canonkeys[i], canonkeys[j]) | |||
kernel = self._kernel_do(canonkeys[i], canonkeys[j]) | |||
gram_matrix[i][j] = kernel | |||
gram_matrix[j][i] = kernel # @todo: no directed graph considered? | |||
@@ -67,7 +67,7 @@ class Treelet(GraphKernel): | |||
def _compute_gm_imap_unordered(self): | |||
self.__add_dummy_labels(self._graphs) | |||
self._add_dummy_labels(self._graphs) | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets. | |||
@@ -103,18 +103,18 @@ class Treelet(GraphKernel): | |||
def _compute_kernel_list_series(self, g1, g_list): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
self._add_dummy_labels(g_list + [g1]) | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets. | |||
canonkeys_1 = self.__get_canonkeys(g1) | |||
canonkeys_1 = self._get_canonkeys(g1) | |||
canonkeys_list = [] | |||
if self._verbose >= 2: | |||
iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout) | |||
else: | |||
iterator = g_list | |||
for g in iterator: | |||
canonkeys_list.append(self.__get_canonkeys(g)) | |||
canonkeys_list.append(self._get_canonkeys(g)) | |||
# compute kernel list. | |||
kernel_list = [None] * len(g_list) | |||
@@ -123,18 +123,18 @@ class Treelet(GraphKernel): | |||
else: | |||
iterator = range(len(g_list)) | |||
for i in iterator: | |||
kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i]) | |||
kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) | |||
kernel_list[i] = kernel | |||
return kernel_list | |||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | |||
self.__add_dummy_labels(g_list + [g1]) | |||
self._add_dummy_labels(g_list + [g1]) | |||
# get all canonical keys of all graphs before computing kernels to save | |||
# time, but this may cost a lot of memory for large datasets. | |||
canonkeys_1 = self.__get_canonkeys(g1) | |||
canonkeys_1 = self._get_canonkeys(g1) | |||
canonkeys_list = [[] for _ in range(len(g_list))] | |||
pool = Pool(self._n_jobs) | |||
itr = zip(g_list, range(0, len(g_list))) | |||
@@ -173,18 +173,18 @@ class Treelet(GraphKernel): | |||
def _wrapper_kernel_list_do(self, itr): | |||
return itr, self.__kernel_do(G_ck_1, G_ck_list[itr]) | |||
return itr, self._kernel_do(G_ck_1, G_ck_list[itr]) | |||
def _compute_single_kernel_series(self, g1, g2): | |||
self.__add_dummy_labels([g1] + [g2]) | |||
canonkeys_1 = self.__get_canonkeys(g1) | |||
canonkeys_2 = self.__get_canonkeys(g2) | |||
kernel = self.__kernel_do(canonkeys_1, canonkeys_2) | |||
self._add_dummy_labels([g1] + [g2]) | |||
canonkeys_1 = self._get_canonkeys(g1) | |||
canonkeys_2 = self._get_canonkeys(g2) | |||
kernel = self._kernel_do(canonkeys_1, canonkeys_2) | |||
return kernel | |||
def __kernel_do(self, canonkey1, canonkey2): | |||
def _kernel_do(self, canonkey1, canonkey2): | |||
"""Compute treelet graph kernel between 2 graphs. | |||
Parameters | |||
@@ -200,17 +200,17 @@ class Treelet(GraphKernel): | |||
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs | |||
vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) | |||
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) | |||
kernel = self.__sub_kernel(vector1, vector2) | |||
kernel = self._sub_kernel(vector1, vector2) | |||
return kernel | |||
def _wrapper_kernel_do(self, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j]) | |||
return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j]) | |||
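A standalone sketch of the kernel computation above (the Gaussian sub-kernel is an illustrative choice; the library applies whatever sub_kernel was configured): only canonical keys present in both graphs contribute, and the sub-kernel is applied to the two count vectors restricted to those shared keys.

    import numpy as np

    def treelet_kernel(canonkey1, canonkey2, sub_kernel):
        keys = set(canonkey1) & set(canonkey2)         # treelets common to both graphs
        vector1 = np.array([canonkey1[k] for k in keys])
        vector2 = np.array([canonkey2[k] for k in keys])
        return sub_kernel(vector1, vector2)

    gaussian = lambda x, y: np.exp(-np.sum((x - y) ** 2) / 2.0)
    print(treelet_kernel({'0': 5, '1': 4}, {'0': 5, '2': 7}, gaussian))  # 1.0: only '0' is shared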
def __get_canonkeys(self, G): | |||
def _get_canonkeys(self, G): | |||
"""Generate canonical keys of all treelets in a graph. | |||
Parameters | |||
@@ -236,7 +236,7 @@ class Treelet(GraphKernel): | |||
patterns['0'] = list(G.nodes()) | |||
canonkey['0'] = nx.number_of_nodes(G) | |||
for i in range(1, 6): # for i in range(1, 6): | |||
patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed']) | |||
patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed']) | |||
canonkey[str(i)] = len(patterns[str(i)]) | |||
# n-star patterns | |||
@@ -330,11 +330,11 @@ class Treelet(GraphKernel): | |||
### pattern obtained in the structural analysis section above, which is a | |||
### string corresponding to a unique treelet. A dictionary is built to keep | |||
### track of the count of each treelet. | |||
if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0: | |||
if len(self._node_labels) > 0 or len(self._edge_labels) > 0: | |||
canonkey_l = {} # canonical key, a dictionary which keeps track of the count of each treelet. | |||
# linear patterns | |||
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels)) | |||
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels)) | |||
for key in canonkey_t: | |||
canonkey_l[('0', key)] = canonkey_t[key] | |||
@@ -343,9 +343,9 @@ class Treelet(GraphKernel): | |||
for pattern in patterns[str(i)]: | |||
canonlist = [] | |||
for idx, node in enumerate(pattern[:-1]): | |||
canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) | |||
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels)) | |||
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels)) | |||
canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) | |||
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels)) | |||
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels)) | |||
canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] | |||
treelet.append(tuple([str(i)] + canonkey_t)) | |||
canonkey_l.update(Counter(treelet)) | |||
@@ -356,13 +356,13 @@ class Treelet(GraphKernel): | |||
for pattern in patterns[str(i) + 'star']: | |||
canonlist = [] | |||
for leaf in pattern[1:]: | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||
canonlist.append(tuple((nlabels, elabels))) | |||
canonlist.sort() | |||
canonlist = list(chain.from_iterable(canonlist)) | |||
canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + | |||
[tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||
[tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||
+ canonlist) | |||
treelet.append(canonkey_t) | |||
canonkey_l.update(Counter(treelet)) | |||
@@ -372,17 +372,17 @@ class Treelet(GraphKernel): | |||
for pattern in patterns['7']: | |||
canonlist = [] | |||
for leaf in pattern[1:3]: | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||
canonlist.append(tuple((nlabels, elabels))) | |||
canonlist.sort() | |||
canonlist = list(chain.from_iterable(canonlist)) | |||
canonkey_t = tuple(['7'] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] | |||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]) | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]) | |||
treelet.append(canonkey_t) | |||
canonkey_l.update(Counter(treelet)) | |||
@@ -391,38 +391,38 @@ class Treelet(GraphKernel): | |||
for pattern in patterns['11']: | |||
canonlist = [] | |||
for leaf in pattern[1:4]: | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||
canonlist.append(tuple((nlabels, elabels))) | |||
canonlist.sort() | |||
canonlist = list(chain.from_iterable(canonlist)) | |||
canonkey_t = tuple(['b'] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist | |||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)] | |||
+ [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]) | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist | |||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)] | |||
+ [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]) | |||
treelet.append(canonkey_t) | |||
canonkey_l.update(Counter(treelet)) | |||
# pattern 10 | |||
treelet = [] | |||
for pattern in patterns['10']: | |||
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels), | |||
tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)] | |||
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), | |||
tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)] | |||
canonlist = [] | |||
for leaf in pattern[1:3]: | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||
canonlist.append(tuple((nlabels, elabels))) | |||
canonlist.sort() | |||
canonkey0 = list(chain.from_iterable(canonlist)) | |||
canonkey_t = tuple(['a'] | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||
+ canonkey4 + canonkey0) | |||
treelet.append(canonkey_t) | |||
canonkey_l.update(Counter(treelet)) | |||
@@ -432,15 +432,15 @@ class Treelet(GraphKernel): | |||
for pattern in patterns['12']: | |||
canonlist0 = [] | |||
for leaf in pattern[1:3]: | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) | |||
canonlist0.append(tuple((nlabels, elabels))) | |||
canonlist0.sort() | |||
canonlist0 = list(chain.from_iterable(canonlist0)) | |||
canonlist3 = [] | |||
for leaf in pattern[4:6]: | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) | |||
elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels) | |||
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) | |||
elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels) | |||
canonlist3.append(tuple((nlabels, elabels))) | |||
canonlist3.sort() | |||
canonlist3 = list(chain.from_iterable(canonlist3)) | |||
@@ -448,14 +448,14 @@ class Treelet(GraphKernel): | |||
# 2 possible keys can be generated from the 2 nodes with extended label 3; | |||
# select the one with the lower lexicographic order (see the sketch after this hunk). | |||
canonkey_t1 = tuple(['c'] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0 | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0 | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||
+ canonlist3) | |||
canonkey_t2 = tuple(['c'] | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3 | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] | |||
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3 | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] | |||
+ canonlist0) | |||
treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) | |||
canonkey_l.update(Counter(treelet)) | |||
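The sketch referenced above: Python compares tuples lexicographically, element by element, so keeping the smaller of the two candidate keys makes the canonical key independent of which central node the pattern enumeration happened to start from.

    key_a = ('c', ('C',), ('H',), ('O',))   # candidate centered on one degree-3 node
    key_b = ('c', ('O',), ('H',), ('C',))   # candidate centered on the other
    canonical = key_a if key_a < key_b else key_b
    print(canonical)  # ('c', ('C',), ('H',), ('O',)) from either starting point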
@@ -463,24 +463,24 @@ class Treelet(GraphKernel): | |||
# pattern 9 | |||
treelet = [] | |||
for pattern in patterns['9']: | |||
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels), | |||
tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)] | |||
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels), | |||
tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)] | |||
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels), | |||
tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)] | |||
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels), | |||
tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] | |||
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels), | |||
tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)] | |||
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), | |||
tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)] | |||
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels), | |||
tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)] | |||
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels), | |||
tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] | |||
if prekey2 + canonkey2 < prekey3 + canonkey3: | |||
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \ | |||
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \ | |||
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ | |||
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ | |||
+ prekey2 + prekey3 + canonkey2 + canonkey3 | |||
else: | |||
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \ | |||
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \ | |||
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ | |||
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ | |||
+ prekey3 + prekey2 + canonkey3 + canonkey2 | |||
treelet.append(tuple(['9'] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] | |||
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] | |||
+ canonkey_t)) | |||
canonkey_l.update(Counter(treelet)) | |||
@@ -492,15 +492,15 @@ class Treelet(GraphKernel): | |||
def _wrapper_get_canonkeys(self, itr_item): | |||
g = itr_item[0] | |||
i = itr_item[1] | |||
return i, self.__get_canonkeys(g) | |||
return i, self._get_canonkeys(g) | |||
def __add_dummy_labels(self, Gn): | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
def _add_dummy_labels(self, Gn): | |||
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): | |||
self._node_labels = [SpecialLabel.DUMMY] | |||
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__edge_labels = [SpecialLabel.DUMMY] | |||
self._edge_labels = [SpecialLabel.DUMMY] | |||
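A minimal sketch of the dummy-label fallback above, with a plain string standing in for SpecialLabel.DUMMY: unlabeled graphs get one constant label so the labeled code path applies unchanged.

    import networkx as nx

    G = nx.path_graph(3)                      # no node or edge labels
    nx.set_node_attributes(G, '0', 'dummy')   # every node gets label '0' under key 'dummy'
    nx.set_edge_attributes(G, '0', 'dummy')
    node_labels = ['dummy']                   # the kernel then reads labels via this list
    print([G.nodes[n]['dummy'] for n in G])   # ['0', '0', '0']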
@@ -25,11 +25,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def __init__(self, **kwargs): | |||
GraphKernel.__init__(self) | |||
self.__node_labels = kwargs.get('node_labels', []) | |||
self.__edge_labels = kwargs.get('edge_labels', []) | |||
self.__height = int(kwargs.get('height', 0)) | |||
self.__base_kernel = kwargs.get('base_kernel', 'subtree') | |||
self.__ds_infos = kwargs.get('ds_infos', {}) | |||
self._node_labels = kwargs.get('node_labels', []) | |||
self._edge_labels = kwargs.get('edge_labels', []) | |||
self._height = int(kwargs.get('height', 0)) | |||
self._base_kernel = kwargs.get('base_kernel', 'subtree') | |||
self._ds_infos = kwargs.get('ds_infos', {}) | |||
def _compute_gm_series(self): | |||
@@ -37,23 +37,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
import warnings | |||
warnings.warn('A part of the computation is parallelized.') | |||
self.__add_dummy_node_labels(self._graphs) | |||
self._add_dummy_node_labels(self._graphs) | |||
# for WL subtree kernel | |||
if self.__base_kernel == 'subtree': | |||
gram_matrix = self.__subtree_kernel_do(self._graphs) | |||
if self._base_kernel == 'subtree': | |||
gram_matrix = self._subtree_kernel_do(self._graphs) | |||
# for WL shortest path kernel | |||
elif self.__base_kernel == 'sp': | |||
gram_matrix = self.__sp_kernel_do(self._graphs) | |||
elif self._base_kernel == 'sp': | |||
gram_matrix = self._sp_kernel_do(self._graphs) | |||
# for WL edge kernel | |||
elif self.__base_kernel == 'edge': | |||
gram_matrix = self.__edge_kernel_do(self._graphs) | |||
elif self._base_kernel == 'edge': | |||
gram_matrix = self._edge_kernel_do(self._graphs) | |||
# for user defined base kernel | |||
else: | |||
gram_matrix = self.__user_kernel_do(self._graphs) | |||
gram_matrix = self._user_kernel_do(self._graphs) | |||
return gram_matrix | |||
@@ -70,23 +70,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
import warnings | |||
warnings.warn('A part of the computation is parallelized.') | |||
self.__add_dummy_node_labels(g_list + [g1]) | |||
self._add_dummy_node_labels(g_list + [g1]) | |||
# for WL subtree kernel | |||
if self.__base_kernel == 'subtree': | |||
gram_matrix = self.__subtree_kernel_do(g_list + [g1]) | |||
if self._base_kernel == 'subtree': | |||
gram_matrix = self._subtree_kernel_do(g_list + [g1]) | |||
# for WL shortest path kernel | |||
elif self.__base_kernel == 'sp': | |||
gram_matrix = self.__sp_kernel_do(g_list + [g1]) | |||
elif self._base_kernel == 'sp': | |||
gram_matrix = self._sp_kernel_do(g_list + [g1]) | |||
# for WL edge kernel | |||
elif self.__base_kernel == 'edge': | |||
gram_matrix = self.__edge_kernel_do(g_list + [g1]) | |||
elif self._base_kernel == 'edge': | |||
gram_matrix = self._edge_kernel_do(g_list + [g1]) | |||
# for user defined base kernel | |||
else: | |||
gram_matrix = self.__user_kernel_do(g_list + [g1]) | |||
gram_matrix = self._user_kernel_do(g_list + [g1]) | |||
return list(gram_matrix[-1][0:-1]) | |||
@@ -103,28 +103,28 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better. | |||
self.__add_dummy_node_labels([g1] + [g2]) | |||
self._add_dummy_node_labels([g1] + [g2]) | |||
# for WL subtree kernel | |||
if self.__base_kernel == 'subtree': | |||
gram_matrix = self.__subtree_kernel_do([g1] + [g2]) | |||
if self._base_kernel == 'subtree': | |||
gram_matrix = self._subtree_kernel_do([g1] + [g2]) | |||
# for WL shortest path kernel | |||
elif self.__base_kernel == 'sp': | |||
gram_matrix = self.__sp_kernel_do([g1] + [g2]) | |||
elif self._base_kernel == 'sp': | |||
gram_matrix = self._sp_kernel_do([g1] + [g2]) | |||
# for WL edge kernel | |||
elif self.__base_kernel == 'edge': | |||
gram_matrix = self.__edge_kernel_do([g1] + [g2]) | |||
elif self._base_kernel == 'edge': | |||
gram_matrix = self._edge_kernel_do([g1] + [g2]) | |||
# for user defined base kernel | |||
else: | |||
gram_matrix = self.__user_kernel_do([g1] + [g2]) | |||
gram_matrix = self._user_kernel_do([g1] + [g2]) | |||
return gram_matrix[0][1] | |||
def __subtree_kernel_do(self, Gn): | |||
def _subtree_kernel_do(self, Gn): | |||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||
Parameters | |||
@@ -146,17 +146,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
for G in Gn: | |||
# set all labels into a tuple. | |||
for nd, attrs in G.nodes(data=True): # @todo: there may be a better way. | |||
G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self.__node_labels) | |||
G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels) | |||
# get the set of original labels | |||
labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values()) | |||
# number of occurrences of each label in G | |||
all_num_of_each_label.append(dict(Counter(labels_ori))) | |||
# Compute subtree kernel with the 0th iteration and add it to the final kernel. | |||
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | |||
self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | |||
# iterate each height | |||
for h in range(1, self.__height + 1): | |||
for h in range(1, self._height + 1): | |||
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration | |||
num_of_labels_occured = 0 # number of label sets that have already occurred as node labels at least once across all graphs | |||
# all_labels_ori = set() # all unique original labels in all graphs in this iteration | |||
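A sketch of the 0th-iteration counting above (graph and label key are illustrative): each node's labels are packed into one tuple, and a Counter over those tuples is the per-graph feature the base kernel consumes.

    import networkx as nx
    from collections import Counter

    def label_counts(G, node_labels):
        for nd, attrs in G.nodes(data=True):
            G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in node_labels)
        labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
        return dict(Counter(labels_ori))

    G = nx.Graph()
    G.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'O'})])
    print(label_counts(G, ['atom']))  # {('C',): 2, ('O',): 1}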
@@ -199,12 +199,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
all_num_of_each_label.append(dict(Counter(labels_comp))) | |||
# Compute subtree kernel with h iterations and add it to the final kernel | |||
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | |||
self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | |||
return gram_matrix | |||
def __compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn): | |||
def _compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn): | |||
"""Compute Gram matrix using the base kernel. | |||
""" | |||
if self._parallel == 'imap_unordered': | |||
@@ -218,12 +218,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
elif self._parallel is None: | |||
for i in range(len(gram_matrix)): | |||
for j in range(i, len(gram_matrix)): | |||
gram_matrix[i][j] = self.__compute_subtree_kernel(all_num_of_each_label[i], | |||
gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i], | |||
all_num_of_each_label[j], gram_matrix[i][j]) | |||
gram_matrix[j][i] = gram_matrix[i][j] | |||
def __compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel): | |||
def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel): | |||
"""Compute the subtree kernel. | |||
""" | |||
labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys())) | |||
@@ -240,7 +240,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
def _wrapper_compute_subtree_kernel(self, gram_matrix, itr): | |||
i = itr[0] | |||
j = itr[1] | |||
return i, j, self.__compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j]) | |||
return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j]) | |||
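From the label sets gathered above, the subtree base kernel appears to accumulate the product of per-label counts; a standalone sketch under that reading (details may differ in the library):

    def subtree_kernel(num_of_each_label1, num_of_each_label2, kernel=0):
        labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
        for label in labels:
            # a label missing from one graph contributes 0 to the sum
            kernel += num_of_each_label1.get(label, 0) * num_of_each_label2.get(label, 0)
        return kernel

    print(subtree_kernel({('C',): 2, ('O',): 1}, {('C',): 3}))  # 2 * 3 = 6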
def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||
@@ -469,11 +469,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||
return gram_matrix | |||
def __add_dummy_node_labels(self, Gn): | |||
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): | |||
def _add_dummy_node_labels(self, Gn): | |||
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): | |||
for i in range(len(Gn)): | |||
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) | |||
self.__node_labels = [SpecialLabel.DUMMY] | |||
self._node_labels = [SpecialLabel.DUMMY] | |||
class WLSubtree(WeisfeilerLehman): | |||
@@ -31,7 +31,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav | |||
if save_results: | |||
# create result files. | |||
print('creating output files...') | |||
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save) | |||
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], dir_save) | |||
dis_k_dataset_list = [] | |||
@@ -166,7 +166,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav | |||
print('\ncomplete.\n') | |||
def __init_output_file_preimage(ds_name, gkernel, dir_output): | |||
def _init_output_file_preimage(ds_name, gkernel, dir_output): | |||
if not os.path.exists(dir_output): | |||
os.makedirs(dir_output) | |||
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | |||
@@ -33,35 +33,35 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti | |||
if save_results: | |||
# create result files. | |||
print('creating output files...') | |||
fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||
fn_output_detail, fn_output_summary = _init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||
else: | |||
fn_output_detail, fn_output_summary = None, None | |||
# 2. compute/load Gram matrix a priori. | |||
print('2. computing/loading Gram matrix...') | |||
gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all) | |||
gram_matrix_unnorm, time_precompute_gm = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all) | |||
# 3. perform k-nn CV. | |||
print('3. performing k-nn CV...') | |||
if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random': | |||
__kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||
_kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||
elif train_examples == 'best-dataset': | |||
__kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||
_kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||
elif train_examples == 'trainset': | |||
__kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||
_kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) | |||
print('\ncomplete.\n') | |||
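A hedged configuration sketch for the dispatch above; only the keys visible in this diff are certain, everything else is an assumption:

    knn_options = {'n_neighbors': 1, 'n_splits': 30, 'test_size': 0.1}
    kernel_options = {'name': 'Treelet', 'normalize': True}   # other keys likely required
    mpg_options = {'fit_method': 'k-graphs'}
    # train_examples selects the branch above:
    #   'k-graphs' / 'expert' / 'random' -> _kernel_knn_cv_median
    #   'best-dataset'                   -> _kernel_knn_cv_best_ds
    #   'trainset'                       -> _kernel_knn_cv_trainset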
def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||
def _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||
Gn = dataset_all.graphs | |||
y_all = dataset_all.targets | |||
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] | |||
# get shuffles. | |||
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size) | |||
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size) | |||
accuracies = [[], [], []] | |||
for trial in range(len(train_indices)): | |||
@@ -89,11 +89,11 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne | |||
mge_options['update_order'] = True | |||
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() | |||
mpg_options['runtime_precompute_gm'] = 0 | |||
set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) | |||
set_median, gen_median_uo = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) | |||
mge_options['update_order'] = False | |||
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() | |||
mpg_options['runtime_precompute_gm'] = 0 | |||
_, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) | |||
_, gen_median = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) | |||
medians[0].append(set_median) | |||
medians[1].append(gen_median) | |||
medians[2].append(gen_median_uo) | |||
@@ -104,10 +104,10 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne | |||
# compute dis_mat between medians. | |||
dataset = dataset_all.copy() | |||
dataset.load_graphs([g.copy() for g in G_app], targets=None) | |||
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy()) | |||
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy()) | |||
# compute the entire Gram matrix. | |||
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy()) | |||
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy()) | |||
kernels_to_medians = [] | |||
for g in G_app: | |||
kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) | |||
@@ -161,13 +161,13 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne | |||
f_summary.close() | |||
def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||
def _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||
Gn = dataset_all.graphs | |||
y_all = dataset_all.targets | |||
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] | |||
# get shuffles. | |||
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size) | |||
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size) | |||
accuracies = [] | |||
for trial in range(len(train_indices)): | |||
@@ -204,10 +204,10 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g | |||
# compute dis_mat between medians. | |||
dataset = dataset_all.copy() | |||
dataset.load_graphs([g.copy() for g in best_graphs], targets=None) | |||
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy()) | |||
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy()) | |||
# compute the entire Gram matrix. | |||
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy()) | |||
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy()) | |||
kernels_to_best_graphs = [] | |||
for g in best_graphs: | |||
kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) | |||
@@ -259,7 +259,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g | |||
f_summary.close() | |||
def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||
def _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): | |||
y_all = dataset_all.targets | |||
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] | |||
@@ -268,7 +268,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, | |||
dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) | |||
# get shuffles. | |||
train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size) | |||
train_indices, test_indices, _, _ = _get_shuffles(y_all, n_splits, test_size) | |||
accuracies = [] | |||
for trial in range(len(train_indices)): | |||
@@ -317,7 +317,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, | |||
f_summary.close() | |||
def __get_shuffles(y_all, n_splits, test_size): | |||
def _get_shuffles(y_all, n_splits, test_size): | |||
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) | |||
train_indices = [[] for _ in range(n_splits)] | |||
test_indices = [[] for _ in range(n_splits)] | |||
@@ -335,7 +335,7 @@ def __get_shuffles(y_all, n_splits, test_size): | |||
return train_indices, test_indices, train_nums, keys | |||
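A simplified sketch of the shuffle generation above (the library version additionally records per-class training counts and the class keys; this shows only the ShuffleSplit mechanics):

    from sklearn.model_selection import ShuffleSplit

    y_all = ['a', 'a', 'b', 'b', 'a', 'b']
    rs = ShuffleSplit(n_splits=3, test_size=0.34, random_state=0)
    for train_idx, test_idx in rs.split(y_all):
        print(sorted(train_idx), sorted(test_idx))  # disjoint index sets per split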
def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options): | |||
def _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options): | |||
mpg = MedianPreimageGenerator() | |||
mpg.dataset = dataset.copy() | |||
mpg.set_options(**mpg_options.copy()) | |||
@@ -346,7 +346,7 @@ def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_option | |||
return mpg.set_median, mpg.gen_median | |||
def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): | |||
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): | |||
if load_gm == 'auto': | |||
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||
@@ -355,10 +355,10 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): | |||
gram_matrix_unnorm = gmfile['gram_matrix_unnorm'] | |||
time_precompute_gm = float(gmfile['run_time']) | |||
else: | |||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options) | |||
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options) | |||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) | |||
elif not load_gm: | |||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options) | |||
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options) | |||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) | |||
else: | |||
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||
@@ -369,7 +369,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): | |||
return gram_matrix_unnorm, time_precompute_gm | |||
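A minimal standalone sketch of the load-or-compute caching pattern above (np.savez appends the .npz suffix itself, hence the filename handling; the filename and compute function here are illustrative):

    import os
    import numpy as np

    def load_or_compute_gm(gm_fname, compute_fn):
        if os.path.isfile(os.path.abspath(gm_fname)):
            gmfile = np.load(gm_fname)
            return gmfile['gram_matrix_unnorm'], float(gmfile['run_time'])
        gram_matrix_unnorm, run_time = compute_fn()
        np.savez(gm_fname[:-len('.npz')],   # np.savez adds '.npz' back
                 gram_matrix_unnorm=gram_matrix_unnorm, run_time=run_time)
        return gram_matrix_unnorm, run_time

    gm, t = load_or_compute_gm('gram_matrix_unnorm.MUTAG.Treelet.gm.npz',
                               lambda: (np.eye(3), 0.1))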
def __get_graph_kernel(dataset, kernel_options): | |||
def _get_graph_kernel(dataset, kernel_options): | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||
node_labels=dataset.node_labels, | |||
@@ -381,7 +381,7 @@ def __get_graph_kernel(dataset, kernel_options): | |||
return graph_kernel | |||
def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||
def _compute_gram_matrix_unnorm(dataset, kernel_options): | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||
node_labels=dataset.node_labels, | |||
@@ -397,7 +397,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||
return gram_matrix_unnorm, run_time | |||
def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output): | |||
def _init_output_file_knn(ds_name, gkernel, fit_method, dir_output): | |||
if not os.path.exists(dir_output): | |||
os.makedirs(dir_output) | |||
fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv' | |||
@@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
def __init__(self, dataset=None): | |||
PreimageGenerator.__init__(self, dataset=dataset) | |||
### arguments to set. | |||
self.__mge = None | |||
self.__ged_options = {} | |||
self.__mge_options = {} | |||
# self.__fit_method = 'k-graphs' | |||
self.__init_method = 'random' | |||
self.__init_ecc = None | |||
self.__parallel = True | |||
self.__n_jobs = multiprocessing.cpu_count() | |||
self.__ds_name = None | |||
self._mge = None | |||
self._ged_options = {} | |||
self._mge_options = {} | |||
# self._fit_method = 'k-graphs' | |||
self._init_method = 'random' | |||
self._init_ecc = None | |||
self._parallel = True | |||
self._n_jobs = multiprocessing.cpu_count() | |||
self._ds_name = None | |||
# for cml. | |||
self.__time_limit_in_sec = 0 | |||
self.__max_itrs = 100 | |||
self.__max_itrs_without_update = 3 | |||
self.__epsilon_residual = 0.01 | |||
self.__epsilon_ec = 0.1 | |||
self.__allow_zeros = True | |||
# self.__triangle_rule = True | |||
self._time_limit_in_sec = 0 | |||
self._max_itrs = 100 | |||
self._max_itrs_without_update = 3 | |||
self._epsilon_residual = 0.01 | |||
self._epsilon_ec = 0.1 | |||
self._allow_zeros = True | |||
# self._triangle_rule = True | |||
### values to compute. | |||
self.__runtime_optimize_ec = None | |||
self.__runtime_generate_preimage = None | |||
self.__runtime_total = None | |||
self.__set_median = None | |||
self.__gen_median = None | |||
self.__best_from_dataset = None | |||
self.__sod_set_median = None | |||
self.__sod_gen_median = None | |||
self.__k_dis_set_median = None | |||
self.__k_dis_gen_median = None | |||
self.__k_dis_dataset = None | |||
self.__node_label_costs = None | |||
self.__edge_label_costs = None | |||
self._runtime_optimize_ec = None | |||
self._runtime_generate_preimage = None | |||
self._runtime_total = None | |||
self._set_median = None | |||
self._gen_median = None | |||
self._best_from_dataset = None | |||
self._sod_set_median = None | |||
self._sod_gen_median = None | |||
self._k_dis_set_median = None | |||
self._k_dis_gen_median = None | |||
self._k_dis_dataset = None | |||
self._node_label_costs = None | |||
self._edge_label_costs = None | |||
# for cml. | |||
self.__itrs = 0 | |||
self.__converged = False | |||
self.__num_updates_ecs = 0 | |||
self._itrs = 0 | |||
self._converged = False | |||
self._num_updates_ecs = 0 | |||
### values that can either be set or computed. | |||
self.__edit_cost_constants = [] | |||
self.__gram_matrix_unnorm = None | |||
self.__runtime_precompute_gm = None | |||
self._edit_cost_constants = [] | |||
self._gram_matrix_unnorm = None | |||
self._runtime_precompute_gm = None | |||
def set_options(self, **kwargs): | |||
self._kernel_options = kwargs.get('kernel_options', {}) | |||
self._graph_kernel = kwargs.get('graph_kernel', None) | |||
self._verbose = kwargs.get('verbose', 2) | |||
self.__ged_options = kwargs.get('ged_options', {}) | |||
self.__mge_options = kwargs.get('mge_options', {}) | |||
# self.__fit_method = kwargs.get('fit_method', 'k-graphs') | |||
self.__init_method = kwargs.get('init_method', 'random') | |||
self.__init_ecc = kwargs.get('init_ecc', None) | |||
self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) | |||
self.__parallel = kwargs.get('parallel', True) | |||
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||
self.__ds_name = kwargs.get('ds_name', None) | |||
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||
self.__max_itrs = kwargs.get('max_itrs', 100) | |||
self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) | |||
self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01) | |||
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) | |||
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||
self.__allow_zeros = kwargs.get('allow_zeros', True) | |||
# self.__triangle_rule = kwargs.get('triangle_rule', True) | |||
self._ged_options = kwargs.get('ged_options', {}) | |||
self._mge_options = kwargs.get('mge_options', {}) | |||
# self._fit_method = kwargs.get('fit_method', 'k-graphs') | |||
self._init_method = kwargs.get('init_method', 'random') | |||
self._init_ecc = kwargs.get('init_ecc', None) | |||
self._edit_cost_constants = kwargs.get('edit_cost_constants', []) | |||
self._parallel = kwargs.get('parallel', True) | |||
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||
self._ds_name = kwargs.get('ds_name', None) | |||
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||
self._max_itrs = kwargs.get('max_itrs', 100) | |||
self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) | |||
self._epsilon_residual = kwargs.get('epsilon_residual', 0.01) | |||
self._epsilon_ec = kwargs.get('epsilon_ec', 0.1) | |||
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||
self._allow_zeros = kwargs.get('allow_zeros', True) | |||
# self._triangle_rule = kwargs.get('triangle_rule', True) | |||
def run(self): | |||
@@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
start = time.time() | |||
# 1. precompute gram matrix. | |||
if self.__gram_matrix_unnorm is None: | |||
if self._gram_matrix_unnorm is None: | |||
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | |||
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||
end_precompute_gm = time.time() | |||
self.__runtime_precompute_gm = end_precompute_gm - start | |||
self._runtime_precompute_gm = end_precompute_gm - start | |||
else: | |||
if self.__runtime_precompute_gm is None: | |||
if self._runtime_precompute_gm is None: | |||
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') | |||
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm | |||
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm | |||
if self._kernel_options['normalize']: | |||
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) | |||
else: | |||
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) | |||
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) | |||
end_precompute_gm = time.time() | |||
start -= self.__runtime_precompute_gm | |||
start -= self._runtime_precompute_gm | |||
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset': | |||
# if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset': | |||
# start = time.time() | |||
# self.__runtime_precompute_gm = 0 | |||
# self._runtime_precompute_gm = 0 | |||
# end_precompute_gm = start | |||
# 2. optimize edit cost constants. | |||
self.__optimize_edit_cost_vector() | |||
self._optimize_edit_cost_vector() | |||
end_optimize_ec = time.time() | |||
self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm | |||
self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm | |||
# 3. compute set median and gen median using optimized edit costs. | |||
if self._verbose >= 2: | |||
print('\nstart computing set median and gen median using optimized edit costs...\n') | |||
self.__gmg_bcu() | |||
self._gmg_bcu() | |||
end_generate_preimage = time.time() | |||
self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec | |||
self.__runtime_total = end_generate_preimage - start | |||
self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec | |||
self._runtime_total = end_generate_preimage - start | |||
if self._verbose >= 2: | |||
print('medians computed.') | |||
print('SOD of the set median: ', self.__sod_set_median) | |||
print('SOD of the generalized median: ', self.__sod_gen_median) | |||
print('SOD of the set median: ', self._sod_set_median) | |||
print('SOD of the generalized median: ', self._sod_gen_median) | |||
# 4. compute kernel distances to the true median. | |||
if self._verbose >= 2: | |||
print('\nstart computing distances to true median....\n') | |||
self.__compute_distances_to_true_median() | |||
self._compute_distances_to_true_median() | |||
# 5. print out results. | |||
if self._verbose: | |||
@@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
print('================================================================================') | |||
print('Finished generation of preimages.') | |||
print('--------------------------------------------------------------------------------') | |||
print('The optimized edit costs:', self.__edit_cost_constants) | |||
print('SOD of the set median:', self.__sod_set_median) | |||
print('SOD of the generalized median:', self.__sod_gen_median) | |||
print('Distance in kernel space for set median:', self.__k_dis_set_median) | |||
print('Distance in kernel space for generalized median:', self.__k_dis_gen_median) | |||
print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | |||
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||
print('Time to optimize edit costs:', self.__runtime_optimize_ec) | |||
print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||
print('Total time:', self.__runtime_total) | |||
print('Total number of iterations for optimizing:', self.__itrs) | |||
print('Total number of updating edit costs:', self.__num_updates_ecs) | |||
print('Is optimization of edit costs converged:', self.__converged) | |||
print('The optimized edit costs:', self._edit_cost_constants) | |||
print('SOD of the set median:', self._sod_set_median) | |||
print('SOD of the generalized median:', self._sod_gen_median) | |||
print('Distance in kernel space for set median:', self._k_dis_set_median) | |||
print('Distance in kernel space for generalized median:', self._k_dis_gen_median) | |||
print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset) | |||
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) | |||
print('Time to optimize edit costs:', self._runtime_optimize_ec) | |||
print('Time to generate pre-images:', self._runtime_generate_preimage) | |||
print('Total time:', self._runtime_total) | |||
print('Total number of iterations for optimizing:', self._itrs) | |||
print('Total number of updating edit costs:', self._num_updates_ecs) | |||
print('Is optimization of edit costs converged:', self._converged) | |||
print('================================================================================') | |||
print() | |||
def get_results(self): | |||
results = {} | |||
results['edit_cost_constants'] = self.__edit_cost_constants | |||
results['runtime_precompute_gm'] = self.__runtime_precompute_gm | |||
results['runtime_optimize_ec'] = self.__runtime_optimize_ec | |||
results['runtime_generate_preimage'] = self.__runtime_generate_preimage | |||
results['runtime_total'] = self.__runtime_total | |||
results['sod_set_median'] = self.__sod_set_median | |||
results['sod_gen_median'] = self.__sod_gen_median | |||
results['k_dis_set_median'] = self.__k_dis_set_median | |||
results['k_dis_gen_median'] = self.__k_dis_gen_median | |||
results['k_dis_dataset'] = self.__k_dis_dataset | |||
results['itrs'] = self.__itrs | |||
results['converged'] = self.__converged | |||
results['num_updates_ecc'] = self.__num_updates_ecs | |||
results['edit_cost_constants'] = self._edit_cost_constants | |||
results['runtime_precompute_gm'] = self._runtime_precompute_gm | |||
results['runtime_optimize_ec'] = self._runtime_optimize_ec | |||
results['runtime_generate_preimage'] = self._runtime_generate_preimage | |||
results['runtime_total'] = self._runtime_total | |||
results['sod_set_median'] = self._sod_set_median | |||
results['sod_gen_median'] = self._sod_gen_median | |||
results['k_dis_set_median'] = self._k_dis_set_median | |||
results['k_dis_gen_median'] = self._k_dis_gen_median | |||
results['k_dis_dataset'] = self._k_dis_dataset | |||
results['itrs'] = self._itrs | |||
results['converged'] = self._converged | |||
results['num_updates_ecc'] = self._num_updates_ecs | |||
results['mge'] = {} | |||
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() | |||
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() | |||
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() | |||
results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased() | |||
results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased() | |||
results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents() | |||
return results | |||
def __optimize_edit_cost_vector(self): | |||
def _optimize_edit_cost_vector(self): | |||
"""Learn edit cost vector. | |||
""" | |||
# Initialize label costs randomly. | |||
if self.__init_method == 'random': | |||
if self._init_method == 'random': | |||
# Initialize label costs. | |||
self.__initialize_label_costs() | |||
self._initialize_label_costs() | |||
# Optimize edit cost matrices. | |||
self.__optimize_ecm_by_kernel_distances() | |||
self._optimize_ecm_by_kernel_distances() | |||
# Initialize all label costs with the same value. | |||
elif self.__init_method == 'uniform': # random | |||
elif self._init_method == 'uniform': # random | |||
pass | |||
elif self.__fit_method == 'random': # random | |||
if self.__ged_options['edit_cost'] == 'LETTER': | |||
self.__edit_cost_constants = random.sample(range(1, 1000), 3) | |||
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] | |||
elif self.__ged_options['edit_cost'] == 'LETTER2': | |||
elif self._fit_method == 'random': # random | |||
if self._ged_options['edit_cost'] == 'LETTER': | |||
self._edit_cost_constants = random.sample(range(1, 1000), 3) | |||
self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants] | |||
elif self._ged_options['edit_cost'] == 'LETTER2': | |||
random.seed(time.time()) | |||
self.__edit_cost_constants = random.sample(range(1, 1000), 5) | |||
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] | |||
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | |||
self.__edit_cost_constants = random.sample(range(1, 1000), 6) | |||
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] | |||
self._edit_cost_constants = random.sample(range(1, 1000), 5) | |||
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] | |||
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': | |||
self._edit_cost_constants = random.sample(range(1, 1000), 6) | |||
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] | |||
if self._dataset.node_attrs == []: | |||
self.__edit_cost_constants[2] = 0 | |||
self._edit_cost_constants[2] = 0 | |||
if self._dataset.edge_attrs == []: | |||
self.__edit_cost_constants[5] = 0 | |||
self._edit_cost_constants[5] = 0 | |||
else: | |||
self.__edit_cost_constants = random.sample(range(1, 1000), 6) | |||
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] | |||
self._edit_cost_constants = random.sample(range(1, 1000), 6) | |||
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] | |||
if self._verbose >= 2: | |||
print('edit cost constants used:', self.__edit_cost_constants) | |||
elif self.__fit_method == 'expert': # expert | |||
if self.__init_ecc is None: | |||
if self.__ged_options['edit_cost'] == 'LETTER': | |||
self.__edit_cost_constants = [0.9, 1.7, 0.75] | |||
elif self.__ged_options['edit_cost'] == 'LETTER2': | |||
self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
print('edit cost constants used:', self._edit_cost_constants) | |||
elif self._fit_method == 'expert': # expert | |||
if self._init_ecc is None: | |||
if self._ged_options['edit_cost'] == 'LETTER': | |||
self._edit_cost_constants = [0.9, 1.7, 0.75] | |||
elif self._ged_options['edit_cost'] == 'LETTER2': | |||
self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
else: | |||
self.__edit_cost_constants = [3, 3, 1, 3, 3, 1] | |||
self._edit_cost_constants = [3, 3, 1, 3, 3, 1] | |||
else: | |||
self.__edit_cost_constants = self.__init_ecc | |||
elif self.__fit_method == 'k-graphs': | |||
if self.__init_ecc is None: | |||
if self.__ged_options['edit_cost'] == 'LETTER': | |||
self.__init_ecc = [0.9, 1.7, 0.75] | |||
elif self.__ged_options['edit_cost'] == 'LETTER2': | |||
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': | |||
self.__init_ecc = [0, 0, 1, 1, 1, 0] | |||
self._edit_cost_constants = self._init_ecc | |||
elif self._fit_method == 'k-graphs': | |||
if self._init_ecc is None: | |||
if self._ged_options['edit_cost'] == 'LETTER': | |||
self._init_ecc = [0.9, 1.7, 0.75] | |||
elif self._ged_options['edit_cost'] == 'LETTER2': | |||
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': | |||
self._init_ecc = [0, 0, 1, 1, 1, 0] | |||
if self._dataset.node_attrs == []: | |||
self.__init_ecc[2] = 0 | |||
self._init_ecc[2] = 0 | |||
if self._dataset.edge_attrs == []: | |||
self.__init_ecc[5] = 0 | |||
self._init_ecc[5] = 0 | |||
else: | |||
self.__init_ecc = [3, 3, 1, 3, 3, 1] | |||
self._init_ecc = [3, 3, 1, 3, 3, 1] | |||
# optimize on the k-graph subset. | |||
self.__optimize_ecm_by_kernel_distances() | |||
elif self.__fit_method == 'whole-dataset': | |||
if self.__init_ecc is None: | |||
if self.__ged_options['edit_cost'] == 'LETTER': | |||
self.__init_ecc = [0.9, 1.7, 0.75] | |||
elif self.__ged_options['edit_cost'] == 'LETTER2': | |||
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
self._optimize_ecm_by_kernel_distances() | |||
elif self._fit_method == 'whole-dataset': | |||
if self._init_ecc is None: | |||
if self._ged_options['edit_cost'] == 'LETTER': | |||
self._init_ecc = [0.9, 1.7, 0.75] | |||
elif self._ged_options['edit_cost'] == 'LETTER2': | |||
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] | |||
else: | |||
self.__init_ecc = [3, 3, 1, 3, 3, 1] | |||
self._init_ecc = [3, 3, 1, 3, 3, 1] | |||
# optimize on the whole dataset. | |||
self.__optimize_ecc_by_kernel_distances() | |||
elif self.__fit_method == 'precomputed': | |||
self._optimize_ecm_by_kernel_distances() | |||
elif self._fit_method == 'precomputed': | |||
pass | |||
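As a reading aid, a hedged sketch of what the 'random' branch above produces for the NON_SYMBOLIC edit cost: six constants drawn from (0.01, 9.99], with indices 2 and 5 zeroed when the dataset carries no node or edge attributes. The function name is illustrative only (note the source seeds the RNG only in the LETTER2 branch):

```python
import random

def random_non_symbolic_ecc(has_node_attrs, has_edge_attrs):
    # mirror of the branch above: sample six integers from [1, 999] and scale
    ecc = [c * 0.01 for c in random.sample(range(1, 1000), 6)]
    if not has_node_attrs:
        ecc[2] = 0  # index 2 is zeroed in the source when node attrs are absent
    if not has_edge_attrs:
        ecc[5] = 0  # index 5 is zeroed in the source when edge attrs are absent
    return ecc

print(random_non_symbolic_ecc(True, False))
```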
def __initialize_label_costs(self): | |||
self.__initialize_node_label_costs() | |||
self.__initialize_edge_label_costs() | |||
def _initialize_label_costs(self): | |||
self._initialize_node_label_costs() | |||
self._initialize_edge_label_costs() | |||
def __initialize_node_label_costs(self): | |||
def _initialize_node_label_costs(self): | |||
# Get list of node labels. | |||
nls = self._dataset.get_all_node_labels() | |||
# Generate random costs. | |||
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls)) | |||
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl) | |||
rand_costs /= np.max(rand_costs) # @todo: maybe not needed. | |||
self.__node_label_costs = rand_costs | |||
self._node_label_costs = rand_costs | |||
def __initialize_edge_label_costs(self): | |||
def _initialize_edge_label_costs(self): | |||
# Get list of edge labels. | |||
els = self._dataset.get_all_edge_labels() | |||
# Generate random costs. | |||
nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els)) | |||
rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el) | |||
rand_costs /= np.max(rand_costs) # @todo: maybe not needed. | |||
self.__edge_label_costs = rand_costs | |||
self._edge_label_costs = rand_costs | |||
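The count used in both initializers, `n*(n-1)/2 + 2*n` for `n` labels, is the number of distinct label operations: one substitution cost per unordered pair of distinct labels, plus one insertion and one deletion cost per label (so 3 labels need 3 + 6 = 9 costs). A quick check of that identity:

```python
from math import comb

def nb_label_costs(n_labels):
    # substitutions between distinct labels + an insertion and a deletion per label
    return comb(n_labels, 2) + 2 * n_labels

assert nb_label_costs(3) == int((3 * (3 - 1)) / 2 + 2 * 3) == 9
```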
def __optimize_ecm_by_kernel_distances(self): | |||
def _optimize_ecm_by_kernel_distances(self): | |||
# compute distances in feature space. | |||
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() | |||
dis_k_vec = [] | |||
@@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
dis_k_vec = np.array(dis_k_vec) | |||
# Set GEDEnv options. | |||
# graphs = [self.__clean_graph(g) for g in self._dataset.graphs] | |||
# self.__edit_cost_constants = self.__init_ecc | |||
options = self.__ged_options.copy() | |||
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed. | |||
# graphs = [self._clean_graph(g) for g in self._dataset.graphs] | |||
# self._edit_cost_constants = self._init_ecc | |||
options = self._ged_options.copy() | |||
options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed. | |||
options['node_labels'] = self._dataset.node_labels | |||
options['edge_labels'] = self._dataset.edge_labels | |||
# options['node_attrs'] = self._dataset.node_attrs | |||
# options['edge_attrs'] = self._dataset.edge_attrs | |||
options['node_label_costs'] = self.__node_label_costs | |||
options['edge_label_costs'] = self.__edge_label_costs | |||
options['node_label_costs'] = self._node_label_costs | |||
options['edge_label_costs'] = self._edge_label_costs | |||
# Learn cost matrices. | |||
# Initialize cost learner. | |||
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo | |||
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec) | |||
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo | |||
cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec) | |||
# Run cost learner. | |||
cml.update(dis_k_vec, self._dataset.graphs, options) | |||
# Get results. | |||
results = cml.get_results() | |||
self.__converged = results['converged'] | |||
self.__itrs = results['itrs'] | |||
self.__num_updates_ecs = results['num_updates_ecs'] | |||
self._converged = results['converged'] | |||
self._itrs = results['itrs'] | |||
self._num_updates_ecs = results['num_updates_ecs'] | |||
cost_list = results['cost_list'] | |||
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)] | |||
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):] | |||
self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)] | |||
self._edge_label_costs = cost_list[-1][len(self._node_label_costs):] | |||
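The learner returns one flat cost vector per iteration; the final one is split back into node and edge label costs purely by length, as the two slices above do. A toy check of that round trip (values illustrative):

```python
import numpy as np

node_costs = np.array([0.1, 0.2, 0.3])
edge_costs = np.array([0.4, 0.5])
flat = np.concatenate([node_costs, edge_costs])  # layout: node costs first

assert np.allclose(flat[:len(node_costs)], node_costs)
assert np.allclose(flat[len(node_costs):], edge_costs)
```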
def __gmg_bcu(self): | |||
def _gmg_bcu(self): | |||
""" | |||
The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). | |||
@@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
# Set up the ged environment. | |||
ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable. | |||
# gedlibpy.restart_env() | |||
ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants) | |||
graphs = [self.__clean_graph(g) for g in self._dataset.graphs] | |||
ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants) | |||
graphs = [self._clean_graph(g) for g in self._dataset.graphs] | |||
for g in graphs: | |||
ged_env.add_nx_graph(g, '') | |||
graph_ids = ged_env.get_all_graph_ids() | |||
node_labels = ged_env.get_all_node_labels() | |||
edge_labels = ged_env.get_all_edge_labels() | |||
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels)) | |||
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels)) | |||
node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels)) | |||
edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels)) | |||
ged_env.set_label_costs(node_label_costs, edge_label_costs) | |||
set_median_id = ged_env.add_graph('set_median') | |||
gen_median_id = ged_env.add_graph('gen_median') | |||
ged_env.init(init_type=self.__ged_options['init_option']) | |||
ged_env.init(init_type=self._ged_options['init_option']) | |||
# Set up the median graph estimator. | |||
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) | |||
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options) | |||
options = self.__mge_options.copy() | |||
self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost'])) | |||
self._mge.set_refine_method(self._ged_options['method'], self._ged_options) | |||
options = self._mge_options.copy() | |||
if 'seed' not in options: | |||
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. | |||
options['parallel'] = self.__parallel | |||
options['parallel'] = self._parallel | |||
# Select the GED algorithm. | |||
self.__mge.set_options(mge_options_to_string(options)) | |||
self.__mge.set_label_names(node_labels=self._dataset.node_labels, | |||
self._mge.set_options(mge_options_to_string(options)) | |||
self._mge.set_label_names(node_labels=self._dataset.node_labels, | |||
edge_labels=self._dataset.edge_labels, | |||
node_attrs=self._dataset.node_attrs, | |||
edge_attrs=self._dataset.edge_attrs) | |||
ged_options = self.__ged_options.copy() | |||
if self.__parallel: | |||
ged_options = self._ged_options.copy() | |||
if self._parallel: | |||
ged_options['threads'] = 1 | |||
self.__mge.set_init_method(ged_options['method'], ged_options) | |||
self.__mge.set_descent_method(ged_options['method'], ged_options) | |||
self._mge.set_init_method(ged_options['method'], ged_options) | |||
self._mge.set_descent_method(ged_options['method'], ged_options) | |||
# Run the estimator. | |||
self.__mge.run(graph_ids, set_median_id, gen_median_id) | |||
self._mge.run(graph_ids, set_median_id, gen_median_id) | |||
# Get SODs. | |||
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') | |||
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') | |||
self._sod_set_median = self._mge.get_sum_of_distances('initialized') | |||
self._sod_gen_median = self._mge.get_sum_of_distances('converged') | |||
# Get median graphs. | |||
self.__set_median = ged_env.get_nx_graph(set_median_id) | |||
self.__gen_median = ged_env.get_nx_graph(gen_median_id) | |||
self._set_median = ged_env.get_nx_graph(set_median_id) | |||
self._gen_median = ged_env.get_nx_graph(gen_median_id) | |||
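For readers new to the estimator: block coordinate update here means alternating between two blocks of variables until the sum of distances (SOD) stops decreasing. A pseudocode-level sketch of that alternation (the actual work is delegated to MedianGraphEstimatorCML; all callables below are placeholders):

```python
def bcu_median(graphs, init_median, update_node_maps, update_median, sod,
               max_itrs=100, tol=1e-9):
    median = init_median(graphs)
    best_sod = float('inf')
    for _ in range(max_itrs):
        # block 1: fix the median, update the node maps to each graph
        node_maps = [update_node_maps(median, g) for g in graphs]
        # block 2: fix the node maps, update the median graph
        median = update_median(node_maps, graphs)
        cur_sod = sod(median, graphs)
        if best_sod - cur_sod < tol:  # no sufficient decrease: converged
            break
        best_sod = cur_sod
    return median
```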
def __compute_distances_to_true_median(self): | |||
def _compute_distances_to_true_median(self): | |||
# compute distance in kernel space for set median. | |||
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) | |||
kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) | |||
kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options) | |||
kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options) | |||
if self._kernel_options['normalize']: | |||
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize | |||
kernel_sm = 1 | |||
# @todo: not correct kernel value | |||
gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||
gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) | |||
self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||
self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | |||
gram_with_sm, withterm3=False) | |||
# compute distance in kernel space for generalized median. | |||
kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) | |||
kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) | |||
kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options) | |||
kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options) | |||
if self._kernel_options['normalize']: | |||
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize | |||
kernel_gm = 1 | |||
gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||
gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) | |||
self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||
self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), | |||
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | |||
gram_with_gm, withterm3=False) | |||
@@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), | |||
gram_with_gm, withterm3=False)) | |||
idx_k_dis_median_set_min = np.argmin(k_dis_median_set) | |||
self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] | |||
self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() | |||
self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] | |||
self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() | |||
if self._verbose >= 2: | |||
print() | |||
print('distance in kernel space for set median:', self.__k_dis_set_median) | |||
print('distance in kernel space for generalized median:', self.__k_dis_gen_median) | |||
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) | |||
print('distance in kernel space for set median:', self._k_dis_set_median) | |||
print('distance in kernel space for generalized median:', self._k_dis_gen_median) | |||
print('minimum distance in kernel space for each graph in median set:', self._k_dis_dataset) | |||
print('distance in kernel space for each graph in median set:', k_dis_median_set) | |||
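Two implementation notes on the method above, for review. First, the pair of `np.concatenate` calls borders the n-by-n Gram matrix into an (n+1)-by-(n+1) matrix whose index 0 is the median graph, so `compute_k_dis(0, range(1, 1+n), ...)` can treat the median like any other graph. Second, `withterm3=False` omits the weighted self-term of the mean embedding, which is constant across candidates and therefore does not change which graph attains the minimum. A small shape check of the bordering (dummy values):

```python
import numpy as np

n = 3
gram = np.eye(n)                 # stand-in for the dataset Gram matrix
kernels_to_m = [0.5, 0.6, 0.7]   # k(median, g_i) for each dataset graph
kernel_m = 1.0                   # k(median, median)

bordered = np.concatenate((np.array([kernels_to_m]), gram), axis=0)
bordered = np.concatenate((np.array([[kernel_m] + kernels_to_m]).T, bordered), axis=1)

assert bordered.shape == (n + 1, n + 1)
assert bordered[0, 0] == kernel_m and bordered[0, 1] == kernels_to_m[0]
```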
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated. | |||
# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated. | |||
""" | |||
Cleans node and edge labels and attributes of the given graph. | |||
""" | |||
@@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator): | |||
@property | |||
def mge(self): | |||
return self.__mge | |||
return self._mge | |||
@property | |||
def ged_options(self): | |||
return self.__ged_options | |||
return self._ged_options | |||
@ged_options.setter | |||
def ged_options(self, value): | |||
self.__ged_options = value | |||
self._ged_options = value | |||
@property | |||
def mge_options(self): | |||
return self.__mge_options | |||
return self._mge_options | |||
@mge_options.setter | |||
def mge_options(self, value): | |||
self.__mge_options = value | |||
self._mge_options = value | |||
@property | |||
def fit_method(self): | |||
return self.__fit_method | |||
return self._fit_method | |||
@fit_method.setter | |||
def fit_method(self, value): | |||
self.__fit_method = value | |||
self._fit_method = value | |||
@property | |||
def init_ecc(self): | |||
return self.__init_ecc | |||
return self._init_ecc | |||
@init_ecc.setter | |||
def init_ecc(self, value): | |||
self.__init_ecc = value | |||
self._init_ecc = value | |||
@property | |||
def set_median(self): | |||
return self.__set_median | |||
return self._set_median | |||
@property | |||
def gen_median(self): | |||
return self.__gen_median | |||
return self._gen_median | |||
@property | |||
def best_from_dataset(self): | |||
return self.__best_from_dataset | |||
return self._best_from_dataset | |||
@property | |||
def gram_matrix_unnorm(self): | |||
return self.__gram_matrix_unnorm | |||
return self._gram_matrix_unnorm | |||
@gram_matrix_unnorm.setter | |||
def gram_matrix_unnorm(self, value): | |||
self.__gram_matrix_unnorm = value | |||
self._gram_matrix_unnorm = value |
@@ -26,43 +26,43 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
def __init__(self, dataset=None): | |||
PreimageGenerator.__init__(self, dataset=dataset) | |||
# arguments to set. | |||
self.__k = 5 # number of nearest neighbors of phi in D_N. | |||
self.__r_max = 10 # maximum number of iterations. | |||
self.__l = 500 # numbers of graphs generated for each graph in D_k U {g_i_hat}. | |||
self.__alphas = None # weights of linear combinations of points in kernel space. | |||
self.__parallel = True | |||
self.__n_jobs = multiprocessing.cpu_count() | |||
self.__time_limit_in_sec = 0 | |||
self.__max_itrs = 20 | |||
self._k = 5 # number of nearest neighbors of phi in D_N. | |||
self._r_max = 10 # maximum number of consecutive iterations without update. | |||
self._l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}. | |||
self._alphas = None # weights of linear combinations of points in kernel space. | |||
self._parallel = True | |||
self._n_jobs = multiprocessing.cpu_count() | |||
self._time_limit_in_sec = 0 | |||
self._max_itrs = 20 | |||
# values to compute. | |||
self.__runtime_generate_preimage = None | |||
self.__runtime_total = None | |||
self.__preimage = None | |||
self.__best_from_dataset = None | |||
self.__k_dis_preimage = None | |||
self.__k_dis_dataset = None | |||
self.__itrs = 0 | |||
self.__converged = False # @todo | |||
self.__num_updates = 0 | |||
self._runtime_generate_preimage = None | |||
self._runtime_total = None | |||
self._preimage = None | |||
self._best_from_dataset = None | |||
self._k_dis_preimage = None | |||
self._k_dis_dataset = None | |||
self._itrs = 0 | |||
self._converged = False # @todo | |||
self._num_updates = 0 | |||
# values that can be set or to be computed. | |||
self.__gram_matrix_unnorm = None | |||
self.__runtime_precompute_gm = None | |||
self._gram_matrix_unnorm = None | |||
self._runtime_precompute_gm = None | |||
def set_options(self, **kwargs): | |||
self._kernel_options = kwargs.get('kernel_options', {}) | |||
self._graph_kernel = kwargs.get('graph_kernel', None) | |||
self._verbose = kwargs.get('verbose', 2) | |||
self.__k = kwargs.get('k', 5) | |||
self.__r_max = kwargs.get('r_max', 10) | |||
self.__l = kwargs.get('l', 500) | |||
self.__alphas = kwargs.get('alphas', None) | |||
self.__parallel = kwargs.get('parallel', True) | |||
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||
self.__max_itrs = kwargs.get('max_itrs', 20) | |||
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||
self._k = kwargs.get('k', 5) | |||
self._r_max = kwargs.get('r_max', 10) | |||
self._l = kwargs.get('l', 500) | |||
self._alphas = kwargs.get('alphas', None) | |||
self._parallel = kwargs.get('parallel', True) | |||
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) | |||
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) | |||
self._max_itrs = kwargs.get('max_itrs', 20) | |||
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) | |||
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) | |||
def run(self): | |||
@@ -78,65 +78,65 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
start = time.time() | |||
# 1. precompute gram matrix. | |||
if self.__gram_matrix_unnorm is None: | |||
if self._gram_matrix_unnorm is None: | |||
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) | |||
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm | |||
end_precompute_gm = time.time() | |||
self.__runtime_precompute_gm = end_precompute_gm - start | |||
self._runtime_precompute_gm = end_precompute_gm - start | |||
else: | |||
if self.__runtime_precompute_gm is None: | |||
if self._runtime_precompute_gm is None: | |||
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') | |||
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm | |||
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm | |||
if self._kernel_options['normalize']: | |||
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) | |||
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) | |||
else: | |||
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) | |||
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) | |||
end_precompute_gm = time.time() | |||
start -= self.__runtime_precompute_gm | |||
start -= self._runtime_precompute_gm | |||
# 2. compute k nearest neighbors of phi in D_N. | |||
if self._verbose >= 2: | |||
print('\nstart computing k nearest neighbors of phi in D_N...\n') | |||
D_N = self._dataset.graphs | |||
if self.__alphas is None: | |||
self.__alphas = [1 / len(D_N)] * len(D_N) | |||
if self._alphas is None: | |||
self._alphas = [1 / len(D_N)] * len(D_N) | |||
k_dis_list = [] # distance between g_star and each graph. | |||
term3 = 0 | |||
for i1, a1 in enumerate(self.__alphas): | |||
for i2, a2 in enumerate(self.__alphas): | |||
for i1, a1 in enumerate(self._alphas): | |||
for i2, a2 in enumerate(self._alphas): | |||
term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] | |||
for idx in range(len(D_N)): | |||
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) | |||
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self._alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) | |||
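For reference, the quantity `compute_k_dis` evaluates here (with `withterm3=True`) is the kernel-space distance between the embedding of a graph $g$ and the weighted mean $\sum_i \alpha_i \varphi(g_i)$; expanding the squared norm gives

$$d(g) = \sqrt{\, k(g,g) - 2\sum_i \alpha_i\, k(g, g_i) + \underbrace{\textstyle\sum_{i,j} \alpha_i \alpha_j\, k(g_i, g_j)}_{\texttt{term3}} \,}.$$

The third term does not depend on $g$, which is why it is accumulated once in the double loop above and passed in as `term3`.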
# sort. | |||
sort_idx = np.argsort(k_dis_list) | |||
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances. | |||
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self._k]] # the k shortest distances. | |||
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) | |||
g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N | |||
self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are muitlple. | |||
self.__k_dis_dataset = dis_gs[0] | |||
self._best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple. | |||
self._k_dis_dataset = dis_gs[0] | |||
if self.__k_dis_dataset == 0: # get the exact pre-image. | |||
if self._k_dis_dataset == 0: # get the exact pre-image. | |||
end_generate_preimage = time.time() | |||
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||
self.__runtime_total = end_generate_preimage - start | |||
self.__preimage = self.__best_from_dataset.copy() | |||
self.__k_dis_preimage = self.__k_dis_dataset | |||
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||
self._runtime_total = end_generate_preimage - start | |||
self._preimage = self._best_from_dataset.copy() | |||
self._k_dis_preimage = self._k_dis_dataset | |||
if self._verbose: | |||
print() | |||
print('=============================================================================') | |||
print('The exact pre-image is found from the input dataset.') | |||
print('-----------------------------------------------------------------------------') | |||
print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset) | |||
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||
print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||
print('Total time:', self.__runtime_total) | |||
print('Distance in kernel space for the best graph from dataset and for preimage:', self._k_dis_dataset) | |||
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) | |||
print('Time to generate pre-images:', self._runtime_generate_preimage) | |||
print('Total time:', self._runtime_total) | |||
print('=============================================================================') | |||
print() | |||
return | |||
dhat = dis_gs[0] # the nearest distance | |||
Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors | |||
Gk = [D_N[ig].copy() for ig in sort_idx[0:self._k]] # the k nearest neighbors | |||
Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] | |||
# 3. start iterations. | |||
@@ -146,12 +146,12 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
dihat_list = [] | |||
r = 0 | |||
dis_of_each_itr = [dhat] | |||
if self.__parallel: | |||
if self._parallel: | |||
self._kernel_options['parallel'] = None | |||
self.__itrs = 0 | |||
self.__num_updates = 0 | |||
timer = Timer(self.__time_limit_in_sec) | |||
while not self.__termination_criterion_met(timer, self.__itrs, r): | |||
self._itrs = 0 | |||
self._num_updates = 0 | |||
timer = Timer(self._time_limit_in_sec) | |||
while not self._termination_criterion_met(timer, self._itrs, r): | |||
print('\n- r =', r) | |||
found = False | |||
dis_bests = dis_gs + dihat_list | |||
@@ -173,7 +173,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
nb_modif = 1 | |||
for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): | |||
nb_modif *= nb / (fdgs_max - idx) | |||
while fdgs_max < nb_vpairs_min and nb_modif < self.__l: | |||
while fdgs_max < nb_vpairs_min and nb_modif < self._l: | |||
fdgs_max += 1 | |||
nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max | |||
nb_increase = int(fdgs_max - fdgs_max_old) | |||
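The running product in the `for` loop above is the binomial coefficient C(nb_vpairs_min, fdgs_max), i.e. the number of distinct modifications of that size, and the `while` loop grows `fdgs_max` until at least `l` candidates exist using the recurrence C(n, f) = C(n, f-1) * (n-f+1)/f. A quick sanity check of both:

```python
from math import comb

n, f = 10, 2
nb_modif = 1.0
for idx, nb in enumerate(range(n, n - f, -1)):
    nb_modif *= nb / (f - idx)           # same product as in the source
assert round(nb_modif) == comb(n, 2) == 45

f += 1
nb_modif *= (n - f + 1) / f              # same update as in the while loop
assert round(nb_modif) == comb(n, 3) == 120
```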
@@ -184,7 +184,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
for ig, gs in enumerate(Gs_nearest + gihat_list): | |||
if self._verbose >= 2: | |||
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) | |||
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||
gnew, dhat, found = self._generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) | |||
if found: | |||
r = 0 | |||
@@ -194,51 +194,51 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
r += 1 | |||
dis_of_each_itr.append(dhat) | |||
self.__itrs += 1 | |||
self._itrs += 1 | |||
if self._verbose >= 2: | |||
print('Total number of iterations is', self.__itrs, '.') | |||
print('The preimage is updated', self.__num_updates, 'times.') | |||
print('Total number of iterations is', self._itrs, '.') | |||
print('The preimage is updated', self._num_updates, 'times.') | |||
print('The shortest distances for previous iterations are', dis_of_each_itr, '.') | |||
# get results and print. | |||
end_generate_preimage = time.time() | |||
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||
self.__runtime_total = end_generate_preimage - start | |||
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||
self.__k_dis_preimage = dhat | |||
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm | |||
self._runtime_total = end_generate_preimage - start | |||
self._preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) | |||
self._k_dis_preimage = dhat | |||
if self._verbose: | |||
print() | |||
print('=============================================================================') | |||
print('Finished generation of preimages.') | |||
print('-----------------------------------------------------------------------------') | |||
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset) | |||
print('Distance in kernel space for the preimage:', self.__k_dis_preimage) | |||
print('Total number of iterations for optimizing:', self.__itrs) | |||
print('Total number of updating preimage:', self.__num_updates) | |||
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) | |||
print('Time to generate pre-images:', self.__runtime_generate_preimage) | |||
print('Total time:', self.__runtime_total) | |||
print('Distance in kernel space for the best graph from dataset:', self._k_dis_dataset) | |||
print('Distance in kernel space for the preimage:', self._k_dis_preimage) | |||
print('Total number of iterations for optimizing:', self._itrs) | |||
print('Total number of preimage updates:', self._num_updates) | |||
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) | |||
print('Time to generate pre-images:', self._runtime_generate_preimage) | |||
print('Total time:', self._runtime_total) | |||
print('=============================================================================') | |||
print() | |||
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): | |||
if self.__parallel: | |||
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) | |||
def _generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): | |||
if self._parallel: | |||
gnew, dhat, found = self._generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) | |||
else: | |||
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) | |||
gnew, dhat, found = self._generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) | |||
return gnew, dhat, found | |||
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): | |||
def _generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): | |||
gnew = None | |||
updated = False | |||
for trial in range(0, self.__l): | |||
for trial in range(0, self._l): | |||
if self._verbose >= 2: | |||
print('---', trial + 1, 'trial out of', self.__l) | |||
print('---', trial + 1, 'trial out of', self._l) | |||
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) | |||
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial) | |||
# get the better graph preimage. | |||
if dnew <= dhat: # @todo: the new distance is smaller or also equal? | |||
@@ -257,14 +257,14 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
found = True # found better or equally good graph. | |||
if updated: | |||
self.__num_updates += 1 | |||
self._num_updates += 1 | |||
return gnew, dhat, found | |||
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): | |||
def _generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): | |||
gnew = None | |||
len_itr = self.__l | |||
len_itr = self._l | |||
gnew_list = [None] * len_itr | |||
dnew_list = [None] * len_itr | |||
itr = range(0, len_itr) | |||
@@ -295,7 +295,7 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
print('I am smaller!') | |||
print('index (as in D_k U {gihat}) =', str(ig)) | |||
print('distance:', dhat, '->', dnew, '\n') | |||
self.__num_updates += 1 | |||
self._num_updates += 1 | |||
else: | |||
if self._verbose >= 2: | |||
print('I am equal!') | |||
@@ -308,11 +308,11 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
def _generate_graph_parallel(self, g_init, fdgs, term3, itr): | |||
trial = itr | |||
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) | |||
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial) | |||
return trial, gtemp, dnew | |||
def __do_trial(self, g_init, fdgs, term3, trial): | |||
def _do_trial(self, g_init, fdgs, term3, trial): | |||
# add and delete edges. | |||
gtemp = g_init.copy() | |||
seed = (trial + int(time.time())) % (2 ** 32 - 1) | |||
@@ -339,51 +339,51 @@ class RandomPreimageGenerator(PreimageGenerator): | |||
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) | |||
kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) | |||
if self._kernel_options['normalize']: | |||
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize | |||
kernel_gtmp = 1 | |||
# @todo: not correct kernel value | |||
gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) | |||
gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) | |||
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self._alphas, gram_with_gtmp, term3=term3, withterm3=True) | |||
return gtemp, dnew | |||
def get_results(self): | |||
results = {} | |||
results['runtime_precompute_gm'] = self.__runtime_precompute_gm | |||
results['runtime_generate_preimage'] = self.__runtime_generate_preimage | |||
results['runtime_total'] = self.__runtime_total | |||
results['k_dis_dataset'] = self.__k_dis_dataset | |||
results['k_dis_preimage'] = self.__k_dis_preimage | |||
results['itrs'] = self.__itrs | |||
results['num_updates'] = self.__num_updates | |||
results['runtime_precompute_gm'] = self._runtime_precompute_gm | |||
results['runtime_generate_preimage'] = self._runtime_generate_preimage | |||
results['runtime_total'] = self._runtime_total | |||
results['k_dis_dataset'] = self._k_dis_dataset | |||
results['k_dis_preimage'] = self._k_dis_preimage | |||
results['itrs'] = self._itrs | |||
results['num_updates'] = self._num_updates | |||
return results | |||
def __termination_criterion_met(self, timer, itr, r): | |||
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): | |||
# if self.__state == AlgorithmState.TERMINATED: | |||
# self.__state = AlgorithmState.INITIALIZED | |||
def _termination_criterion_met(self, timer, itr, r): | |||
if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False): | |||
# if self._state == AlgorithmState.TERMINATED: | |||
# self._state = AlgorithmState.INITIALIZED | |||
return True | |||
return (r >= self.__r_max if self.__r_max >= 0 else False) | |||
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) | |||
return (r >= self._r_max if self._r_max >= 0 else False) | |||
# return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False) | |||
@property | |||
def preimage(self): | |||
return self.__preimage | |||
return self._preimage | |||
@property | |||
def best_from_dataset(self): | |||
return self.__best_from_dataset | |||
return self._best_from_dataset | |||
@property | |||
def gram_matrix_unnorm(self): | |||
return self.__gram_matrix_unnorm | |||
return self._gram_matrix_unnorm | |||
@gram_matrix_unnorm.setter | |||
def gram_matrix_unnorm(self, value): | |||
self.__gram_matrix_unnorm = value | |||
self._gram_matrix_unnorm = value |
@@ -35,13 +35,13 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt | |||
if save_results: | |||
# create result files. | |||
print('creating output files...') | |||
fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||
fn_output_detail, fn_output_summary = _init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||
else: | |||
fn_output_detail, fn_output_summary = None, None | |||
# 2. compute/load Gram matrix a priori. | |||
print('2. computing/loading Gram matrix...') | |||
gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets) | |||
gram_matrix_unnorm_list, time_precompute_gm_list = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets) | |||
sod_sm_list = [] | |||
sod_gm_list = [] | |||
@@ -82,7 +82,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt | |||
# 3. get the best graph and remove it from median set. | |||
print('3. getting and removing the best graph...') | |||
gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset] | |||
best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy())) | |||
best_index, best_dis, best_graph = _get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy())) | |||
median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index] | |||
num_graphs -= 1 | |||
if num_graphs == 1: | |||
@@ -294,7 +294,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt | |||
print('\ncomplete.\n') | |||
def __get_best_graph(Gn, gram_matrix): | |||
def _get_best_graph(Gn, gram_matrix): | |||
k_dis_list = [] | |||
for idx in range(len(Gn)): | |||
k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False)) | |||
@@ -313,7 +313,7 @@ def get_relations(sign): | |||
return 'worse' | |||
def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||
if load_gm == 'auto': | |||
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' | |||
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) | |||
@@ -325,7 +325,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||
gram_matrix_unnorm_list = [] | |||
time_precompute_gm_list = [] | |||
for dataset in datasets: | |||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options) | |||
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options) | |||
gram_matrix_unnorm_list.append(gram_matrix_unnorm) | |||
time_precompute_gm_list.append(time_precompute_gm) | |||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | |||
@@ -333,7 +333,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||
gram_matrix_unnorm_list = [] | |||
time_precompute_gm_list = [] | |||
for dataset in datasets: | |||
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options) | |||
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options) | |||
gram_matrix_unnorm_list.append(gram_matrix_unnorm) | |||
time_precompute_gm_list.append(time_precompute_gm) | |||
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) | |||
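For reference, an archive written by the `np.savez` calls above can be read back with `np.load`; the keyword names become the archive keys (`allow_pickle=True` is needed because the stored lists are object arrays; the file name below is illustrative):

```python
import numpy as np

gmfile = np.load('gram_matrix_unnorm.MUTAG.ShortestPath.gm.npz', allow_pickle=True)
gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
time_precompute_gm_list = gmfile['run_time_list'].tolist()
```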
@@ -346,7 +346,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): | |||
return gram_matrix_unnorm_list, time_precompute_gm_list | |||
def __get_graph_kernel(dataset, kernel_options): | |||
def _get_graph_kernel(dataset, kernel_options): | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||
node_labels=dataset.node_labels, | |||
@@ -358,7 +358,7 @@ def __get_graph_kernel(dataset, kernel_options): | |||
return graph_kernel | |||
def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||
def _compute_gram_matrix_unnorm(dataset, kernel_options): | |||
from gklearn.utils.utils import get_graph_kernel_by_name | |||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||
node_labels=dataset.node_labels, | |||
@@ -374,7 +374,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options): | |||
return gram_matrix_unnorm, run_time | |||
def __init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
def _init_output_file(ds_name, gkernel, fit_method, dir_output): | |||
if not os.path.exists(dir_output): | |||
os.makedirs(dir_output) | |||
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' | |||
@@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
if save_results: | |||
# create result files. | |||
print('creating output files...') | |||
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) | |||
sod_sm_list = [] | |||
sod_gm_list = [] | |||
@@ -307,7 +307,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged | |||
print('\ncomplete.\n') | |||
def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output): | |||
def _init_output_file_preimage(ds_name, gkernel, fit_method, dir_output): | |||
if not os.path.exists(dir_output): | |||
os.makedirs(dir_output) | |||
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' | |||
@@ -16,54 +16,54 @@ class Dataset(object): | |||
def __init__(self, filename=None, filename_targets=None, **kwargs): | |||
if filename is None: | |||
self.__graphs = None | |||
self.__targets = None | |||
self.__node_labels = None | |||
self.__edge_labels = None | |||
self.__node_attrs = None | |||
self.__edge_attrs = None | |||
self._graphs = None | |||
self._targets = None | |||
self._node_labels = None | |||
self._edge_labels = None | |||
self._node_attrs = None | |||
self._edge_attrs = None | |||
else: | |||
self.load_dataset(filename, filename_targets=filename_targets, **kwargs) | |||
self.__substructures = None | |||
self.__node_label_dim = None | |||
self.__edge_label_dim = None | |||
self.__directed = None | |||
self.__dataset_size = None | |||
self.__total_node_num = None | |||
self.__ave_node_num = None | |||
self.__min_node_num = None | |||
self.__max_node_num = None | |||
self.__total_edge_num = None | |||
self.__ave_edge_num = None | |||
self.__min_edge_num = None | |||
self.__max_edge_num = None | |||
self.__ave_node_degree = None | |||
self.__min_node_degree = None | |||
self.__max_node_degree = None | |||
self.__ave_fill_factor = None | |||
self.__min_fill_factor = None | |||
self.__max_fill_factor = None | |||
self.__node_label_nums = None | |||
self.__edge_label_nums = None | |||
self.__node_attr_dim = None | |||
self.__edge_attr_dim = None | |||
self.__class_number = None | |||
self._substructures = None | |||
self._node_label_dim = None | |||
self._edge_label_dim = None | |||
self._directed = None | |||
self._dataset_size = None | |||
self._total_node_num = None | |||
self._ave_node_num = None | |||
self._min_node_num = None | |||
self._max_node_num = None | |||
self._total_edge_num = None | |||
self._ave_edge_num = None | |||
self._min_edge_num = None | |||
self._max_edge_num = None | |||
self._ave_node_degree = None | |||
self._min_node_degree = None | |||
self._max_node_degree = None | |||
self._ave_fill_factor = None | |||
self._min_fill_factor = None | |||
self._max_fill_factor = None | |||
self._node_label_nums = None | |||
self._edge_label_nums = None | |||
self._node_attr_dim = None | |||
self._edge_attr_dim = None | |||
self._class_number = None | |||
def load_dataset(self, filename, filename_targets=None, **kwargs): | |||
self.__graphs, self.__targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) | |||
self.__node_labels = label_names['node_labels'] | |||
self.__node_attrs = label_names['node_attrs'] | |||
self.__edge_labels = label_names['edge_labels'] | |||
self.__edge_attrs = label_names['edge_attrs'] | |||
self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) | |||
self._node_labels = label_names['node_labels'] | |||
self._node_attrs = label_names['node_attrs'] | |||
self._edge_labels = label_names['edge_labels'] | |||
self._edge_attrs = label_names['edge_attrs'] | |||
self.clean_labels() | |||
def load_graphs(self, graphs, targets=None): | |||
# this has to be followed by set_labels(). | |||
self.__graphs = graphs | |||
self.__targets = targets | |||
self._graphs = graphs | |||
self._targets = targets | |||
# self.set_labels_attrs() # @todo | |||
@@ -71,108 +71,108 @@ class Dataset(object): | |||
current_path = os.path.dirname(os.path.realpath(__file__)) + '/' | |||
if ds_name == 'Acyclic': | |||
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'AIDS': | |||
ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Alkane': | |||
ds_file = current_path + '../../datasets/Alkane/dataset.ds' | |||
fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file, filename_targets=fn_targets) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file, filename_targets=fn_targets) | |||
elif ds_name == 'COIL-DEL': | |||
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'COIL-RAG': | |||
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'COLORS-3': | |||
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Cuneiform': | |||
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'DD': | |||
ds_file = current_path + '../../datasets/DD/DD_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'ENZYMES': | |||
ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Fingerprint': | |||
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'FRANKENSTEIN': | |||
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-high': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-low': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Letter-med': # node non-symb | |||
ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'MAO': | |||
ds_file = current_path + '../../datasets/MAO/dataset.ds' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Monoterpenoides': | |||
ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'MUTAG': | |||
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'NCI1': | |||
ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'NCI109': | |||
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'PAH': | |||
ds_file = current_path + '../../datasets/PAH/dataset.ds' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'SYNTHETIC': | |||
pass | |||
elif ds_name == 'SYNTHETICnew': | |||
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' | |||
self.__graphs, self.__targets, label_names = load_dataset(ds_file) | |||
self._graphs, self._targets, label_names = load_dataset(ds_file) | |||
elif ds_name == 'Synthie': | |||
pass | |||
else: | |||
raise Exception('The dataset name "' + ds_name + '" is not pre-defined.') | |||
self.__node_labels = label_names['node_labels'] | |||
self.__node_attrs = label_names['node_attrs'] | |||
self.__edge_labels = label_names['edge_labels'] | |||
self.__edge_attrs = label_names['edge_attrs'] | |||
self._node_labels = label_names['node_labels'] | |||
self._node_attrs = label_names['node_attrs'] | |||
self._edge_labels = label_names['edge_labels'] | |||
self._edge_attrs = label_names['edge_attrs'] | |||
self.clean_labels() | |||
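A usage sketch of the loader above, assuming the package's usual import path (the dataset name must be one of the pre-defined ones; files are resolved relative to the installed package):

```python
from gklearn.utils import Dataset

ds = Dataset()
ds.load_predefined_dataset('MUTAG')
print(len(ds.graphs), 'graphs loaded')
```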
def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): | |||
self.__node_labels = node_labels | |||
self.__node_attrs = node_attrs | |||
self.__edge_labels = edge_labels | |||
self.__edge_attrs = edge_attrs | |||
self._node_labels = node_labels | |||
self._node_attrs = node_attrs | |||
self._edge_labels = edge_labels | |||
self._edge_attrs = edge_attrs | |||
def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None): | |||
# @todo: remove labels which have only one possible value. | |||
if node_labels is None: | |||
self.__node_labels = self.__graphs[0].graph['node_labels'] | |||
self._node_labels = self._graphs[0].graph['node_labels'] | |||
# # graphs are considered node unlabeled if all nodes have the same label. | |||
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) | |||
if node_attrs is None: | |||
self.__node_attrs = self.__graphs[0].graph['node_attrs'] | |||
self._node_attrs = self._graphs[0].graph['node_attrs'] | |||
# for G in Gn: | |||
# for n in G.nodes(data=True): | |||
# if 'attributes' in n[1]: | |||
# return len(n[1]['attributes']) | |||
# return 0 | |||
if edge_labels is None: | |||
self.__edge_labels = self.__graphs[0].graph['edge_labels'] | |||
self._edge_labels = self._graphs[0].graph['edge_labels'] | |||
# # graphs are considered edge unlabeled if all edges have the same label. | |||
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) | |||
if edge_attrs is None: | |||
self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] | |||
self._edge_attrs = self._graphs[0].graph['edge_attrs'] | |||
# for G in Gn: | |||
# if nx.number_of_edges(G) > 0: | |||
# for e in G.edges(data=True): | |||
@@ -291,145 +291,145 @@ class Dataset(object): | |||
# dataset size | |||
if 'dataset_size' in keys: | |||
if self.__dataset_size is None: | |||
self.__dataset_size = self.__get_dataset_size() | |||
infos['dataset_size'] = self.__dataset_size | |||
if self._dataset_size is None: | |||
self._dataset_size = self._get_dataset_size() | |||
infos['dataset_size'] = self._dataset_size | |||
# graph node number | |||
if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']): | |||
all_node_nums = self.__get_all_node_nums() | |||
all_node_nums = self._get_all_node_nums() | |||
if 'total_node_num' in keys: | |||
if self.__total_node_num is None: | |||
self.__total_node_num = self.__get_total_node_num(all_node_nums) | |||
infos['total_node_num'] = self.__total_node_num | |||
if self._total_node_num is None: | |||
self._total_node_num = self._get_total_node_num(all_node_nums) | |||
infos['total_node_num'] = self._total_node_num | |||
if 'ave_node_num' in keys: | |||
if self.__ave_node_num is None: | |||
self.__ave_node_num = self.__get_ave_node_num(all_node_nums) | |||
infos['ave_node_num'] = self.__ave_node_num | |||
if self._ave_node_num is None: | |||
self._ave_node_num = self._get_ave_node_num(all_node_nums) | |||
infos['ave_node_num'] = self._ave_node_num | |||
if 'min_node_num' in keys: | |||
if self.__min_node_num is None: | |||
self.__min_node_num = self.__get_min_node_num(all_node_nums) | |||
infos['min_node_num'] = self.__min_node_num | |||
if self._min_node_num is None: | |||
self._min_node_num = self._get_min_node_num(all_node_nums) | |||
infos['min_node_num'] = self._min_node_num | |||
if 'max_node_num' in keys: | |||
if self.__max_node_num is None: | |||
self.__max_node_num = self.__get_max_node_num(all_node_nums) | |||
infos['max_node_num'] = self.__max_node_num | |||
if self._max_node_num is None: | |||
self._max_node_num = self._get_max_node_num(all_node_nums) | |||
infos['max_node_num'] = self._max_node_num | |||
# graph edge number | |||
if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']): | |||
all_edge_nums = self.__get_all_edge_nums() | |||
all_edge_nums = self._get_all_edge_nums() | |||
if 'total_edge_num' in keys: | |||
if self.__total_edge_num is None: | |||
self.__total_edge_num = self.__get_total_edge_num(all_edge_nums) | |||
infos['total_edge_num'] = self.__total_edge_num | |||
if self._total_edge_num is None: | |||
self._total_edge_num = self._get_total_edge_num(all_edge_nums) | |||
infos['total_edge_num'] = self._total_edge_num | |||
if 'ave_edge_num' in keys: | |||
if self.__ave_edge_num is None: | |||
self.__ave_edge_num = self.__get_ave_edge_num(all_edge_nums) | |||
infos['ave_edge_num'] = self.__ave_edge_num | |||
if self._ave_edge_num is None: | |||
self._ave_edge_num = self._get_ave_edge_num(all_edge_nums) | |||
infos['ave_edge_num'] = self._ave_edge_num | |||
if 'max_edge_num' in keys: | |||
if self.__max_edge_num is None: | |||
self.__max_edge_num = self.__get_max_edge_num(all_edge_nums) | |||
infos['max_edge_num'] = self.__max_edge_num | |||
if self._max_edge_num is None: | |||
self._max_edge_num = self._get_max_edge_num(all_edge_nums) | |||
infos['max_edge_num'] = self._max_edge_num | |||
if 'min_edge_num' in keys: | |||
if self.__min_edge_num is None: | |||
self.__min_edge_num = self.__get_min_edge_num(all_edge_nums) | |||
infos['min_edge_num'] = self.__min_edge_num | |||
if self._min_edge_num is None: | |||
self._min_edge_num = self._get_min_edge_num(all_edge_nums) | |||
infos['min_edge_num'] = self._min_edge_num | |||
# label number | |||
if 'node_label_dim' in keys: | |||
if self.__node_label_dim is None: | |||
self.__node_label_dim = self.__get_node_label_dim() | |||
infos['node_label_dim'] = self.__node_label_dim | |||
if self._node_label_dim is None: | |||
self._node_label_dim = self._get_node_label_dim() | |||
infos['node_label_dim'] = self._node_label_dim | |||
if 'node_label_nums' in keys: | |||
if self.__node_label_nums is None: | |||
self.__node_label_nums = {} | |||
for node_label in self.__node_labels: | |||
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) | |||
infos['node_label_nums'] = self.__node_label_nums | |||
if self._node_label_nums is None: | |||
self._node_label_nums = {} | |||
for node_label in self._node_labels: | |||
self._node_label_nums[node_label] = self._get_node_label_num(node_label) | |||
infos['node_label_nums'] = self._node_label_nums | |||
if 'edge_label_dim' in keys: | |||
if self.__edge_label_dim is None: | |||
self.__edge_label_dim = self.__get_edge_label_dim() | |||
infos['edge_label_dim'] = self.__edge_label_dim | |||
if self._edge_label_dim is None: | |||
self._edge_label_dim = self._get_edge_label_dim() | |||
infos['edge_label_dim'] = self._edge_label_dim | |||
if 'edge_label_nums' in keys: | |||
if self.__edge_label_nums is None: | |||
self.__edge_label_nums = {} | |||
for edge_label in self.__edge_labels: | |||
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) | |||
infos['edge_label_nums'] = self.__edge_label_nums | |||
if self._edge_label_nums is None: | |||
self._edge_label_nums = {} | |||
for edge_label in self._edge_labels: | |||
self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label) | |||
infos['edge_label_nums'] = self._edge_label_nums | |||
if 'directed' in keys or 'substructures' in keys: | |||
if self.__directed is None: | |||
self.__directed = self.__is_directed() | |||
infos['directed'] = self.__directed | |||
if self._directed is None: | |||
self._directed = self._is_directed() | |||
infos['directed'] = self._directed | |||
# node degree | |||
if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']): | |||
all_node_degrees = self.__get_all_node_degrees() | |||
all_node_degrees = self._get_all_node_degrees() | |||
if 'ave_node_degree' in keys: | |||
if self.__ave_node_degree is None: | |||
self.__ave_node_degree = self.__get_ave_node_degree(all_node_degrees) | |||
infos['ave_node_degree'] = self.__ave_node_degree | |||
if self._ave_node_degree is None: | |||
self._ave_node_degree = self._get_ave_node_degree(all_node_degrees) | |||
infos['ave_node_degree'] = self._ave_node_degree | |||
if 'max_node_degree' in keys: | |||
if self.__max_node_degree is None: | |||
self.__max_node_degree = self.__get_max_node_degree(all_node_degrees) | |||
infos['max_node_degree'] = self.__max_node_degree | |||
if self._max_node_degree is None: | |||
self._max_node_degree = self._get_max_node_degree(all_node_degrees) | |||
infos['max_node_degree'] = self._max_node_degree | |||
if 'min_node_degree' in keys: | |||
if self.__min_node_degree is None: | |||
self.__min_node_degree = self.__get_min_node_degree(all_node_degrees) | |||
infos['min_node_degree'] = self.__min_node_degree | |||
if self._min_node_degree is None: | |||
self._min_node_degree = self._get_min_node_degree(all_node_degrees) | |||
infos['min_node_degree'] = self._min_node_degree | |||
# fill factor | |||
if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']): | |||
all_fill_factors = self.__get_all_fill_factors() | |||
all_fill_factors = self._get_all_fill_factors() | |||
if 'ave_fill_factor' in keys: | |||
if self.__ave_fill_factor is None: | |||
self.__ave_fill_factor = self.__get_ave_fill_factor(all_fill_factors) | |||
infos['ave_fill_factor'] = self.__ave_fill_factor | |||
if self._ave_fill_factor is None: | |||
self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors) | |||
infos['ave_fill_factor'] = self._ave_fill_factor | |||
if 'max_fill_factor' in keys: | |||
if self.__max_fill_factor is None: | |||
self.__max_fill_factor = self.__get_max_fill_factor(all_fill_factors) | |||
infos['max_fill_factor'] = self.__max_fill_factor | |||
if self._max_fill_factor is None: | |||
self._max_fill_factor = self._get_max_fill_factor(all_fill_factors) | |||
infos['max_fill_factor'] = self._max_fill_factor | |||
if 'min_fill_factor' in keys: | |||
if self.__min_fill_factor is None: | |||
self.__min_fill_factor = self.__get_min_fill_factor(all_fill_factors) | |||
infos['min_fill_factor'] = self.__min_fill_factor | |||
if self._min_fill_factor is None: | |||
self._min_fill_factor = self._get_min_fill_factor(all_fill_factors) | |||
infos['min_fill_factor'] = self._min_fill_factor | |||
if 'substructures' in keys: | |||
if self.__substructures is None: | |||
self.__substructures = self.__get_substructures() | |||
infos['substructures'] = self.__substructures | |||
if self._substructures is None: | |||
self._substructures = self._get_substructures() | |||
infos['substructures'] = self._substructures | |||
if 'class_number' in keys: | |||
if self.__class_number is None: | |||
self.__class_number = self.__get_class_number() | |||
infos['class_number'] = self.__class_number | |||
if self._class_number is None: | |||
self._class_number = self._get_class_number() | |||
infos['class_number'] = self._class_number | |||
if 'node_attr_dim' in keys: | |||
if self.__node_attr_dim is None: | |||
self.__node_attr_dim = self.__get_node_attr_dim() | |||
infos['node_attr_dim'] = self.__node_attr_dim | |||
if self._node_attr_dim is None: | |||
self._node_attr_dim = self._get_node_attr_dim() | |||
infos['node_attr_dim'] = self._node_attr_dim | |||
if 'edge_attr_dim' in keys: | |||
if self.__edge_attr_dim is None: | |||
self.__edge_attr_dim = self.__get_edge_attr_dim() | |||
infos['edge_attr_dim'] = self.__edge_attr_dim | |||
if self._edge_attr_dim is None: | |||
self._edge_attr_dim = self._get_edge_attr_dim() | |||
infos['edge_attr_dim'] = self._edge_attr_dim | |||
# entropy of degree distribution. | |||
@@ -438,14 +438,14 @@ class Dataset(object): | |||
base = params['all_degree_entropy']['base'] | |||
else: | |||
base = None | |||
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) | |||
infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base) | |||
if 'ave_degree_entropy' in keys: | |||
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): | |||
base = params['ave_degree_entropy']['base'] | |||
else: | |||
base = None | |||
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) | |||
infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base)) | |||
return infos | |||
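For orientation, a minimal usage sketch of the info dispatcher above. The entry-point name get_dataset_infos and the import path are assumptions (the def line is outside this hunk); load_graphs is taken from the copy() method later in this diff.

    import networkx as nx
    from gklearn.utils import Dataset  # import path assumed

    ds = Dataset()
    ds.load_graphs([nx.path_graph(3), nx.cycle_graph(4)], [0, 1])
    infos = ds.get_dataset_infos(keys=['dataset_size', 'ave_node_num', 'directed'])
    print(infos)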
@@ -457,12 +457,12 @@ class Dataset(object): | |||
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): | |||
node_labels = [item for item in node_labels if item in self.__node_labels] | |||
edge_labels = [item for item in edge_labels if item in self.__edge_labels] | |||
node_attrs = [item for item in node_attrs if item in self.__node_attrs] | |||
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs] | |||
node_labels = [item for item in node_labels if item in self._node_labels] | |||
edge_labels = [item for item in edge_labels if item in self._edge_labels] | |||
node_attrs = [item for item in node_attrs if item in self._node_attrs] | |||
edge_attrs = [item for item in edge_attrs if item in self._edge_attrs] | |||
for g in self.__graphs: | |||
for g in self._graphs: | |||
for nd in g.nodes(): | |||
for nl in node_labels: | |||
del g.nodes[nd][nl] | |||
@@ -474,99 +474,99 @@ class Dataset(object): | |||
for ea in edge_attrs: | |||
del g.edges[ed][ea] | |||
if len(node_labels) > 0: | |||
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] | |||
self._node_labels = [nl for nl in self._node_labels if nl not in node_labels] | |||
if len(edge_labels) > 0: | |||
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] | |||
self._edge_labels = [el for el in self._edge_labels if el not in edge_labels] | |||
if len(node_attrs) > 0: | |||
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] | |||
self._node_attrs = [na for na in self._node_attrs if na not in node_attrs] | |||
if len(edge_attrs) > 0: | |||
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] | |||
self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs] | |||
def clean_labels(self): | |||
labels = [] | |||
for name in self.__node_labels: | |||
for name in self._node_labels: | |||
label = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
label = label | set(nx.get_node_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
for nd in G.nodes(): | |||
del G.nodes[nd][name] | |||
self.__node_labels = labels | |||
self._node_labels = labels | |||
labels = [] | |||
for name in self.__edge_labels: | |||
for name in self._edge_labels: | |||
label = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
label = label | set(nx.get_edge_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
for ed in G.edges(): | |||
del G.edges[ed][name] | |||
self.__edge_labels = labels | |||
self._edge_labels = labels | |||
labels = [] | |||
for name in self.__node_attrs: | |||
for name in self._node_attrs: | |||
label = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
label = label | set(nx.get_node_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
for nd in G.nodes(): | |||
del G.nodes[nd][name] | |||
self.__node_attrs = labels | |||
self._node_attrs = labels | |||
labels = [] | |||
for name in self.__edge_attrs: | |||
for name in self._edge_attrs: | |||
label = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
label = label | set(nx.get_edge_attributes(G, name).values()) | |||
if len(label) > 1: | |||
labels.append(name) | |||
break | |||
if len(label) < 2: | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
for ed in G.edges(): | |||
del G.edges[ed][name] | |||
self.__edge_attrs = labels | |||
self._edge_attrs = labels | |||
def cut_graphs(self, range_): | |||
self.__graphs = [self.__graphs[i] for i in range_] | |||
if self.__targets is not None: | |||
self.__targets = [self.__targets[i] for i in range_] | |||
self._graphs = [self._graphs[i] for i in range_] | |||
if self._targets is not None: | |||
self._targets = [self._targets[i] for i in range_] | |||
self.clean_labels() | |||
def trim_dataset(self, edge_required=False): | |||
if edge_required: | |||
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)] | |||
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)] | |||
else: | |||
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if nx.number_of_nodes(g) != 0] | |||
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if nx.number_of_nodes(g) != 0] | |||
idx = [p[0] for p in trimed_pairs] | |||
self.__graphs = [p[1] for p in trimed_pairs] | |||
self.__targets = [self.__targets[i] for i in idx] | |||
self._graphs = [p[1] for p in trimed_pairs] | |||
if self._targets is not None:  # guard, as in cut_graphs, for target-less datasets | |||
self._targets = [self._targets[i] for i in idx] | |||
self.clean_labels() | |||
def copy(self): | |||
dataset = Dataset() | |||
graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None | |||
target = self.__targets.copy() if self.__targets is not None else None | |||
node_labels = self.__node_labels.copy() if self.__node_labels is not None else None | |||
node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None | |||
edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None | |||
edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None | |||
graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None | |||
target = self._targets.copy() if self._targets is not None else None | |||
node_labels = self._node_labels.copy() if self._node_labels is not None else None | |||
node_attrs = self._node_attrs.copy() if self._node_attrs is not None else None | |||
edge_labels = self._edge_labels.copy() if self._edge_labels is not None else None | |||
edge_attrs = self._edge_attrs.copy() if self._edge_attrs is not None else None | |||
dataset.load_graphs(graphs, target) | |||
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) | |||
# @todo: clean_labels and add other class members? | |||
@@ -575,7 +575,7 @@ class Dataset(object): | |||
def get_all_node_labels(self): | |||
node_labels = [] | |||
for g in self.__graphs: | |||
for g in self._graphs: | |||
for n in g.nodes(): | |||
nl = tuple(g.nodes[n].items()) | |||
if nl not in node_labels: | |||
@@ -585,7 +585,7 @@ class Dataset(object): | |||
def get_all_edge_labels(self): | |||
edge_labels = [] | |||
for g in self.__graphs: | |||
for g in self._graphs: | |||
for e in g.edges(): | |||
el = tuple(g.edges[e].items()) | |||
if el not in edge_labels: | |||
@@ -593,93 +593,93 @@ class Dataset(object): | |||
return edge_labels | |||
def __get_dataset_size(self): | |||
return len(self.__graphs) | |||
def _get_dataset_size(self): | |||
return len(self._graphs) | |||
def __get_all_node_nums(self): | |||
return [nx.number_of_nodes(G) for G in self.__graphs] | |||
def _get_all_node_nums(self): | |||
return [nx.number_of_nodes(G) for G in self._graphs] | |||
def __get_total_node_num(self, all_node_nums): | |||
def _get_total_node_num(self, all_node_nums): | |||
return np.sum(all_node_nums) | |||
def __get_ave_node_num(self, all_node_nums): | |||
def _get_ave_node_num(self, all_node_nums): | |||
return np.mean(all_node_nums) | |||
def __get_min_node_num(self, all_node_nums): | |||
def _get_min_node_num(self, all_node_nums): | |||
return np.amin(all_node_nums) | |||
def __get_max_node_num(self, all_node_nums): | |||
def _get_max_node_num(self, all_node_nums): | |||
return np.amax(all_node_nums) | |||
def __get_all_edge_nums(self): | |||
return [nx.number_of_edges(G) for G in self.__graphs] | |||
def _get_all_edge_nums(self): | |||
return [nx.number_of_edges(G) for G in self._graphs] | |||
def __get_total_edge_num(self, all_edge_nums): | |||
def _get_total_edge_num(self, all_edge_nums): | |||
return np.sum(all_edge_nums) | |||
def __get_ave_edge_num(self, all_edge_nums): | |||
def _get_ave_edge_num(self, all_edge_nums): | |||
return np.mean(all_edge_nums) | |||
def __get_min_edge_num(self, all_edge_nums): | |||
def _get_min_edge_num(self, all_edge_nums): | |||
return np.amin(all_edge_nums) | |||
def __get_max_edge_num(self, all_edge_nums): | |||
def _get_max_edge_num(self, all_edge_nums): | |||
return np.amax(all_edge_nums) | |||
def __get_node_label_dim(self): | |||
return len(self.__node_labels) | |||
def _get_node_label_dim(self): | |||
return len(self._node_labels) | |||
def __get_node_label_num(self, node_label): | |||
def _get_node_label_num(self, node_label): | |||
nl = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
nl = nl | set(nx.get_node_attributes(G, node_label).values()) | |||
return len(nl) | |||
def __get_edge_label_dim(self): | |||
return len(self.__edge_labels) | |||
def _get_edge_label_dim(self): | |||
return len(self._edge_labels) | |||
def __get_edge_label_num(self, edge_label): | |||
def _get_edge_label_num(self, edge_label): | |||
el = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
el = el | set(nx.get_edge_attributes(G, edge_label).values()) | |||
return len(el) | |||
def __is_directed(self): | |||
return nx.is_directed(self.__graphs[0]) | |||
def _is_directed(self): | |||
return nx.is_directed(self._graphs[0]) | |||
def __get_all_node_degrees(self): | |||
return [np.mean(list(dict(G.degree()).values())) for G in self.__graphs] | |||
def _get_all_node_degrees(self): | |||
return [np.mean(list(dict(G.degree()).values())) for G in self._graphs] | |||
def __get_ave_node_degree(self, all_node_degrees): | |||
def _get_ave_node_degree(self, all_node_degrees): | |||
return np.mean(all_node_degrees) | |||
def __get_max_node_degree(self, all_node_degrees): | |||
def _get_max_node_degree(self, all_node_degrees): | |||
return np.amax(all_node_degrees) | |||
def __get_min_node_degree(self, all_node_degrees): | |||
def _get_min_node_degree(self, all_node_degrees): | |||
return np.amin(all_node_degrees) | |||
def __get_all_fill_factors(self): | |||
def _get_all_fill_factors(self): | |||
"""Get fill factor, the number of non-zero entries in the adjacency matrix. | |||
Returns | |||
@@ -687,24 +687,24 @@ class Dataset(object): | |||
list[float] | |||
List of fill factors for all graphs. | |||
""" | |||
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self.__graphs] | |||
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs] | |||
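As a quick sanity check of the formula above: a directed graph with 4 nodes and 4 edges fills 4 of the 16 adjacency-matrix entries, i.e. a fill factor of 0.25.

    import networkx as nx

    # 4 nodes, 4 edges -> 4 non-zero entries out of 4 * 4 = 16.
    G = nx.DiGraph([(0, 1), (1, 2), (2, 3), (3, 0)])
    fill_factor = nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2)
    assert fill_factor == 0.25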
def __get_ave_fill_factor(self, all_fill_factors): | |||
def _get_ave_fill_factor(self, all_fill_factors): | |||
return np.mean(all_fill_factors) | |||
def __get_max_fill_factor(self, all_fill_factors): | |||
def _get_max_fill_factor(self, all_fill_factors): | |||
return np.amax(all_fill_factors) | |||
def __get_min_fill_factor(self, all_fill_factors): | |||
def _get_min_fill_factor(self, all_fill_factors): | |||
return np.amin(all_fill_factors) | |||
def __get_substructures(self): | |||
def _get_substructures(self): | |||
subs = set() | |||
for G in self.__graphs: | |||
for G in self._graphs: | |||
degrees = list(dict(G.degree()).values()) | |||
if any(i == 2 for i in degrees): | |||
subs.add('linear') | |||
@@ -713,8 +713,8 @@ class Dataset(object): | |||
if 'linear' in subs and 'non linear' in subs: | |||
break | |||
if self.__directed: | |||
for G in self.__graphs: | |||
if self._directed: | |||
for G in self._graphs: | |||
try: | |||
nx.find_cycle(G)  # raises NetworkXNoCycle when G is acyclic | |||
subs.add('cyclic') | |||
break | |||
except nx.NetworkXNoCycle: | |||
pass | |||
@@ -737,19 +737,19 @@ class Dataset(object): | |||
return subs | |||
def __get_class_number(self): | |||
return len(set(self.__targets)) | |||
def _get_class_number(self): | |||
return len(set(self._targets)) | |||
def __get_node_attr_dim(self): | |||
return len(self.__node_attrs) | |||
def _get_node_attr_dim(self): | |||
return len(self._node_attrs) | |||
def __get_edge_attr_dim(self): | |||
return len(self.__edge_attrs) | |||
def _get_edge_attr_dim(self): | |||
return len(self._edge_attrs) | |||
def __compute_all_degree_entropy(self, base=None): | |||
def _compute_all_degree_entropy(self, base=None): | |||
"""Compute the entropy of degree distribution of each graph. | |||
Parameters | |||
@@ -765,7 +765,7 @@ class Dataset(object): | |||
from gklearn.utils.stats import entropy | |||
degree_entropy = [] | |||
for g in self.__graphs: | |||
for g in self._graphs: | |||
degrees = list(dict(g.degree()).values()) | |||
en = entropy(degrees, base=base) | |||
degree_entropy.append(en) | |||
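The entropy helper is imported from gklearn.utils.stats but not shown in this diff. A plausible stand-in, assuming Shannon entropy of the empirical degree distribution (the real implementation may differ):

    import numpy as np

    def entropy(values, base=None):
        # Shannon entropy of the empirical distribution of `values`;
        # a sketch, not necessarily gklearn.utils.stats.entropy.
        _, counts = np.unique(values, return_counts=True)
        probs = counts / counts.sum()
        h = -np.sum(probs * np.log(probs))
        return h if base is None else h / np.log(base)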
@@ -774,32 +774,32 @@ class Dataset(object): | |||
@property | |||
def graphs(self): | |||
return self.__graphs | |||
return self._graphs | |||
@property | |||
def targets(self): | |||
return self.__targets | |||
return self._targets | |||
@property | |||
def node_labels(self): | |||
return self.__node_labels | |||
return self._node_labels | |||
@property | |||
def edge_labels(self): | |||
return self.__edge_labels | |||
return self._edge_labels | |||
@property | |||
def node_attrs(self): | |||
return self.__node_attrs | |||
return self._node_attrs | |||
@property | |||
def edge_attrs(self): | |||
return self.__edge_attrs | |||
return self._edge_attrs | |||
def split_dataset_by_target(dataset): | |||
@@ -692,7 +692,7 @@ def load_from_ds(filename, filename_targets): | |||
# remove the '#'s in file names | |||
g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
_append_label_names(label_names, l_names) | |||
y.append(float(tmp[1])) | |||
else: # targets in a separate file | |||
for i in range(0, len(content)): | |||
@@ -700,7 +700,7 @@ def load_from_ds(filename, filename_targets): | |||
# remove the '#'s in file names | |||
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
_append_label_names(label_names, l_names) | |||
with open(filename_targets) as fnt: | |||
content_y = fnt.read().splitlines() | |||
@@ -745,13 +745,13 @@ def load_from_xml(filename, dir_dataset=None): | |||
mol_class = graph.attrib['class'] | |||
g, l_names = load_gxl(dir_dataset + '/' + mol_filename) | |||
data.append(g) | |||
__append_label_names(label_names, l_names) | |||
_append_label_names(label_names, l_names) | |||
y.append(mol_class) | |||
return data, y, label_names | |||
def __append_label_names(label_names, new_names): | |||
def _append_label_names(label_names, new_names): | |||
for key, val in label_names.items(): | |||
label_names[key] += [name for name in new_names[key] if name not in val] | |||
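A hypothetical call illustrating the merge: names already present are kept once, new ones are appended.

    label_names = {'node_labels': ['atom'], 'edge_labels': []}
    new_names = {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond_type']}
    _append_label_names(label_names, new_names)
    # label_names == {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond_type']}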
@@ -73,7 +73,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t | |||
y_all = dataset.targets | |||
# compute kernel distances. | |||
dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset) | |||
dis_mat = _compute_kernel_distances(dataset, kernel_options, trainset=trainset) | |||
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) | |||
@@ -121,7 +121,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t | |||
return results | |||
def __compute_kernel_distances(dataset, kernel_options, trainset=None): | |||
def _compute_kernel_distances(dataset, kernel_options, trainset=None): | |||
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], | |||
node_labels=dataset.node_labels, | |||
edge_labels=dataset.edge_labels, | |||
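The hunk cuts off before the distance computation itself. The standard conversion from a Gram matrix K to pairwise distances uses the kernel-induced metric d(i, j)^2 = K[i, i] + K[j, j] - 2 * K[i, j]; a sketch under the assumption that _compute_kernel_distances applies it:

    import numpy as np

    def gram_to_distances(K):
        # Kernel-induced squared distances: d2[i, j] = K[i, i] + K[j, j] - 2 * K[i, j].
        diag = np.diag(K)
        d2 = diag[:, None] + diag[None, :] - 2 * K
        return np.sqrt(np.maximum(d2, 0))  # clip round-off negatives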
@@ -23,8 +23,8 @@ class Timer(object): | |||
time_limit_in_sec : float | |||
The time limit in seconds. | |||
""" | |||
self.__time_limit_in_sec = time_limit_in_sec | |||
self.__start_time = time.time() | |||
self._time_limit_in_sec = time_limit_in_sec | |||
self._start_time = time.time() | |||
def expired(self): | |||
@@ -34,7 +34,7 @@ class Timer(object): | |||
------ | |||
Boolean: True if the time limit has expired, False otherwise. | |||
""" | |||
if self.__time_limit_in_sec > 0: | |||
runtime = time.time() - self.__start_time | |||
return runtime >= self.__time_limit_in_sec | |||
if self._time_limit_in_sec > 0: | |||
runtime = time.time() - self._start_time | |||
return runtime >= self._time_limit_in_sec | |||
return False |
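A short usage sketch of the Timer above; per the final return False, a non-positive limit never expires.

    timer = Timer(time_limit_in_sec=2)
    while not timer.expired():
        pass  # one unit of work per iteration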