
Refactor: deprecate the use of "__" for "private" members and methods; use "_" instead.

Branch: v0.2.x
jajupmochi, 4 years ago
commit bc5b8b0d25
21 changed files with 1593 additions and 1592 deletions
  1. gklearn/kernels/common_walk.py (+51 −51)
  2. gklearn/kernels/graph_kernel.py (+7 −7)
  3. gklearn/kernels/marginalized.py (+45 −45)
  4. gklearn/kernels/path_up_to_h.py (+80 −80)
  5. gklearn/kernels/random_walk_meta.py (+4 −4)
  6. gklearn/kernels/shortest_path.py (+33 −33)
  7. gklearn/kernels/structural_sp.py (+56 −56)
  8. gklearn/kernels/treelet.py (+85 −85)
  9. gklearn/kernels/weisfeiler_lehman.py (+41 −41)
  10. gklearn/preimage/generate_random_preimages_by_class.py (+2 −2)
  11. gklearn/preimage/kernel_knn_cv.py (+25 −25)
  12. gklearn/preimage/median_preimage_generator.py (+284 −283)
  13. gklearn/preimage/median_preimage_generator_cml.py (+221 −221)
  14. gklearn/preimage/median_preimage_generator_py.py (+283 −283)
  15. gklearn/preimage/random_preimage_generator.py (+109 −109)
  16. gklearn/preimage/remove_best_graph.py (+10 −10)
  17. gklearn/preimage/utils.py (+2 −2)
  18. gklearn/utils/dataset.py (+244 −244)
  19. gklearn/utils/graph_files.py (+4 −4)
  20. gklearn/utils/knn.py (+2 −2)
  21. gklearn/utils/timer.py (+5 −5)
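
A note on the rationale before the per-file diffs. Python name-mangles a double-underscore attribute such as self.__weight into self._CommonWalk__weight at compile time, so the attribute is invisible under its written name from any subclass; a single leading underscore is only a naming convention and is inherited normally, which matters for a hierarchy like GraphKernel and its kernel subclasses. A minimal sketch of the difference, using hypothetical classes rather than gklearn code:

# Hypothetical classes (not from gklearn) illustrating the difference.
class Base:
    def __init__(self):
        self.__secret = 1  # name-mangled to self._Base__secret
        self._shared = 2   # plain attribute, visible everywhere

class Child(Base):
    def read(self):
        try:
            return self.__secret  # resolves to self._Child__secret -> AttributeError
        except AttributeError:
            return self._shared   # the single-underscore attribute is inherited as-is

print(Child().read())  # prints 2

Running this prints 2: inside Child, self.__secret is looked up as self._Child__secret and fails, while self._shared is ordinary attribute access.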

gklearn/kernels/common_walk.py (+51 −51)

@@ -26,18 +26,18 @@ class CommonWalk(GraphKernel):
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__weight = kwargs.get('weight', 1)
-        self.__compute_method = kwargs.get('compute_method', None)
-        self.__ds_infos = kwargs.get('ds_infos', {})
-        self.__compute_method = self.__compute_method.lower()
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._weight = kwargs.get('weight', 1)
+        self._compute_method = kwargs.get('compute_method', None)
+        self._ds_infos = kwargs.get('ds_infos', {})
+        self._compute_method = self._compute_method.lower()




     def _compute_gm_series(self):
-        self.__check_graphs(self._graphs)
-        self.__add_dummy_labels(self._graphs)
-        if not self.__ds_infos['directed']:  # convert
+        self._check_graphs(self._graphs)
+        self._add_dummy_labels(self._graphs)
+        if not self._ds_infos['directed']:  # convert
             self._graphs = [G.to_directed() for G in self._graphs]
         # compute Gram matrix.
@@ -51,15 +51,15 @@ class CommonWalk(GraphKernel):
             iterator = itr
         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             for i, j in iterator:
-                kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
+                kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             for i, j in iterator:
-                kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
+                kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
@@ -67,9 +67,9 @@ class CommonWalk(GraphKernel):
     def _compute_gm_imap_unordered(self):
-        self.__check_graphs(self._graphs)
-        self.__add_dummy_labels(self._graphs)
-        if not self.__ds_infos['directed']:  # convert
+        self._check_graphs(self._graphs)
+        self._add_dummy_labels(self._graphs)
+        if not self._ds_infos['directed']:  # convert
             self._graphs = [G.to_directed() for G in self._graphs]
         # compute Gram matrix.
@@ -80,10 +80,10 @@ class CommonWalk(GraphKernel):
 #            G_gn = gn_toshare
         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             do_fun = self._wrapper_kernel_do_exp
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             do_fun = self._wrapper_kernel_do_geo
         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
@@ -93,9 +93,9 @@ class CommonWalk(GraphKernel):
     def _compute_kernel_list_series(self, g1, g_list):
-        self.__check_graphs(g_list + [g1])
-        self.__add_dummy_labels(g_list + [g1])
-        if not self.__ds_infos['directed']:  # convert
+        self._check_graphs(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])
+        if not self._ds_infos['directed']:  # convert
             g1 = g1.to_directed()
             g_list = [G.to_directed() for G in g_list]
@@ -107,23 +107,23 @@ class CommonWalk(GraphKernel):
             iterator = range(len(g_list))
         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             for i in iterator:
-                kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
+                kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
                 kernel_list[i] = kernel
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             for i in iterator:
-                kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
+                kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
                 kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__check_graphs(g_list + [g1])
-        self.__add_dummy_labels(g_list + [g1])
-        if not self.__ds_infos['directed']:  # convert
+        self._check_graphs(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])
+        if not self._ds_infos['directed']:  # convert
             g1 = g1.to_directed()
             g_list = [G.to_directed() for G in g_list]
@@ -136,10 +136,10 @@ class CommonWalk(GraphKernel):
 #            G_g_list = g_list_toshare
         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
+        if self._compute_method == 'exp':
             do_fun = self._wrapper_kernel_list_do_exp
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
+        elif self._compute_method == 'geo':
             do_fun = self._wrapper_kernel_list_do_geo

         def func_assign(result, var_to_assign):
@@ -154,31 +154,31 @@ class CommonWalk(GraphKernel):
     def _wrapper_kernel_list_do_exp(self, itr):
-        return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)
+        return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)




     def _wrapper_kernel_list_do_geo(self, itr):
-        return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
+        return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)

     def _compute_single_kernel_series(self, g1, g2):
-        self.__check_graphs([g1] + [g2])
-        self.__add_dummy_labels([g1] + [g2])
-        if not self.__ds_infos['directed']:  # convert
+        self._check_graphs([g1] + [g2])
+        self._add_dummy_labels([g1] + [g2])
+        if not self._ds_infos['directed']:  # convert
             g1 = g1.to_directed()
             g2 = g2.to_directed()
         # direct product graph method - exponential
-        if self.__compute_method == 'exp':
-            kernel = self.__kernel_do_exp(g1, g2, self.__weight)
+        if self._compute_method == 'exp':
+            kernel = self._kernel_do_exp(g1, g2, self._weight)
         # direct product graph method - geometric
-        elif self.__compute_method == 'geo':
-            kernel = self.__kernel_do_geo(g1, g2, self.__weight)
+        elif self._compute_method == 'geo':
+            kernel = self._kernel_do_geo(g1, g2, self._weight)


         return kernel

-    def __kernel_do_exp(self, g1, g2, beta):
+    def _kernel_do_exp(self, g1, g2, beta):
         """Compute common walk graph kernel between 2 graphs using exponential
         series.
@@ -195,7 +195,7 @@ class CommonWalk(GraphKernel):
             The common walk Kernel between 2 graphs.
         """
         # get tensor product / direct product
-        gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+        gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
         # return 0 if the direct product graph have no more than 1 node.
         if nx.number_of_nodes(gp) < 2:
             return 0
@@ -227,10 +227,10 @@ class CommonWalk(GraphKernel):
     def _wrapper_kernel_do_exp(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
+        return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)

-    def __kernel_do_geo(self, g1, g2, gamma):
+    def _kernel_do_geo(self, g1, g2, gamma):
         """Compute common walk graph kernel between 2 graphs using geometric
         series.
@@ -247,7 +247,7 @@ class CommonWalk(GraphKernel):
             The common walk Kernel between 2 graphs.
         """
         # get tensor product / direct product
-        gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+        gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
         # return 0 if the direct product graph have no more than 1 node.
         if nx.number_of_nodes(gp) < 2:
             return 0
@@ -262,24 +262,24 @@ class CommonWalk(GraphKernel):
     def _wrapper_kernel_do_geo(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
+        return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)

-    def __check_graphs(self, Gn):
+    def _check_graphs(self, Gn):
         for g in Gn:
             if nx.number_of_nodes(g) == 1:
                 raise Exception('Graphs must contain more than 1 nodes to construct adjacency matrices.')

-    def __add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_labels(self, Gn):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]

 def _init_worker_gm(gn_toshare):


gklearn/kernels/graph_kernel.py (+7 −7)

@@ -37,7 +37,7 @@ class GraphKernel(object):
             raise Exception('The graph list given is empty. No computation was performed.')
         else:
             self._graphs = [g.copy() for g in graphs[0]]
-            self._gram_matrix = self.__compute_gram_matrix()
+            self._gram_matrix = self._compute_gram_matrix()
             self._gram_matrix_unnorm = np.copy(self._gram_matrix)
             if self._normalize:
                 self._gram_matrix = self.normalize_gm(self._gram_matrix)
@@ -45,17 +45,17 @@ class GraphKernel(object):
         elif len(graphs) == 2:
             if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
-                kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy())
+                kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy())
                 return kernel, self._run_time
             elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
                 g1 = graphs[0].copy()
                 g_list = [g.copy() for g in graphs[1]]
-                kernel_list = self.__compute_kernel_list(g1, g_list)
+                kernel_list = self._compute_kernel_list(g1, g_list)
                 return kernel_list, self._run_time
             elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
                 g1 = graphs[1].copy()
                 g_list = [g.copy() for g in graphs[0]]
-                kernel_list = self.__compute_kernel_list(g1, g_list)
+                kernel_list = self._compute_kernel_list(g1, g_list)
                 return kernel_list, self._run_time
             else:
                 raise Exception('Cannot detect graphs.')
@@ -99,7 +99,7 @@ class GraphKernel(object):
         return dis_mat, dis_max, dis_min, dis_mean

-    def __compute_gram_matrix(self):
+    def _compute_gram_matrix(self):
         start_time = time.time()

         if self._parallel == 'imap_unordered':
@@ -125,7 +125,7 @@ class GraphKernel(object):
         pass

-    def __compute_kernel_list(self, g1, g_list):
+    def _compute_kernel_list(self, g1, g_list):
         start_time = time.time()

         if self._parallel == 'imap_unordered':
@@ -151,7 +151,7 @@ class GraphKernel(object):
         pass

-    def __compute_single_kernel(self, g1, g2):
+    def _compute_single_kernel(self, g1, g2):
         start_time = time.time()
         kernel = self._compute_single_kernel_series(g1, g2)


gklearn/kernels/marginalized.py (+45 −45)

@@ -33,25 +33,25 @@ class Marginalized(GraphKernel):
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__p_quit = kwargs.get('p_quit', 0.5)
-        self.__n_iteration = kwargs.get('n_iteration', 10)
-        self.__remove_totters = kwargs.get('remove_totters', False)
-        self.__ds_infos = kwargs.get('ds_infos', {})
-        self.__n_iteration = int(self.__n_iteration)
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._p_quit = kwargs.get('p_quit', 0.5)
+        self._n_iteration = kwargs.get('n_iteration', 10)
+        self._remove_totters = kwargs.get('remove_totters', False)
+        self._ds_infos = kwargs.get('ds_infos', {})
+        self._n_iteration = int(self._n_iteration)




     def _compute_gm_series(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

-        if self.__remove_totters:
+        if self._remove_totters:
             if self._verbose >= 2:
                 iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
             else:
                 iterator = self._graphs
             # @todo: this may not work.
-            self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+            self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -63,7 +63,7 @@ class Marginalized(GraphKernel):
         else:
             iterator = itr
         for i, j in iterator:
-            kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
+            kernel = self._kernel_do(self._graphs[i], self._graphs[j])
             gram_matrix[i][j] = kernel
             gram_matrix[j][i] = kernel  # @todo: no directed graph considered?
@@ -71,9 +71,9 @@ class Marginalized(GraphKernel):
     def _compute_gm_imap_unordered(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

-        if self.__remove_totters:
+        if self._remove_totters:
             pool = Pool(self._n_jobs)
             itr = range(0, len(self._graphs))
             if len(self._graphs) < 100 * self._n_jobs:
@@ -105,16 +105,16 @@ class Marginalized(GraphKernel):
     def _compute_kernel_list_series(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

-        if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+        if self._remove_totters:
+            g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
             if self._verbose >= 2:
                 iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
             else:
                 iterator = g_list
             # @todo: this may not work.
-            g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+            g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -123,17 +123,17 @@ class Marginalized(GraphKernel):
         else:
             iterator = range(len(g_list))
         for i in iterator:
-            kernel = self.__kernel_do(g1, g_list[i])
+            kernel = self._kernel_do(g1, g_list[i])
             kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

-        if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+        if self._remove_totters:
+            g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
             pool = Pool(self._n_jobs)
             itr = range(0, len(g_list))
             if len(g_list) < 100 * self._n_jobs:
@@ -171,19 +171,19 @@ class Marginalized(GraphKernel):
     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__kernel_do(G_g1, G_g_list[itr])
+        return itr, self._kernel_do(G_g1, G_g_list[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        self.__add_dummy_labels([g1] + [g2])
-        if self.__remove_totters:
-            g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels)  # @todo: this may not work.
-            g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
-        kernel = self.__kernel_do(g1, g2)
+        self._add_dummy_labels([g1] + [g2])
+        if self._remove_totters:
+            g1 = untotterTransformation(g1, self._node_labels, self._edge_labels)  # @todo: this may not work.
+            g2 = untotterTransformation(g2, self._node_labels, self._edge_labels)
+        kernel = self._kernel_do(g1, g2)
         return kernel

-    def __kernel_do(self, g1, g2):
+    def _kernel_do(self, g1, g2):
         """Compute marginalized graph kernel between 2 graphs.

         Parameters
@@ -205,7 +205,7 @@ class Marginalized(GraphKernel):
         p_init_G1 = 1 / num_nodes_G1
         p_init_G2 = 1 / num_nodes_G2

-        q = self.__p_quit * self.__p_quit
+        q = self._p_quit * self._p_quit
         r1 = q

 #        # initial R_inf
@@ -260,36 +260,36 @@ class Marginalized(GraphKernel):
                 if len(g2[node2]) > 0:
                     R_inf[(node1, node2)] = r1
                 else:
-                    R_inf[(node1, node2)] = self.__p_quit
+                    R_inf[(node1, node2)] = self._p_quit
             else:
                 if len(g2[node2]) > 0:
-                    R_inf[(node1, node2)] = self.__p_quit
+                    R_inf[(node1, node2)] = self._p_quit
                 else:
                     R_inf[(node1, node2)] = 1

         # compute all transition probability first.
         t_dict = {}
-        if self.__n_iteration > 1:
+        if self._n_iteration > 1:
             for node1 in g1.nodes():
                 neighbor_n1 = g1[node1]
                 # the transition probability distribution in the random walks
                 # generating step (uniform distribution over the vertices adjacent
                 # to the current vertex)
                 if len(neighbor_n1) > 0:
-                    p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
+                    p_trans_n1 = (1 - self._p_quit) / len(neighbor_n1)
                     for node2 in g2.nodes():
                         neighbor_n2 = g2[node2]
                         if len(neighbor_n2) > 0:
-                            p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
+                            p_trans_n2 = (1 - self._p_quit) / len(neighbor_n2)
                             for neighbor1 in neighbor_n1:
                                 for neighbor2 in neighbor_n2:
                                     t_dict[(node1, node2, neighbor1, neighbor2)] = \
                                         p_trans_n1 * p_trans_n2 * \
-                                        deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
-                                        deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
+                                        deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \
+                                        deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels))

         # Compute R_inf with a simple interative method
-        for i in range(2, self.__n_iteration + 1):
+        for i in range(2, self._n_iteration + 1):
             R_inf_old = R_inf.copy()

             # Compute R_inf for each pair of nodes
@@ -311,7 +311,7 @@ class Marginalized(GraphKernel):
         # add elements of R_inf up and compute kernel.
         for (n1, n2), value in R_inf.items():
-            s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
+            s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels))
             kernel += s * value  # ref [1] equation (6)

         return kernel
@@ -320,19 +320,19 @@ class Marginalized(GraphKernel):
     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do(G_gn[i], G_gn[j])
+        return i, j, self._kernel_do(G_gn[i], G_gn[j])


     def _wrapper_untotter(self, i):
-        return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels)  # @todo: this may not work.
+        return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels)  # @todo: this may not work.

-    def __add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_labels(self, Gn):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]

gklearn/kernels/path_up_to_h.py (+80 −80)

@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__depth = int(kwargs.get('depth', 10))
-        self.__k_func = kwargs.get('k_func', 'MinMax')
-        self.__compute_method = kwargs.get('compute_method', 'trie')
-        self.__ds_infos = kwargs.get('ds_infos', {})
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._depth = int(kwargs.get('depth', 10))
+        self._k_func = kwargs.get('k_func', 'MinMax')
+        self._compute_method = kwargs.get('compute_method', 'trie')
+        self._ds_infos = kwargs.get('ds_infos', {})




     def _compute_gm_series(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

         from itertools import combinations_with_replacement
         itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-        if self.__compute_method == 'trie':
-            all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
+        if self._compute_method == 'trie':
+            all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
             for i, j in iterator_kernel:
-                kernel = self.__kernel_do_trie(all_paths[i], all_paths[j])
+                kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
         else:
-            all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
+            all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
             for i, j in iterator_kernel:
-                kernel = self.__kernel_do_naive(all_paths[i], all_paths[j])
+                kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _compute_gm_imap_unordered(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

         # get all paths of all graphs before computing kernels to save time,
         # but this may cost a lot of memory for large datasets.
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         else:
             chunksize = 100
         all_paths = [[] for _ in range(len(self._graphs))]
-        if self.__compute_method == 'trie' and self.__k_func is not None:
+        if self._compute_method == 'trie' and self._k_func is not None:
             get_ps_fun = self._wrapper_find_all_path_as_trie
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
+        elif self._compute_method != 'trie' and self._k_func is not None:
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
         else:
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-        if self.__compute_method == 'trie' and self.__k_func is not None:
+        if self._compute_method == 'trie' and self._k_func is not None:
             def init_worker(trie_toshare):
                 global G_trie
                 G_trie = trie_toshare
             do_fun = self._wrapper_kernel_do_trie
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
+        elif self._compute_method != 'trie' and self._k_func is not None:
             def init_worker(plist_toshare):
                 global G_plist
                 G_plist = plist_toshare
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
             def init_worker(plist_toshare):
                 global G_plist
                 G_plist = plist_toshare
-            do_fun = self.__wrapper_kernel_do_kernelless  # @todo: what is this?
+            do_fun = self._wrapper_kernel_do_kernelless  # @todo: what is this?
         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
                     glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)
@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _compute_kernel_list_series(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

         if self._verbose >= 2:
             iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         kernel_list = [None] * len(g_list)

-        if self.__compute_method == 'trie':
-            paths_g1 = self.__find_all_path_as_trie(g1)
-            paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps]
+        if self._compute_method == 'trie':
+            paths_g1 = self._find_all_path_as_trie(g1)
+            paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
             for i in iterator_kernel:
-                kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i])
+                kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
                 kernel_list[i] = kernel
         else:
-            paths_g1 = self.__find_all_paths_until_length(g1)
-            paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps]
+            paths_g1 = self._find_all_paths_until_length(g1)
+            paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
             for i in iterator_kernel:
-                kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i])
+                kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
                 kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

         # get all paths of all graphs before computing kernels to save time,
         # but this may cost a lot of memory for large datasets.
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         else:
             chunksize = 100
         paths_g_list = [[] for _ in range(len(g_list))]
-        if self.__compute_method == 'trie' and self.__k_func is not None:
-            paths_g1 = self.__find_all_path_as_trie(g1)
+        if self._compute_method == 'trie' and self._k_func is not None:
+            paths_g1 = self._find_all_path_as_trie(g1)
             get_ps_fun = self._wrapper_find_all_path_as_trie
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
-            paths_g1 = self.__find_all_paths_until_length(g1)
+        elif self._compute_method != 'trie' and self._k_func is not None:
+            paths_g1 = self._find_all_paths_until_length(g1)
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
         else:
-            paths_g1 = self.__find_all_paths_until_length(g1)
+            paths_g1 = self._find_all_paths_until_length(g1)
             get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
         if self._verbose >= 2:
             iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_kernel_list_do(self, itr):
-        if self.__compute_method == 'trie' and self.__k_func is not None:
-            return itr, self.__kernel_do_trie(G_p1, G_plist[itr])
-        elif self.__compute_method != 'trie' and self.__k_func is not None:
-            return itr, self.__kernel_do_naive(G_p1, G_plist[itr])
+        if self._compute_method == 'trie' and self._k_func is not None:
+            return itr, self._kernel_do_trie(G_p1, G_plist[itr])
+        elif self._compute_method != 'trie' and self._k_func is not None:
+            return itr, self._kernel_do_naive(G_p1, G_plist[itr])
         else:
-            return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr])
+            return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        self.__add_dummy_labels([g1] + [g2])
-        if self.__compute_method == 'trie':
-            paths_g1 = self.__find_all_path_as_trie(g1)
-            paths_g2 = self.__find_all_path_as_trie(g2)
-            kernel = self.__kernel_do_trie(paths_g1, paths_g2)
+        self._add_dummy_labels([g1] + [g2])
+        if self._compute_method == 'trie':
+            paths_g1 = self._find_all_path_as_trie(g1)
+            paths_g2 = self._find_all_path_as_trie(g2)
+            kernel = self._kernel_do_trie(paths_g1, paths_g2)
         else:
-            paths_g1 = self.__find_all_paths_until_length(g1)
-            paths_g2 = self.__find_all_paths_until_length(g2)
-            kernel = self.__kernel_do_naive(paths_g1, paths_g2)
+            paths_g1 = self._find_all_paths_until_length(g1)
+            paths_g2 = self._find_all_paths_until_length(g2)
+            kernel = self._kernel_do_naive(paths_g1, paths_g2)
         return kernel


-    def __kernel_do_trie(self, trie1, trie2):
+    def _kernel_do_trie(self, trie1, trie2):
         """Compute path graph kernels up to depth d between 2 graphs using trie.

         Parameters
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         kernel : float
             Path kernel up to h between 2 graphs.
         """
-        if self.__k_func == 'tanimoto':
+        if self._k_func == 'tanimoto':
             # traverse all paths in graph1 and search them in graph2. Deep-first
             # search is applied.
             def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 #            print(setlist)
             kernel = setlist[0] / setlist[1]

-        elif self.__k_func == 'MinMax':  # MinMax kernel
+        elif self._k_func == 'MinMax':  # MinMax kernel
             # traverse all paths in graph1 and search them in graph2. Deep-first
             # search is applied.
             def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_kernel_do_trie(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j])
+        return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])

-    def __kernel_do_naive(self, paths1, paths2):
+    def _kernel_do_naive(self, paths1, paths2):
         """Compute path graph kernels up to depth d between 2 graphs naively.

         Parameters
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         """
         all_paths = list(set(paths1 + paths2))

-        if self.__k_func == 'tanimoto':
+        if self._k_func == 'tanimoto':
             length_union = len(set(paths1 + paths2))
             kernel = (len(set(paths1)) + len(set(paths2)) -
                       length_union) / length_union
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 #            kernel_uv = np.dot(vector1, vector2)
 #            kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)

-        elif self.__k_func == 'MinMax':  # MinMax kernel
+        elif self._k_func == 'MinMax':  # MinMax kernel
             path_count1 = Counter(paths1)
             path_count2 = Counter(paths2)
             vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
             kernel = np.sum(np.minimum(vector1, vector2)) / \
                      np.sum(np.maximum(vector1, vector2))

-        elif self.__k_func is None:  # no sub-kernel used; compare paths directly.
+        elif self._k_func is None:  # no sub-kernel used; compare paths directly.
             path_count1 = Counter(paths1)
             path_count2 = Counter(paths2)
             vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_kernel_do_naive(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j])
+        return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])

-    def __find_all_path_as_trie(self, G):
+    def _find_all_path_as_trie(self, G):
 #        all_path = find_all_paths_until_length(G, length, ds_attrs,
 #                                               node_label=node_label,
 #                                               edge_label=edge_label)
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         # them. Deep-first search is applied. Notice the reverse of each path is
         # also stored to the trie.
         def traverseGraph(root, ptrie, G, pcurrent=[]):
-            if len(pcurrent) < self.__depth + 1:
+            if len(pcurrent) < self._depth + 1:
                 for neighbor in G[root]:
                     if neighbor not in pcurrent:
                         pcurrent.append(neighbor)
-                        plstr = self.__paths2labelseqs([pcurrent], G)
+                        plstr = self._paths2labelseqs([pcurrent], G)
                         ptrie.insertWord(plstr[0])
                         traverseGraph(neighbor, ptrie, G, pcurrent)
                         del pcurrent[-1]
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         ptrie = Trie()
         path_l = [[n] for n in G.nodes]  # paths of length l
-        path_l_str = self.__paths2labelseqs(path_l, G)
+        path_l_str = self._paths2labelseqs(path_l, G)
         for p in path_l_str:
             ptrie.insertWord(p)

         for n in G.nodes:
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
     def _wrapper_find_all_path_as_trie(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, self.__find_all_path_as_trie(g)
+        return i, self._find_all_path_as_trie(g)

     # @todo: (can be removed maybe) this method find paths repetively, it could be faster.
-    def __find_all_paths_until_length(self, G, tolabelseqs=True):
+    def _find_all_paths_until_length(self, G, tolabelseqs=True):
         """Find all paths no longer than a certain maximum length in a graph. A
         recursive depth first search is applied.
@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         """
 #        path_l = [tuple([n]) for n in G.nodes]  # paths of length l
 #        all_paths = path_l[:]
-#        for l in range(1, self.__depth + 1):
+#        for l in range(1, self._depth + 1):
 #            path_l_new = []
 #            for path in path_l:
 #                for neighbor in G[path[-1]]:
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         path_l = [[n] for n in G.nodes]  # paths of length l
         all_paths = [p.copy() for p in path_l]
-        for l in range(1, self.__depth + 1):
+        for l in range(1, self._depth + 1):
             path_lplus1 = []
             for path in path_l:
                 for neighbor in G[path[-1]]:
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
             all_paths += path_lplus1
             path_l = [p.copy() for p in path_lplus1]

-#        for i in range(0, self.__depth + 1):
+#        for i in range(0, self._depth + 1):
 #            new_paths = find_all_paths(G, i)
 #            if new_paths == []:
 #                break
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
         # consider labels
 #        print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
 #        print()
-        return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
+        return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)

     def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
+        return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)

-    def __paths2labelseqs(self, plist, G):
-        if len(self.__node_labels) > 0:
-            if len(self.__edge_labels) > 0:
+    def _paths2labelseqs(self, plist, G):
+        if len(self._node_labels) > 0:
+            if len(self._edge_labels) > 0:
                 path_strs = []
                 for path in plist:
                     pths_tmp = []
                     for idx, node in enumerate(path[:-1]):
-                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
-                        pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
-                    pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels))
+                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
+                        pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
+                    pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
                     path_strs.append(tuple(pths_tmp))
             else:
                 path_strs = []
                 for path in plist:
                     pths_tmp = []
                     for node in path:
-                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
+                        pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
                     path_strs.append(tuple(pths_tmp))
             return path_strs
         else:
-            if len(self.__edge_labels) > 0:
+            if len(self._edge_labels) > 0:
                 path_strs = []
                 for path in plist:
                     if len(path) == 1:
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
                     else:
                         pths_tmp = []
                         for idx, node in enumerate(path[:-1]):
-                            pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
+                            pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
                         path_strs.append(tuple(pths_tmp))
                 return path_strs
             else:
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 #        return [tuple([len(path)]) for path in all_paths]

-    def __add_dummy_labels(self, Gn):
-        if self.__k_func is not None:
-            if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_labels(self, Gn):
+        if self._k_func is not None:
+            if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
                 for i in range(len(Gn)):
                     nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-                self.__node_labels = [SpecialLabel.DUMMY]
-            if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+                self._node_labels = [SpecialLabel.DUMMY]
+            if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
                 for i in range(len(Gn)):
                     nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-                self.__edge_labels = [SpecialLabel.DUMMY]
+                self._edge_labels = [SpecialLabel.DUMMY]

gklearn/kernels/random_walk_meta.py (+4 −4)

@@ -76,11 +76,11 @@ class RandomWalkMeta(GraphKernel):
     def _add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]

+ 33
- 33
gklearn/kernels/shortest_path.py View File

@@ -26,11 +26,11 @@ class ShortestPath(GraphKernel):
def __init__(self, **kwargs): def __init__(self, **kwargs):
GraphKernel.__init__(self) GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__node_attrs = kwargs.get('node_attrs', [])
self.__edge_weight = kwargs.get('edge_weight', None)
self.__node_kernels = kwargs.get('node_kernels', None)
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._node_attrs = kwargs.get('node_attrs', [])
self._edge_weight = kwargs.get('edge_weight', None)
self._node_kernels = kwargs.get('node_kernels', None)
self._ds_infos = kwargs.get('ds_infos', {})




def _compute_gm_series(self): def _compute_gm_series(self):
@@ -39,7 +39,7 @@ class ShortestPath(GraphKernel):
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout) iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
else: else:
iterator = self._graphs iterator = self._graphs
self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -51,7 +51,7 @@ class ShortestPath(GraphKernel):
else: else:
iterator = itr iterator = itr
for i, j in iterator: for i, j in iterator:
kernel = self.__sp_do(self._graphs[i], self._graphs[j])
kernel = self._sp_do(self._graphs[i], self._graphs[j])
gram_matrix[i][j] = kernel gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel gram_matrix[j][i] = kernel
@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
# get shortest path graphs of g1 and each graph in g_list. # get shortest path graphs of g1 and each graph in g_list.
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
if self._verbose >= 2: if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout) iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
else: else:
iterator = g_list iterator = g_list
g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
@@ -106,7 +106,7 @@ class ShortestPath(GraphKernel):
else: else:
iterator = range(len(g_list)) iterator = range(len(g_list))
for i in iterator: for i in iterator:
kernel = self.__sp_do(g1, g_list[i])
kernel = self._sp_do(g1, g_list[i])
kernel_list[i] = kernel kernel_list[i] = kernel
return kernel_list return kernel_list
@@ -114,7 +114,7 @@ class ShortestPath(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
# get shortest path graphs of g1 and each graph in g_list. # get shortest path graphs of g1 and each graph in g_list.
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
pool = Pool(self._n_jobs) pool = Pool(self._n_jobs)
     get_sp_graphs_fun = self._wrapper_get_sp_graphs
     itr = zip(g_list, range(0, len(g_list)))
@@ -151,55 +151,55 @@ class ShortestPath(GraphKernel):
     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__sp_do(G_g1, G_gl[itr])
+        return itr, self._sp_do(G_g1, G_gl[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
-        g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
-        kernel = self.__sp_do(g1, g2)
+        g1 = getSPGraph(g1, edge_weight=self._edge_weight)
+        g2 = getSPGraph(g2, edge_weight=self._edge_weight)
+        kernel = self._sp_do(g1, g2)
         return kernel

     def _wrapper_get_sp_graphs(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, getSPGraph(g, edge_weight=self.__edge_weight)
+        return i, getSPGraph(g, edge_weight=self._edge_weight)

-    def __sp_do(self, g1, g2):
+    def _sp_do(self, g1, g2):
         kernel = 0
         # compute shortest path matrices first, method borrowed from FCSP.
         vk_dict = {}  # shortest path matrices dict
-        if len(self.__node_labels) > 0:
+        if len(self._node_labels) > 0:
             # node symb and non-symb labeled
-            if len(self.__node_attrs) > 0:
-                kn = self.__node_kernels['mix']
+            if len(self._node_attrs) > 0:
+                kn = self._node_kernels['mix']
                 for n1, n2 in product(
                         g1.nodes(data=True), g2.nodes(data=True)):
-                    n1_labels = [n1[1][nl] for nl in self.__node_labels]
-                    n2_labels = [n2[1][nl] for nl in self.__node_labels]
-                    n1_attrs = [n1[1][na] for na in self.__node_attrs]
-                    n2_attrs = [n2[1][na] for na in self.__node_attrs]
+                    n1_labels = [n1[1][nl] for nl in self._node_labels]
+                    n2_labels = [n2[1][nl] for nl in self._node_labels]
+                    n1_attrs = [n1[1][na] for na in self._node_attrs]
+                    n2_attrs = [n2[1][na] for na in self._node_attrs]
                     vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
             # node symb labeled
             else:
-                kn = self.__node_kernels['symb']
+                kn = self._node_kernels['symb']
                 for n1 in g1.nodes(data=True):
                     for n2 in g2.nodes(data=True):
-                        n1_labels = [n1[1][nl] for nl in self.__node_labels]
-                        n2_labels = [n2[1][nl] for nl in self.__node_labels]
+                        n1_labels = [n1[1][nl] for nl in self._node_labels]
+                        n2_labels = [n2[1][nl] for nl in self._node_labels]
                         vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
         else:
             # node non-symb labeled
-            if len(self.__node_attrs) > 0:
-                kn = self.__node_kernels['nsymb']
+            if len(self._node_attrs) > 0:
+                kn = self._node_kernels['nsymb']
                 for n1 in g1.nodes(data=True):
                     for n2 in g2.nodes(data=True):
-                        n1_attrs = [n1[1][na] for na in self.__node_attrs]
-                        n2_attrs = [n2[1][na] for na in self.__node_attrs]
+                        n1_attrs = [n1[1][na] for na in self._node_attrs]
+                        n2_attrs = [n2[1][na] for na in self._node_attrs]
                         vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
             # node unlabeled
             else:
@@ -210,7 +210,7 @@ class ShortestPath(GraphKernel):
                 return kernel

         # compute graph kernels
-        if self.__ds_infos['directed']:
+        if self._ds_infos['directed']:
             for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
                 if e1[2]['cost'] == e2[2]['cost']:
                     nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
@@ -261,4 +261,4 @@ class ShortestPath(GraphKernel):
     def _wrapper_sp_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__sp_do(G_gs[i], G_gs[j])
+        return i, j, self._sp_do(G_gs[i], G_gs[j])
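`_sp_do` above follows the FCSP scheme: all vertex-kernel values are precomputed into `vk_dict`, then reused for every pair of shortest-path edges with equal cost. A toy sketch of the same two-phase idea for the undirected case, using a Dirac node kernel as an illustrative choice (not the gklearn API):

```python
# Toy sketch of the FCSP two-phase scheme used by _sp_do.
from itertools import product

import networkx as nx

def sp_kernel_sketch(g1, g2, node_label='atom'):
    # phase 1: vertex-kernel table (here a Dirac kernel on one node label)
    vk = {(u, v): float(g1.nodes[u][node_label] == g2.nodes[v][node_label])
          for u, v in product(g1.nodes, g2.nodes)}
    # phase 2: combine vertex kernels over pairs of shortest-path edges
    # with equal cost; an undirected edge can match in both orientations.
    k = 0.0
    for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
        if e1[2]['cost'] == e2[2]['cost']:
            k += vk[(e1[0], e2[0])] * vk[(e1[1], e2[1])]
            k += vk[(e1[0], e2[1])] * vk[(e1[1], e2[0])]
    return k

g = nx.Graph()
g.add_node(0, atom='C'); g.add_node(1, atom='O')
g.add_edge(0, 1, cost=1.0)
print(sp_kernel_sketch(g, g))  # 1.0 for this toy pair
```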

+ 56  - 56  gklearn/kernels/structural_sp.py

@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel):
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__node_attrs = kwargs.get('node_attrs', [])
-        self.__edge_attrs = kwargs.get('edge_attrs', [])
-        self.__edge_weight = kwargs.get('edge_weight', None)
-        self.__node_kernels = kwargs.get('node_kernels', None)
-        self.__edge_kernels = kwargs.get('edge_kernels', None)
-        self.__compute_method = kwargs.get('compute_method', 'naive')
-        self.__ds_infos = kwargs.get('ds_infos', {})
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._node_attrs = kwargs.get('node_attrs', [])
+        self._edge_attrs = kwargs.get('edge_attrs', [])
+        self._edge_weight = kwargs.get('edge_weight', None)
+        self._node_kernels = kwargs.get('node_kernels', None)
+        self._edge_kernels = kwargs.get('edge_kernels', None)
+        self._compute_method = kwargs.get('compute_method', 'naive')
+        self._ds_infos = kwargs.get('ds_infos', {})

     def _compute_gm_series(self):
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel):
             iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
         else:
             iterator = self._graphs
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for g in iterator:
-                splist.append(self.__get_sps_as_trie(g))
+                splist.append(self._get_sps_as_trie(g))
         else:
             for g in iterator:
-                splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
+                splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -60,14 +60,14 @@ class StructuralSP(GraphKernel):
             iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
         else:
             iterator = itr
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for i, j in iterator:
-                kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
+                kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
                 gram_matrix[i][j] = kernel
                 gram_matrix[j][i] = kernel
         else:
             for i, j in iterator:
-                kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
+                kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
                 # if(kernel > 1):
                 #     print("error here ")
                 gram_matrix[i][j] = kernel
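The serial path above fills only the upper triangle of the Gram matrix and mirrors it, since the kernel is symmetric. A minimal standalone sketch of that fill pattern (`kernel_fn` and `graphs` are hypothetical placeholders, not gklearn API):

```python
# Sketch of the symmetric Gram-matrix fill used above.
from itertools import combinations_with_replacement

import numpy as np

def gram(graphs, kernel_fn):
    n = len(graphs)
    gm = np.zeros((n, n))
    # Only n * (n + 1) / 2 kernel evaluations are needed:
    # compute the upper triangle and mirror it.
    for i, j in combinations_with_replacement(range(n), 2):
        gm[i, j] = kernel_fn(graphs[i], graphs[j])
        gm[j, i] = gm[i, j]
    return gm

# toy usage with any hashable objects and a trivial kernel:
print(gram(['ab', 'abc'], lambda a, b: float(len(a) == len(b))))
```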
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel):
         else:
             chunksize = 100
         # get shortest path graphs of self._graphs
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             get_sps_fun = self._wrapper_get_sps_trie
         else:
             get_sps_fun = self._wrapper_get_sps_naive
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel):
             global G_spl, G_gs
             G_spl = spl_toshare
             G_gs = gs_toshare
-        if self.__compute_method == 'trie':
-            do_fun = self.__wrapper_ssp_do_trie
+        if self._compute_method == 'trie':
+            do_fun = self._wrapper_ssp_do_trie
         else:
             do_fun = self._wrapper_ssp_do_naive
         parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
@@ -119,18 +119,18 @@ class StructuralSP(GraphKernel):
     def _compute_kernel_list_series(self, g1, g_list):
         # get shortest paths of g1 and each graph in g_list.
-        sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
+        sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
         splist = []
         if self._verbose >= 2:
             iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
         else:
             iterator = g_list
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for g in iterator:
-                splist.append(self.__get_sps_as_trie(g))
+                splist.append(self._get_sps_as_trie(g))
         else:
             for g in iterator:
-                splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
+                splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -138,13 +138,13 @@ class StructuralSP(GraphKernel):
             iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
         else:
             iterator = range(len(g_list))
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             for i in iterator:
-                kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i])
+                kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
                 kernel_list[i] = kernel
         else:
             for i in iterator:
-                kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i])
+                kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
                 kernel_list[i] = kernel

         return kernel_list
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel):
     def _compute_kernel_list_imap_unordered(self, g1, g_list):
         # get shortest paths of g1 and each graph in g_list.
-        sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
+        sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
         splist = [None] * len(g_list)
         pool = Pool(self._n_jobs)
         itr = zip(g_list, range(0, len(g_list)))
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel):
         else:
             chunksize = 100
         # get shortest path graphs of g_list
-        if self.__compute_method == 'trie':
+        if self._compute_method == 'trie':
             get_sps_fun = self._wrapper_get_sps_trie
         else:
             get_sps_fun = self._wrapper_get_sps_naive
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel):
             G_spl = spl_toshare
             G_g1 = g1_toshare
             G_gl = gl_toshare
-        if self.__compute_method == 'trie':
-            do_fun = self.__wrapper_ssp_do_trie
+        if self._compute_method == 'trie':
+            do_fun = self._wrapper_ssp_do_trie
         else:
             do_fun = self._wrapper_kernel_list_do
         def func_assign(result, var_to_assign):
@@ -199,36 +199,36 @@ class StructuralSP(GraphKernel):
     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
+        return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
-        sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed'])
-        if self.__compute_method == 'trie':
-            kernel = self.__ssp_do_trie(g1, g2, sp1, sp2)
+        sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
+        sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
+        if self._compute_method == 'trie':
+            kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
         else:
-            kernel = self.__ssp_do_naive(g1, g2, sp1, sp2)
+            kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
         return kernel

     def _wrapper_get_sps_naive(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])
+        return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])

-    def __ssp_do_naive(self, g1, g2, spl1, spl2):
+    def _ssp_do_naive(self, g1, g2, spl1, spl2):
         kernel = 0

         # First, compute shortest path matrices, method borrowed from FCSP.
-        vk_dict = self.__get_all_node_kernels(g1, g2)
+        vk_dict = self._get_all_node_kernels(g1, g2)
         # Then, compute kernels between all pairs of edges, an extension of
         # the FCSP idea. It suits sparse graphs, which is the most common
         # case we encountered. For dense graphs, this would be slow.
-        ek_dict = self.__get_all_edge_kernels(g1, g2)
+        ek_dict = self._get_all_edge_kernels(g1, g2)

         # compute graph kernels
         if vk_dict:
@@ -314,27 +314,27 @@ class StructuralSP(GraphKernel):
     def _wrapper_ssp_do_naive(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
+        return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
-    def __get_all_node_kernels(self, g1, g2):
+    def _get_all_node_kernels(self, g1, g2):
         return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)

-    def __get_all_edge_kernels(self, g1, g2):
+    def _get_all_edge_kernels(self, g1, g2):
         # Compute kernels between all pairs of edges, an extension of the
         # FCSP idea. It suits sparse graphs, which is the most common case
         # we encountered. For dense graphs, this would be slow.
         ek_dict = {}  # dict of edge kernels
-        if len(self.__edge_labels) > 0:
+        if len(self._edge_labels) > 0:
             # edge symb and non-symb labeled
-            if len(self.__edge_attrs) > 0:
-                ke = self.__edge_kernels['mix']
+            if len(self._edge_attrs) > 0:
+                ke = self._edge_kernels['mix']
                 for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
-                    e1_labels = [e1[2][el] for el in self.__edge_labels]
-                    e2_labels = [e2[2][el] for el in self.__edge_labels]
-                    e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-                    e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+                    e1_labels = [e1[2][el] for el in self._edge_labels]
+                    e2_labels = [e2[2][el] for el in self._edge_labels]
+                    e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+                    e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
                     ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
                     ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                     ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -342,11 +342,11 @@ class StructuralSP(GraphKernel):
                     ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
             # edge symb labeled
             else:
-                ke = self.__edge_kernels['symb']
+                ke = self._edge_kernels['symb']
                 for e1 in g1.edges(data=True):
                     for e2 in g2.edges(data=True):
-                        e1_labels = [e1[2][el] for el in self.__edge_labels]
-                        e2_labels = [e2[2][el] for el in self.__edge_labels]
+                        e1_labels = [e1[2][el] for el in self._edge_labels]
+                        e2_labels = [e2[2][el] for el in self._edge_labels]
                         ek_temp = ke(e1_labels, e2_labels)
                         ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                         ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -354,12 +354,12 @@ class StructuralSP(GraphKernel):
                         ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
         else:
             # edge non-symb labeled
-            if len(self.__edge_attrs) > 0:
-                ke = self.__edge_kernels['nsymb']
+            if len(self._edge_attrs) > 0:
+                ke = self._edge_kernels['nsymb']
                 for e1 in g1.edges(data=True):
                     for e2 in g2.edges(data=True):
-                        e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-                        e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+                        e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+                        e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
                         ek_temp = ke(e1_attrs, e2_attrs)
                         ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
                         ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
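Because the graphs are treated as undirected here, each edge-pair kernel value is stored under all four node orderings, so later lookups never have to normalize edge direction. A minimal sketch of that caching idea (`cache_edge_kernel` is illustrative, not the gklearn API):

```python
# Sketch: cache one kernel value under all four orientations of an
# undirected edge pair, so lookups need no direction normalization.
def cache_edge_kernel(ek_dict, e1, e2, value):
    (u1, v1), (u2, v2) = e1, e2
    for a in ((u1, v1), (v1, u1)):
        for b in ((u2, v2), (v2, u2)):
            ek_dict[(a, b)] = value

ek_dict = {}
cache_edge_kernel(ek_dict, (0, 1), (2, 3), 0.8)
assert ek_dict[((1, 0), (3, 2))] == 0.8  # any orientation hits the cache
```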


+ 85  - 85  gklearn/kernels/treelet.py

@@ -28,16 +28,16 @@ class Treelet(GraphKernel):
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__sub_kernel = kwargs.get('sub_kernel', None)
-        self.__ds_infos = kwargs.get('ds_infos', {})
-        if self.__sub_kernel is None:
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._sub_kernel = kwargs.get('sub_kernel', None)
+        self._ds_infos = kwargs.get('ds_infos', {})
+        if self._sub_kernel is None:
             raise Exception('Sub kernel not set.')

     def _compute_gm_series(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

         # get all canonical keys of all graphs before computing kernels to save
         # time, but this may cost a lot of memory for large datasets.
@@ -47,7 +47,7 @@ class Treelet(GraphKernel):
         else:
             iterator = self._graphs
         for g in iterator:
-            canonkeys.append(self.__get_canonkeys(g))
+            canonkeys.append(self._get_canonkeys(g))

         # compute Gram matrix.
         gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -59,7 +59,7 @@ class Treelet(GraphKernel):
         else:
             iterator = itr
         for i, j in iterator:
-            kernel = self.__kernel_do(canonkeys[i], canonkeys[j])
+            kernel = self._kernel_do(canonkeys[i], canonkeys[j])
             gram_matrix[i][j] = kernel
             gram_matrix[j][i] = kernel  # @todo: no directed graph considered?
@@ -67,7 +67,7 @@ class Treelet(GraphKernel):

     def _compute_gm_imap_unordered(self):
-        self.__add_dummy_labels(self._graphs)
+        self._add_dummy_labels(self._graphs)

         # get all canonical keys of all graphs before computing kernels to save
         # time, but this may cost a lot of memory for large datasets.
@@ -103,18 +103,18 @@ class Treelet(GraphKernel):

     def _compute_kernel_list_series(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

         # get all canonical keys of all graphs before computing kernels to save
         # time, but this may cost a lot of memory for large datasets.
-        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_1 = self._get_canonkeys(g1)
         canonkeys_list = []
         if self._verbose >= 2:
             iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout)
         else:
             iterator = g_list
         for g in iterator:
-            canonkeys_list.append(self.__get_canonkeys(g))
+            canonkeys_list.append(self._get_canonkeys(g))

         # compute kernel list.
         kernel_list = [None] * len(g_list)
@@ -123,18 +123,18 @@ class Treelet(GraphKernel):
         else:
             iterator = range(len(g_list))
         for i in iterator:
-            kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i])
+            kernel = self._kernel_do(canonkeys_1, canonkeys_list[i])
             kernel_list[i] = kernel

         return kernel_list

     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        self.__add_dummy_labels(g_list + [g1])
+        self._add_dummy_labels(g_list + [g1])

         # get all canonical keys of all graphs before computing kernels to save
         # time, but this may cost a lot of memory for large datasets.
-        canonkeys_1 = self.__get_canonkeys(g1)
+        canonkeys_1 = self._get_canonkeys(g1)
         canonkeys_list = [[] for _ in range(len(g_list))]
         pool = Pool(self._n_jobs)
         itr = zip(g_list, range(0, len(g_list)))
@@ -173,18 +173,18 @@ class Treelet(GraphKernel):

     def _wrapper_kernel_list_do(self, itr):
-        return itr, self.__kernel_do(G_ck_1, G_ck_list[itr])
+        return itr, self._kernel_do(G_ck_1, G_ck_list[itr])

     def _compute_single_kernel_series(self, g1, g2):
-        self.__add_dummy_labels([g1] + [g2])
-        canonkeys_1 = self.__get_canonkeys(g1)
-        canonkeys_2 = self.__get_canonkeys(g2)
-        kernel = self.__kernel_do(canonkeys_1, canonkeys_2)
+        self._add_dummy_labels([g1] + [g2])
+        canonkeys_1 = self._get_canonkeys(g1)
+        canonkeys_2 = self._get_canonkeys(g2)
+        kernel = self._kernel_do(canonkeys_1, canonkeys_2)
         return kernel

-    def __kernel_do(self, canonkey1, canonkey2):
+    def _kernel_do(self, canonkey1, canonkey2):
         """Compute treelet graph kernel between 2 graphs.

         Parameters
@@ -200,17 +200,17 @@ class Treelet(GraphKernel):
         keys = set(canonkey1.keys()) & set(canonkey2.keys())  # find the canonical keys common to both graphs
         vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
         vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])
-        kernel = self.__sub_kernel(vector1, vector2)
+        kernel = self._sub_kernel(vector1, vector2)
         return kernel

     def _wrapper_kernel_do(self, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j])
+        return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j])
-    def __get_canonkeys(self, G):
+    def _get_canonkeys(self, G):
         """Generate canonical keys of all treelets in a graph.

         Parameters
@@ -236,7 +236,7 @@ class Treelet(GraphKernel):
         patterns['0'] = list(G.nodes())
         canonkey['0'] = nx.number_of_nodes(G)
         for i in range(1, 6):  # for i in range(1, 6):
-            patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed'])
+            patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed'])
             canonkey[str(i)] = len(patterns[str(i)])

         # n-star patterns
@@ -330,11 +330,11 @@ class Treelet(GraphKernel):
         ### pattern obtained in the structural analysis section above, which is a
         ### string corresponding to a unique treelet. A dictionary is built to keep
         ### track of the number of every treelet.
-        if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0:
+        if len(self._node_labels) > 0 or len(self._edge_labels) > 0:
             canonkey_l = {}  # canonical key, a dictionary which keeps track of the number of every treelet.

             # linear patterns
-            canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels))
+            canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels))
             for key in canonkey_t:
                 canonkey_l[('0', key)] = canonkey_t[key]

@@ -343,9 +343,9 @@ class Treelet(GraphKernel):
             for pattern in patterns[str(i)]:
                 canonlist = []
                 for idx, node in enumerate(pattern[:-1]):
-                    canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
-                    canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels))
-                canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels))
+                    canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
+                    canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels))
+                canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels))
                 canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
                 treelet.append(tuple([str(i)] + canonkey_t))
             canonkey_l.update(Counter(treelet))
@@ -356,13 +356,13 @@ class Treelet(GraphKernel):
             for pattern in patterns[str(i) + 'star']:
                 canonlist = []
                 for leaf in pattern[1:]:
-                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
-                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
                     canonlist.append(tuple((nlabels, elabels)))
                 canonlist.sort()
                 canonlist = list(chain.from_iterable(canonlist))
                 canonkey_t = tuple(['d' if i == 5 else str(i * 2)] +
-                    [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+                    [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
                     + canonlist)
                 treelet.append(canonkey_t)
             canonkey_l.update(Counter(treelet))
@@ -372,17 +372,17 @@ class Treelet(GraphKernel):
             for pattern in patterns['7']:
                 canonlist = []
                 for leaf in pattern[1:3]:
-                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
-                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
                     canonlist.append(tuple((nlabels, elabels)))
                 canonlist.sort()
                 canonlist = list(chain.from_iterable(canonlist))
                 canonkey_t = tuple(['7']
-                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
-                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
-                    + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)])
+                    + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+                    + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+                    + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)])
                 treelet.append(canonkey_t)
             canonkey_l.update(Counter(treelet))
@@ -391,38 +391,38 @@ class Treelet(GraphKernel):
             for pattern in patterns['11']:
                 canonlist = []
                 for leaf in pattern[1:4]:
-                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
-                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
                     canonlist.append(tuple((nlabels, elabels)))
                 canonlist.sort()
                 canonlist = list(chain.from_iterable(canonlist))
                 canonkey_t = tuple(['b']
-                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
-                    + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)]
-                    + [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)])
+                    + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+                    + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)]
+                    + [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)])
                 treelet.append(canonkey_t)
             canonkey_l.update(Counter(treelet))

             # pattern 10
             treelet = []
             for pattern in patterns['10']:
-                canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
-                    tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]
+                canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
+                    tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]
                 canonlist = []
                 for leaf in pattern[1:3]:
-                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
-                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
                     canonlist.append(tuple((nlabels, elabels)))
                 canonlist.sort()
                 canonkey0 = list(chain.from_iterable(canonlist))
                 canonkey_t = tuple(['a']
-                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
-                    + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]
-                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+                    + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+                    + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]
+                    + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
                     + canonkey4 + canonkey0)
                 treelet.append(canonkey_t)
             canonkey_l.update(Counter(treelet))
@@ -432,15 +432,15 @@ class Treelet(GraphKernel):
             for pattern in patterns['12']:
                 canonlist0 = []
                 for leaf in pattern[1:3]:
-                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
-                    elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
+                    elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
                     canonlist0.append(tuple((nlabels, elabels)))
                 canonlist0.sort()
                 canonlist0 = list(chain.from_iterable(canonlist0))
                 canonlist3 = []
                 for leaf in pattern[4:6]:
-                    nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
-                    elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels)
+                    nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
+                    elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels)
                     canonlist3.append(tuple((nlabels, elabels)))
                 canonlist3.sort()
                 canonlist3 = list(chain.from_iterable(canonlist3))
@@ -448,14 +448,14 @@ class Treelet(GraphKernel):
                 # 2 possible keys can be generated from the 2 nodes with extended label 3;
                 # select the one with the lower lexicographic order.
                 canonkey_t1 = tuple(['c']
-                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0
-                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+                    + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0
+                    + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
                     + canonlist3)
                 canonkey_t2 = tuple(['c']
-                    + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3
-                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
-                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+                    + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3
+                    + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+                    + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
                     + canonlist0)
                 treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
             canonkey_l.update(Counter(treelet))
@@ -463,24 +463,24 @@ class Treelet(GraphKernel):
             # pattern 9
             treelet = []
             for pattern in patterns['9']:
-                canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
-                    tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
-                canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
-                    tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
-                prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
-                    tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
-                prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
-                    tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+                canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels),
+                    tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)]
+                canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
+                    tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)]
+                prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels),
+                    tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)]
+                prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels),
+                    tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
                 if prekey2 + canonkey2 < prekey3 + canonkey3:
-                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
-                        + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+                        + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
                         + prekey2 + prekey3 + canonkey2 + canonkey3
                 else:
-                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
-                        + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
+                    canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+                        + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
                        + prekey3 + prekey2 + canonkey3 + canonkey2
                 treelet.append(tuple(['9']
-                    + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+                    + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
                     + canonkey_t))
             canonkey_l.update(Counter(treelet))
@@ -492,15 +492,15 @@ class Treelet(GraphKernel):
     def _wrapper_get_canonkeys(self, itr_item):
         g = itr_item[0]
         i = itr_item[1]
-        return i, self.__get_canonkeys(g)
+        return i, self._get_canonkeys(g)

-    def __add_dummy_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_labels(self, Gn):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
-        if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__edge_labels = [SpecialLabel.DUMMY]
+            self._edge_labels = [SpecialLabel.DUMMY]
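`_add_dummy_labels` lets unlabeled datasets go through the same labeled code path by attaching a constant dummy attribute to every node and edge. A minimal sketch of the same trick in plain networkx, where the attribute name `'dummy'` is an illustrative stand-in for `SpecialLabel.DUMMY`:

```python
# Sketch: give every node/edge a constant dummy label so that
# label-dependent kernel code also works on unlabeled graphs.
import networkx as nx

def add_dummy_labels(graphs, name='dummy'):
    for g in graphs:
        nx.set_node_attributes(g, '0', name)  # every node gets label '0'
        nx.set_edge_attributes(g, '0', name)  # every edge gets label '0'
    return [name]  # the label list the kernel should use from now on

g = nx.path_graph(3)
node_labels = add_dummy_labels([g])
assert g.nodes[0]['dummy'] == '0'
```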

+ 41  - 41  gklearn/kernels/weisfeiler_lehman.py

@@ -25,11 +25,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
-        self.__node_labels = kwargs.get('node_labels', [])
-        self.__edge_labels = kwargs.get('edge_labels', [])
-        self.__height = int(kwargs.get('height', 0))
-        self.__base_kernel = kwargs.get('base_kernel', 'subtree')
-        self.__ds_infos = kwargs.get('ds_infos', {})
+        self._node_labels = kwargs.get('node_labels', [])
+        self._edge_labels = kwargs.get('edge_labels', [])
+        self._height = int(kwargs.get('height', 0))
+        self._base_kernel = kwargs.get('base_kernel', 'subtree')
+        self._ds_infos = kwargs.get('ds_infos', {})

     def _compute_gm_series(self):
@@ -37,23 +37,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
         import warnings
         warnings.warn('A part of the computation is parallelized.')
-        self.__add_dummy_node_labels(self._graphs)
+        self._add_dummy_node_labels(self._graphs)

         # for WL subtree kernel
-        if self.__base_kernel == 'subtree':
-            gram_matrix = self.__subtree_kernel_do(self._graphs)
+        if self._base_kernel == 'subtree':
+            gram_matrix = self._subtree_kernel_do(self._graphs)

         # for WL shortest path kernel
-        elif self.__base_kernel == 'sp':
-            gram_matrix = self.__sp_kernel_do(self._graphs)
+        elif self._base_kernel == 'sp':
+            gram_matrix = self._sp_kernel_do(self._graphs)

         # for WL edge kernel
-        elif self.__base_kernel == 'edge':
-            gram_matrix = self.__edge_kernel_do(self._graphs)
+        elif self._base_kernel == 'edge':
+            gram_matrix = self._edge_kernel_do(self._graphs)

         # for user defined base kernel
         else:
-            gram_matrix = self.__user_kernel_do(self._graphs)
+            gram_matrix = self._user_kernel_do(self._graphs)

         return gram_matrix
@@ -70,23 +70,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
         import warnings
         warnings.warn('A part of the computation is parallelized.')
-        self.__add_dummy_node_labels(g_list + [g1])
+        self._add_dummy_node_labels(g_list + [g1])

         # for WL subtree kernel
-        if self.__base_kernel == 'subtree':
-            gram_matrix = self.__subtree_kernel_do(g_list + [g1])
+        if self._base_kernel == 'subtree':
+            gram_matrix = self._subtree_kernel_do(g_list + [g1])

         # for WL shortest path kernel
-        elif self.__base_kernel == 'sp':
-            gram_matrix = self.__sp_kernel_do(g_list + [g1])
+        elif self._base_kernel == 'sp':
+            gram_matrix = self._sp_kernel_do(g_list + [g1])

         # for WL edge kernel
-        elif self.__base_kernel == 'edge':
-            gram_matrix = self.__edge_kernel_do(g_list + [g1])
+        elif self._base_kernel == 'edge':
+            gram_matrix = self._edge_kernel_do(g_list + [g1])

         # for user defined base kernel
         else:
-            gram_matrix = self.__user_kernel_do(g_list + [g1])
+            gram_matrix = self._user_kernel_do(g_list + [g1])

         return list(gram_matrix[-1][0:-1])
@@ -103,28 +103,28 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge

     def _compute_single_kernel_series(self, g1, g2):  # @todo: this should be better.
-        self.__add_dummy_node_labels([g1] + [g2])
+        self._add_dummy_node_labels([g1] + [g2])

         # for WL subtree kernel
-        if self.__base_kernel == 'subtree':
-            gram_matrix = self.__subtree_kernel_do([g1] + [g2])
+        if self._base_kernel == 'subtree':
+            gram_matrix = self._subtree_kernel_do([g1] + [g2])

         # for WL shortest path kernel
-        elif self.__base_kernel == 'sp':
-            gram_matrix = self.__sp_kernel_do([g1] + [g2])
+        elif self._base_kernel == 'sp':
+            gram_matrix = self._sp_kernel_do([g1] + [g2])

         # for WL edge kernel
-        elif self.__base_kernel == 'edge':
-            gram_matrix = self.__edge_kernel_do([g1] + [g2])
+        elif self._base_kernel == 'edge':
+            gram_matrix = self._edge_kernel_do([g1] + [g2])

         # for user defined base kernel
         else:
-            gram_matrix = self.__user_kernel_do([g1] + [g2])
+            gram_matrix = self._user_kernel_do([g1] + [g2])

         return gram_matrix[0][1]

-    def __subtree_kernel_do(self, Gn):
+    def _subtree_kernel_do(self, Gn):
         """Compute Weisfeiler-Lehman kernels between graphs.

         Parameters
@@ -146,17 +146,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
         for G in Gn:
             # set all labels into a tuple.
             for nd, attrs in G.nodes(data=True):  # @todo: there may be a better way.
-                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self.__node_labels)
+                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
             # get the set of original labels
             labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
             # number of occurrences of each label in G
             all_num_of_each_label.append(dict(Counter(labels_ori)))

         # Compute subtree kernel with the 0th iteration and add it to the final kernel.
-        self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
+        self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)

         # iterate each height
-        for h in range(1, self.__height + 1):
+        for h in range(1, self._height + 1):
             all_set_compressed = {}  # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0  # number of label sets that occurred as node labels at least once in all graphs
             # all_labels_ori = set()  # all unique original labels in all graphs in this iteration
@@ -199,12 +199,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             all_num_of_each_label.append(dict(Counter(labels_comp)))

             # Compute subtree kernel with h iterations and add it to the final kernel
-            self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
+            self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)

         return gram_matrix

-    def __compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn):
+    def _compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn):
         """Compute Gram matrix using the base kernel.
         """
         if self._parallel == 'imap_unordered':
@@ -218,12 +218,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
         elif self._parallel is None:
             for i in range(len(gram_matrix)):
                 for j in range(i, len(gram_matrix)):
-                    gram_matrix[i][j] = self.__compute_subtree_kernel(all_num_of_each_label[i],
+                    gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
                         all_num_of_each_label[j], gram_matrix[i][j])
                     gram_matrix[j][i] = gram_matrix[i][j]

-    def __compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
+    def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
         """Compute the subtree kernel.
         """
         labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
@@ -240,7 +240,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
     def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
         i = itr[0]
         j = itr[1]
-        return i, j, self.__compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
+        return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
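`_subtree_kernel_do` alternates two steps: relabel every node with a compressed label derived from its own label plus the sorted multiset of its neighbors' labels, then add the dot product of per-graph label histograms to the running kernel. A compact standalone sketch of one WL iteration plus the histogram kernel (not the class's exact code):

```python
# Sketch: one Weisfeiler-Lehman relabeling step plus the subtree
# base kernel (dot product of label-count histograms).
from collections import Counter

import networkx as nx

def wl_relabel(G, label='label'):
    # new label = (own label, sorted multiset of neighbor labels)
    new = {v: (G.nodes[v][label], tuple(sorted(G.nodes[u][label] for u in G[v])))
           for v in G.nodes}
    nx.set_node_attributes(G, new, label)

def subtree_kernel(G1, G2, label='label'):
    c1 = Counter(nx.get_node_attributes(G1, label).values())
    c2 = Counter(nx.get_node_attributes(G2, label).values())
    # count matching label pairs across the two graphs
    return sum(c1[l] * c2[l] for l in set(c1) & set(c2))

G = nx.path_graph(3)
nx.set_node_attributes(G, 'a', 'label')
wl_relabel(G)
print(subtree_kernel(G, G))  # 5: the two end nodes share a WL label
```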
     def _wl_spkernel_do(Gn, node_label, edge_label, height):
@@ -469,11 +469,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
         return gram_matrix

-    def __add_dummy_node_labels(self, Gn):
-        if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+    def _add_dummy_node_labels(self, Gn):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-            self.__node_labels = [SpecialLabel.DUMMY]
+            self._node_labels = [SpecialLabel.DUMMY]

 class WLSubtree(WeisfeilerLehman):


+ 2  - 2  gklearn/preimage/generate_random_preimages_by_class.py

@@ -31,7 +31,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
     if save_results:
         # create result files.
         print('creating output files...')
-        fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save)
+        fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], dir_save)

     dis_k_dataset_list = []
@@ -166,7 +166,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
     print('\ncomplete.\n')

-def __init_output_file_preimage(ds_name, gkernel, dir_output):
+def _init_output_file_preimage(ds_name, gkernel, dir_output):
     if not os.path.exists(dir_output):
         os.makedirs(dir_output)
     fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
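The helper above just makes sure the output directory exists and derives the CSV file names from the dataset and kernel names. A minimal sketch of that pattern; the summary-file name is an assumption, since the diff only shows the detail file:

```python
# Sketch of _init_output_file_preimage: ensure the output directory
# exists and build CSV names from dataset and kernel names.
import os

def init_output_files(ds_name, gkernel, dir_output):
    os.makedirs(dir_output, exist_ok=True)
    fn_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
    fn_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'  # assumed naming
    return fn_detail, fn_summary
```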


+ 25  - 25  gklearn/preimage/kernel_knn_cv.py

@@ -33,35 +33,35 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti
if save_results: if save_results:
# create result files. # create result files.
print('creating output files...') print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
else: else:
fn_output_detail, fn_output_summary = None, None fn_output_detail, fn_output_summary = None, None
# 2. compute/load Gram matrix a priori. # 2. compute/load Gram matrix a priori.
print('2. computing/loading Gram matrix...') print('2. computing/loading Gram matrix...')
gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
gram_matrix_unnorm, time_precompute_gm = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
# 3. perform k-nn CV. # 3. perform k-nn CV.
print('3. performing k-nn CV...') print('3. performing k-nn CV...')
if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random': if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random':
__kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
elif train_examples == 'best-dataset': elif train_examples == 'best-dataset':
__kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
elif train_examples == 'trainset': elif train_examples == 'trainset':
__kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)


print('\ncomplete.\n') print('\ncomplete.\n')
def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
Gn = dataset_all.graphs Gn = dataset_all.graphs
y_all = dataset_all.targets y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']


# get shuffles. # get shuffles.
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)
accuracies = [[], [], []] accuracies = [[], [], []]
for trial in range(len(train_indices)): for trial in range(len(train_indices)):
@@ -89,11 +89,11 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
mge_options['update_order'] = True mge_options['update_order'] = True
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0 mpg_options['runtime_precompute_gm'] = 0
set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
set_median, gen_median_uo = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
mge_options['update_order'] = False mge_options['update_order'] = False
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0 mpg_options['runtime_precompute_gm'] = 0
_, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
_, gen_median = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
medians[0].append(set_median) medians[0].append(set_median)
medians[1].append(gen_median) medians[1].append(gen_median)
medians[2].append(gen_median_uo) medians[2].append(gen_median_uo)
@@ -104,10 +104,10 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
# compute dis_mat between medians. # compute dis_mat between medians.
dataset = dataset_all.copy() dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in G_app], targets=None) dataset.load_graphs([g.copy() for g in G_app], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix. # compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_medians = [] kernels_to_medians = []
for g in G_app: for g in G_app:
kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
@@ -161,13 +161,13 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
f_summary.close() f_summary.close()
def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
Gn = dataset_all.graphs Gn = dataset_all.graphs
y_all = dataset_all.targets y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']


# get shuffles. # get shuffles.
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)
accuracies = [] accuracies = []
for trial in range(len(train_indices)): for trial in range(len(train_indices)):
@@ -204,10 +204,10 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
# compute dis_mat between medians. # compute dis_mat between medians.
dataset = dataset_all.copy() dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in best_graphs], targets=None) dataset.load_graphs([g.copy() for g in best_graphs], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix. # compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_best_graphs = [] kernels_to_best_graphs = []
for g in best_graphs: for g in best_graphs:
kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
@@ -259,7 +259,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
f_summary.close() f_summary.close()
def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
y_all = dataset_all.targets y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']
@@ -268,7 +268,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) dis_mat, _, _, _ = compute_distance_matrix(gram_matrix)


# get shuffles. # get shuffles.
train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, _, _ = _get_shuffles(y_all, n_splits, test_size)
accuracies = [] accuracies = []
for trial in range(len(train_indices)): for trial in range(len(train_indices)):
@@ -317,7 +317,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
f_summary.close() f_summary.close()
def __get_shuffles(y_all, n_splits, test_size):
def _get_shuffles(y_all, n_splits, test_size):
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
train_indices = [[] for _ in range(n_splits)] train_indices = [[] for _ in range(n_splits)]
test_indices = [[] for _ in range(n_splits)] test_indices = [[] for _ in range(n_splits)]
@@ -335,7 +335,7 @@ def __get_shuffles(y_all, n_splits, test_size):
return train_indices, test_indices, train_nums, keys return train_indices, test_indices, train_nums, keys
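For readers skimming the diff: _get_shuffles wraps scikit-learn's ShuffleSplit with a fixed random_state so every k-NN trial sees the same splits. A minimal sketch of the idea, assuming a plain (non-per-class) split; the real helper additionally returns per-class training counts (train_nums) and the class keys:

import numpy as np
from sklearn.model_selection import ShuffleSplit

def get_shuffles_sketch(y_all, n_splits, test_size):
    # random_state=0 mirrors the helper above, making trials reproducible.
    rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
    splits = list(rs.split(np.zeros(len(y_all)), y_all))
    train_indices = [tr for tr, _ in splits]
    test_indices = [te for _, te in splits]
    return train_indices, test_indices

train_idx, test_idx = get_shuffles_sketch([0, 1, 0, 1, 0, 1], n_splits=3, test_size=0.33)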
def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
def _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
mpg = MedianPreimageGenerator() mpg = MedianPreimageGenerator()
mpg.dataset = dataset.copy() mpg.dataset = dataset.copy()
mpg.set_options(**mpg_options.copy()) mpg.set_options(**mpg_options.copy())
@@ -346,7 +346,7 @@ def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_option
return mpg.set_median, mpg.gen_median return mpg.set_median, mpg.gen_median




def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
if load_gm == 'auto': if load_gm == 'auto':
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
@@ -355,10 +355,10 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
gram_matrix_unnorm = gmfile['gram_matrix_unnorm'] gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
time_precompute_gm = float(gmfile['run_time']) time_precompute_gm = float(gmfile['run_time'])
else: else:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
elif not load_gm: elif not load_gm:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
else: else:
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
@@ -369,7 +369,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
return gram_matrix_unnorm, time_precompute_gm return gram_matrix_unnorm, time_precompute_gm
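The branches above implement a simple compute-or-load cache for the unnormalized Gram matrix. A condensed sketch of the same pattern (compute_fn is a hypothetical stand-in for _compute_gram_matrix_unnorm); note that np.savez appends the .npz suffix itself, which is why the save path omits it:

import os
import numpy as np

def load_or_compute_gm(dir_save, ds_name, kernel_name, compute_fn):
    # Cache file name follows the pattern used in the code above.
    gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_name + '.gm.npz'
    if os.path.isfile(os.path.abspath(gm_fname)):
        gmfile = np.load(gm_fname)
        return gmfile['gram_matrix_unnorm'], float(gmfile['run_time'])
    gram_matrix_unnorm, run_time = compute_fn()
    np.savez(gm_fname[:-len('.npz')], gram_matrix_unnorm=gram_matrix_unnorm, run_time=run_time)
    return gram_matrix_unnorm, run_time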




def __get_graph_kernel(dataset, kernel_options):
def _get_graph_kernel(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels, node_labels=dataset.node_labels,
@@ -381,7 +381,7 @@ def __get_graph_kernel(dataset, kernel_options):
return graph_kernel return graph_kernel
def __compute_gram_matrix_unnorm(dataset, kernel_options):
def _compute_gram_matrix_unnorm(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'], graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels, node_labels=dataset.node_labels,
@@ -397,7 +397,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
return gram_matrix_unnorm, run_time return gram_matrix_unnorm, run_time
def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
def _init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output): if not os.path.exists(dir_output):
os.makedirs(dir_output) os.makedirs(dir_output)
fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv' fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv'
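For context on the rename this commit performs throughout these files: a double leading underscore triggers Python's name mangling, which rewrites the attribute to _ClassName__attr and silently breaks access from subclasses and generic tooling, whereas a single underscore is a pure naming convention. A standalone illustration (not from gklearn):

class Base:
    def __init__(self):
        self.__weight = 1   # mangled to _Base__weight
        self._weight = 1    # plain convention, no mangling

class Child(Base):
    def read(self):
        # Referencing self.__weight here would look up _Child__weight
        # and raise AttributeError; the single-underscore name is
        # inherited as-is.
        return self._weight

print(Child().read())   # 1
print(Base().__dict__)  # {'_Base__weight': 1, '_weight': 1}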


+ 284
- 283
gklearn/preimage/median_preimage_generator.py
File diff suppressed because it is too large
View File


+ 221
- 221
gklearn/preimage/median_preimage_generator_cml.py View File

@@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __init__(self, dataset=None): def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset) PreimageGenerator.__init__(self, dataset=dataset)
### arguments to set. ### arguments to set.
self.__mge = None
self.__ged_options = {}
self.__mge_options = {}
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__ds_name = None
self._mge = None
self._ged_options = {}
self._mge_options = {}
# self._fit_method = 'k-graphs'
self._init_method = 'random'
self._init_ecc = None
self._parallel = True
self._n_jobs = multiprocessing.cpu_count()
self._ds_name = None
# for cml. # for cml.
self.__time_limit_in_sec = 0
self.__max_itrs = 100
self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1
self.__allow_zeros = True
# self.__triangle_rule = True
self._time_limit_in_sec = 0
self._max_itrs = 100
self._max_itrs_without_update = 3
self._epsilon_residual = 0.01
self._epsilon_ec = 0.1
self._allow_zeros = True
# self._triangle_rule = True
### values to compute. ### values to compute.
self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None
self.__runtime_total = None
self.__set_median = None
self.__gen_median = None
self.__best_from_dataset = None
self.__sod_set_median = None
self.__sod_gen_median = None
self.__k_dis_set_median = None
self.__k_dis_gen_median = None
self.__k_dis_dataset = None
self.__node_label_costs = None
self.__edge_label_costs = None
self._runtime_optimize_ec = None
self._runtime_generate_preimage = None
self._runtime_total = None
self._set_median = None
self._gen_median = None
self._best_from_dataset = None
self._sod_set_median = None
self._sod_gen_median = None
self._k_dis_set_median = None
self._k_dis_gen_median = None
self._k_dis_dataset = None
self._node_label_costs = None
self._edge_label_costs = None
# for cml. # for cml.
self.__itrs = 0
self.__converged = False
self.__num_updates_ecs = 0
self._itrs = 0
self._converged = False
self._num_updates_ecs = 0
### values that can be set or to be computed. ### values that can be set or to be computed.
self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None
self._edit_cost_constants = []
self._gram_matrix_unnorm = None
self._runtime_precompute_gm = None


def set_options(self, **kwargs): def set_options(self, **kwargs):
self._kernel_options = kwargs.get('kernel_options', {}) self._kernel_options = kwargs.get('kernel_options', {})
self._graph_kernel = kwargs.get('graph_kernel', None) self._graph_kernel = kwargs.get('graph_kernel', None)
self._verbose = kwargs.get('verbose', 2) self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {})
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True)
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self.__ds_name = kwargs.get('ds_name', None)
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self.__max_itrs = kwargs.get('max_itrs', 100)
self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', True)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
self._ged_options = kwargs.get('ged_options', {})
self._mge_options = kwargs.get('mge_options', {})
# self._fit_method = kwargs.get('fit_method', 'k-graphs')
self._init_method = kwargs.get('init_method', 'random')
self._init_ecc = kwargs.get('init_ecc', None)
self._edit_cost_constants = kwargs.get('edit_cost_constants', [])
self._parallel = kwargs.get('parallel', True)
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self._ds_name = kwargs.get('ds_name', None)
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self._max_itrs = kwargs.get('max_itrs', 100)
self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
self._epsilon_residual = kwargs.get('epsilon_residual', 0.01)
self._epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self._allow_zeros = kwargs.get('allow_zeros', True)
# self._triangle_rule = kwargs.get('triangle_rule', True)
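Every option above follows the same kwargs.get(key, default) idiom: callers pass only the options they want to override, and anything omitted keeps its default. A toy illustration of the pattern (class and attribute names are hypothetical):

class OptionsDemo:
    def set_options(self, **kwargs):
        # Unspecified keys silently fall back to their defaults.
        self._parallel = kwargs.get('parallel', True)
        self._max_itrs = kwargs.get('max_itrs', 100)

demo = OptionsDemo()
demo.set_options(max_itrs=50)
print(demo._max_itrs, demo._parallel)  # 50 True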
def run(self): def run(self):
@@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
start = time.time() start = time.time()
# 1. precompute gram matrix. # 1. precompute gram matrix.
if self.__gram_matrix_unnorm is None:
if self._gram_matrix_unnorm is None:
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
end_precompute_gm = time.time() end_precompute_gm = time.time()
self.__runtime_precompute_gm = end_precompute_gm - start
self._runtime_precompute_gm = end_precompute_gm - start
else: else:
if self.__runtime_precompute_gm is None:
if self._runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
if self._kernel_options['normalize']: if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
else: else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
end_precompute_gm = time.time() end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
start -= self._runtime_precompute_gm
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset':
# start = time.time() # start = time.time()
# self.__runtime_precompute_gm = 0
# self._runtime_precompute_gm = 0
# end_precompute_gm = start # end_precompute_gm = start
# 2. optimize edit cost constants. # 2. optimize edit cost constants.
self.__optimize_edit_cost_vector()
self._optimize_edit_cost_vector()
end_optimize_ec = time.time() end_optimize_ec = time.time()
self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm
self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm
# 3. compute set median and gen median using optimized edit costs. # 3. compute set median and gen median using optimized edit costs.
if self._verbose >= 2: if self._verbose >= 2:
print('\nstart computing set median and gen median using optimized edit costs...\n') print('\nstart computing set median and gen median using optimized edit costs...\n')
self.__gmg_bcu()
self._gmg_bcu()
end_generate_preimage = time.time() end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self.__runtime_total = end_generate_preimage - start
self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self._runtime_total = end_generate_preimage - start
if self._verbose >= 2: if self._verbose >= 2:
print('medians computed.') print('medians computed.')
print('SOD of the set median: ', self.__sod_set_median)
print('SOD of the generalized median: ', self.__sod_gen_median)
print('SOD of the set median: ', self._sod_set_median)
print('SOD of the generalized median: ', self._sod_gen_median)
# 4. compute kernel distances to the true median. # 4. compute kernel distances to the true median.
if self._verbose >= 2: if self._verbose >= 2:
print('\nstart computing distances to true median...\n') print('\nstart computing distances to true median...\n')
self.__compute_distances_to_true_median()
self._compute_distances_to_true_median()


# 5. print out results. # 5. print out results.
if self._verbose: if self._verbose:
@@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
print('================================================================================') print('================================================================================')
print('Finished generation of preimages.') print('Finished generation of preimages.')
print('--------------------------------------------------------------------------------') print('--------------------------------------------------------------------------------')
print('The optimized edit costs:', self.__edit_cost_constants)
print('SOD of the set median:', self.__sod_set_median)
print('SOD of the generalized median:', self.__sod_gen_median)
print('Distance in kernel space for set median:', self.__k_dis_set_median)
print('Distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to optimize edit costs:', self.__runtime_optimize_ec)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating edit costs:', self.__num_updates_ecs)
print('Is optimization of edit costs converged:', self.__converged)
print('The optimized edit costs:', self._edit_cost_constants)
print('SOD of the set median:', self._sod_set_median)
print('SOD of the generalized median:', self._sod_gen_median)
print('Distance in kernel space for set median:', self._k_dis_set_median)
print('Distance in kernel space for generalized median:', self._k_dis_gen_median)
print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to optimize edit costs:', self._runtime_optimize_ec)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('Total number of iterations for optimizing:', self._itrs)
print('Total number of updating edit costs:', self._num_updates_ecs)
print('Is optimization of edit costs converged:', self._converged)
print('================================================================================') print('================================================================================')
print() print()




def get_results(self): def get_results(self):
results = {} results = {}
results['edit_cost_constants'] = self.__edit_cost_constants
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_optimize_ec'] = self.__runtime_optimize_ec
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['sod_set_median'] = self.__sod_set_median
results['sod_gen_median'] = self.__sod_gen_median
results['k_dis_set_median'] = self.__k_dis_set_median
results['k_dis_gen_median'] = self.__k_dis_gen_median
results['k_dis_dataset'] = self.__k_dis_dataset
results['itrs'] = self.__itrs
results['converged'] = self.__converged
results['num_updates_ecc'] = self.__num_updates_ecs
results['edit_cost_constants'] = self._edit_cost_constants
results['runtime_precompute_gm'] = self._runtime_precompute_gm
results['runtime_optimize_ec'] = self._runtime_optimize_ec
results['runtime_generate_preimage'] = self._runtime_generate_preimage
results['runtime_total'] = self._runtime_total
results['sod_set_median'] = self._sod_set_median
results['sod_gen_median'] = self._sod_gen_median
results['k_dis_set_median'] = self._k_dis_set_median
results['k_dis_gen_median'] = self._k_dis_gen_median
results['k_dis_dataset'] = self._k_dis_dataset
results['itrs'] = self._itrs
results['converged'] = self._converged
results['num_updates_ecc'] = self._num_updates_ecs
results['mge'] = {} results['mge'] = {}
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents()
return results return results


def __optimize_edit_cost_vector(self):
def _optimize_edit_cost_vector(self):
"""Learn edit cost vector. """Learn edit cost vector.
""" """
# Initialize label costs randomly. # Initialize label costs randomly.
if self.__init_method == 'random':
if self._init_method == 'random':
# Initialize label costs. # Initialize label costs.
self.__initialize_label_costs()
self._initialize_label_costs()
# Optimize edit cost matrices. # Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
self._optimize_ecm_by_kernel_distances()
# Initialize all label costs with the same value. # Initialize all label costs with the same value.
elif self.__init_method == 'uniform': # random
elif self._init_method == 'uniform': # random
pass pass
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'LETTER2':
elif self._fit_method == 'random': # random
if self._ged_options['edit_cost'] == 'LETTER':
self._edit_cost_constants = random.sample(range(1, 1000), 3)
self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants]
elif self._ged_options['edit_cost'] == 'LETTER2':
random.seed(time.time()) random.seed(time.time())
self.__edit_cost_constants = random.sample(range(1, 1000), 5)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
self._edit_cost_constants = random.sample(range(1, 1000), 5)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
self._edit_cost_constants = random.sample(range(1, 1000), 6)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
if self._dataset.node_attrs == []: if self._dataset.node_attrs == []:
self.__edit_cost_constants[2] = 0
self._edit_cost_constants[2] = 0
if self._dataset.edge_attrs == []: if self._dataset.edge_attrs == []:
self.__edit_cost_constants[5] = 0
self._edit_cost_constants[5] = 0
else: else:
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
self._edit_cost_constants = random.sample(range(1, 1000), 6)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
if self._verbose >= 2: if self._verbose >= 2:
print('edit cost constants used:', self.__edit_cost_constants)
elif self.__fit_method == 'expert': # expert
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
print('edit cost constants used:', self._edit_cost_constants)
elif self._fit_method == 'expert': # expert
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._edit_cost_constants = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
else: else:
self.__edit_cost_constants = [3, 3, 1, 3, 3, 1]
self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
else: else:
self.__edit_cost_constants = self.__init_ecc
elif self.__fit_method == 'k-graphs':
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__init_ecc = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__init_ecc = [0, 0, 1, 1, 1, 0]
self._edit_cost_constants = self._init_ecc
elif self._fit_method == 'k-graphs':
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._init_ecc = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
self._init_ecc = [0, 0, 1, 1, 1, 0]
if self._dataset.node_attrs == []: if self._dataset.node_attrs == []:
self.__init_ecc[2] = 0
self._init_ecc[2] = 0
if self._dataset.edge_attrs == []: if self._dataset.edge_attrs == []:
self.__init_ecc[5] = 0
self._init_ecc[5] = 0
else: else:
self.__init_ecc = [3, 3, 1, 3, 3, 1]
self._init_ecc = [3, 3, 1, 3, 3, 1]
# optimize on the k-graph subset. # optimize on the k-graph subset.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'whole-dataset':
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__init_ecc = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
self._optimize_ecm_by_kernel_distances()
elif self._fit_method == 'whole-dataset':
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._init_ecc = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
else: else:
self.__init_ecc = [3, 3, 1, 3, 3, 1]
self._init_ecc = [3, 3, 1, 3, 3, 1]
# optimize on the whole set. # optimize on the whole set.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'precomputed':
self._optimize_ecm_by_kernel_distances()
elif self._fit_method == 'precomputed':
pass pass
def __initialize_label_costs(self):
self.__initialize_node_label_costs()
self.__initialize_edge_label_costs()
def _initialize_label_costs(self):
self._initialize_node_label_costs()
self._initialize_edge_label_costs()
def __initialize_node_label_costs(self):
def _initialize_node_label_costs(self):
# Get list of node labels. # Get list of node labels.
nls = self._dataset.get_all_node_labels() nls = self._dataset.get_all_node_labels()
# Generate random costs. # Generate random costs.
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls)) nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl) rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
rand_costs /= np.max(rand_costs) # @todo: maybe not needed. rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
self.__node_label_costs = rand_costs
self._node_label_costs = rand_costs




def __initialize_edge_label_costs(self):
def _initialize_edge_label_costs(self):
# Get list of edge labels. # Get list of edge labels.
els = self._dataset.get_all_edge_labels() els = self._dataset.get_all_edge_labels()
# Generate random costs. # Generate random costs.
nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els)) nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els))
rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el) rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el)
rand_costs /= np.max(rand_costs) # @todo: maybe not needed. rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
self.__edge_label_costs = rand_costs
self._edge_label_costs = rand_costs
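The vector length used in both initializers, n*(n-1)/2 + 2*n for n distinct labels, presumably counts one substitution cost per unordered pair of distinct labels plus one insertion and one deletion cost per label. A quick check of the arithmetic:

n = 4  # e.g. four distinct node labels
nb = n * (n - 1) // 2 + 2 * n
print(nb)  # 6 substitution costs + 8 insertion/deletion costs = 14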
def __optimize_ecm_by_kernel_distances(self):
def _optimize_ecm_by_kernel_distances(self):
# compute distances in feature space. # compute distances in feature space.
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
dis_k_vec = [] dis_k_vec = []
@@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
dis_k_vec = np.array(dis_k_vec) dis_k_vec = np.array(dis_k_vec)
# Set GEDEnv options. # Set GEDEnv options.
# graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
# self.__edit_cost_constants = self.__init_ecc
options = self.__ged_options.copy()
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed.
# graphs = [self._clean_graph(g) for g in self._dataset.graphs]
# self._edit_cost_constants = self._init_ecc
options = self._ged_options.copy()
options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed.
options['node_labels'] = self._dataset.node_labels options['node_labels'] = self._dataset.node_labels
options['edge_labels'] = self._dataset.edge_labels options['edge_labels'] = self._dataset.edge_labels
# options['node_attrs'] = self._dataset.node_attrs # options['node_attrs'] = self._dataset.node_attrs
# options['edge_attrs'] = self._dataset.edge_attrs # options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
options['edge_label_costs'] = self.__edge_label_costs
options['node_label_costs'] = self._node_label_costs
options['edge_label_costs'] = self._edge_label_costs
# Learn cost matrices. # Learn cost matrices.
# Initialize cost learner. # Initialize cost learner.
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec)
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec)
# Run cost learner. # Run cost learner.
cml.update(dis_k_vec, self._dataset.graphs, options) cml.update(dis_k_vec, self._dataset.graphs, options)
# Get results. # Get results.
results = cml.get_results() results = cml.get_results()
self.__converged = results['converged']
self.__itrs = results['itrs']
self.__num_updates_ecs = results['num_updates_ecs']
self._converged = results['converged']
self._itrs = results['itrs']
self._num_updates_ecs = results['num_updates_ecs']
cost_list = results['cost_list'] cost_list = results['cost_list']
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)]
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):]
self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)]
self._edge_label_costs = cost_list[-1][len(self._node_label_costs):]
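dis_k_vec, which feeds cml.update above, appears to be the condensed upper triangle of the kernel distance matrix (the loop body is elided by the hunk). A sketch of that flattening under the usual i < j ordering:

import numpy as np

dis_k_mat = np.array([[0., 1., 2.],
                      [1., 0., 3.],
                      [2., 3., 0.]])
n = len(dis_k_mat)
# Keep each unordered pair (i, j), i < j, exactly once.
dis_k_vec = np.array([dis_k_mat[i, j] for i in range(n) for j in range(i + 1, n)])
print(dis_k_vec)  # [1. 2. 3.]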


def __gmg_bcu(self):
def _gmg_bcu(self):
""" """
The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).


@@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
# Set up the ged environment. # Set up the ged environment.
ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable. ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable.
# gedlibpy.restart_env() # gedlibpy.restart_env()
ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants)
graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants)
graphs = [self._clean_graph(g) for g in self._dataset.graphs]
for g in graphs: for g in graphs:
ged_env.add_nx_graph(g, '') ged_env.add_nx_graph(g, '')
graph_ids = ged_env.get_all_graph_ids() graph_ids = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels() node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels() edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels))
node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels))
ged_env.set_label_costs(node_label_costs, edge_label_costs) ged_env.set_label_costs(node_label_costs, edge_label_costs)
set_median_id = ged_env.add_graph('set_median') set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median') gen_median_id = ged_env.add_graph('gen_median')
ged_env.init(init_type=self.__ged_options['init_option'])
ged_env.init(init_type=self._ged_options['init_option'])
# Set up the median graph estimator. # Set up the median graph estimator.
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy()
self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost']))
self._mge.set_refine_method(self._ged_options['method'], self._ged_options)
options = self._mge_options.copy()
if not 'seed' in options: if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
options['parallel'] = self.__parallel
options['parallel'] = self._parallel
# Select the GED algorithm. # Select the GED algorithm.
self.__mge.set_options(mge_options_to_string(options))
self.__mge.set_label_names(node_labels=self._dataset.node_labels,
self._mge.set_options(mge_options_to_string(options))
self._mge.set_label_names(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels, edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs, node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs) edge_attrs=self._dataset.edge_attrs)
ged_options = self.__ged_options.copy()
if self.__parallel:
ged_options = self._ged_options.copy()
if self._parallel:
ged_options['threads'] = 1 ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
self._mge.set_init_method(ged_options['method'], ged_options)
self._mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator. # Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id)
self._mge.run(graph_ids, set_median_id, gen_median_id)
# Get SODs. # Get SODs.
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
self._sod_set_median = self._mge.get_sum_of_distances('initialized')
self._sod_gen_median = self._mge.get_sum_of_distances('converged')
# Get median graphs. # Get median graphs.
self.__set_median = ged_env.get_nx_graph(set_median_id)
self.__gen_median = ged_env.get_nx_graph(gen_median_id)
self._set_median = ged_env.get_nx_graph(set_median_id)
self._gen_median = ged_env.get_nx_graph(gen_median_id)
def __compute_distances_to_true_median(self):
def _compute_distances_to_true_median(self):
# compute distance in kernel space for set median. # compute distance in kernel space for set median.
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options)
if self._kernel_options['normalize']: if self._kernel_options['normalize']:
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernel_sm = 1 kernel_sm = 1
# @todo: not correct kernel value # @todo: not correct kernel value
gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1)
self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_sm, withterm3=False) gram_with_sm, withterm3=False)
# compute distance in kernel space for generalized median. # compute distance in kernel space for generalized median.
kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options)
if self._kernel_options['normalize']: if self._kernel_options['normalize']:
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernel_gm = 1 kernel_gm = 1
gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1)
self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False) gram_with_gm, withterm3=False)
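Both the set-median and gen-median blocks rely on two standard identities: the cosine normalization of the kernel, and the distance in kernel space between a graph g and the weighted mean of the embedded median set, which compute_k_dis evaluates:

\tilde{k}(g, g') = \frac{k(g, g')}{\sqrt{k(g, g)\, k(g', g')}},
\qquad
d\Big(g, \sum_{i=1}^{N} \alpha_i \phi(g_i)\Big)
  = \sqrt{k(g, g) - 2 \sum_{i} \alpha_i\, k(g, g_i)
          + \sum_{i, j} \alpha_i \alpha_j\, k(g_i, g_j)},

with \alpha_i = 1/N here; withterm3=False drops the last sum, which is constant across candidate graphs and so does not affect the argmin.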
@@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False)) gram_with_gm, withterm3=False))
idx_k_dis_median_set_min = np.argmin(k_dis_median_set) idx_k_dis_median_set_min = np.argmin(k_dis_median_set)
self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
if self._verbose >= 2: if self._verbose >= 2:
print() print()
print('distance in kernel space for set median:', self.__k_dis_set_median)
print('distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('distance in kernel space for set median:', self._k_dis_set_median)
print('distance in kernel space for generalized median:', self._k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
print('distance in kernel space for each graph in median set:', k_dis_median_set) print('distance in kernel space for each graph in median set:', k_dis_median_set)
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
""" """
Cleans node and edge labels and attributes of the given graph. Cleans node and edge labels and attributes of the given graph.
""" """
@@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
@property @property
def mge(self): def mge(self):
return self.__mge
return self._mge
@property @property
def ged_options(self): def ged_options(self):
return self.__ged_options
return self._ged_options


@ged_options.setter @ged_options.setter
def ged_options(self, value): def ged_options(self, value):
self.__ged_options = value
self._ged_options = value


@property @property
def mge_options(self): def mge_options(self):
return self.__mge_options
return self._mge_options


@mge_options.setter @mge_options.setter
def mge_options(self, value): def mge_options(self, value):
self.__mge_options = value
self._mge_options = value




@property @property
def fit_method(self): def fit_method(self):
return self.__fit_method
return self._fit_method


@fit_method.setter @fit_method.setter
def fit_method(self, value): def fit_method(self, value):
self.__fit_method = value
self._fit_method = value
@property @property
def init_ecc(self): def init_ecc(self):
return self.__init_ecc
return self._init_ecc


@init_ecc.setter @init_ecc.setter
def init_ecc(self, value): def init_ecc(self, value):
self.__init_ecc = value
self._init_ecc = value
@property @property
def set_median(self): def set_median(self):
return self.__set_median
return self._set_median




@property @property
def gen_median(self): def gen_median(self):
return self.__gen_median
return self._gen_median
@property @property
def best_from_dataset(self): def best_from_dataset(self):
return self.__best_from_dataset
return self._best_from_dataset
@property @property
def gram_matrix_unnorm(self): def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm
return self._gram_matrix_unnorm
@gram_matrix_unnorm.setter @gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value): def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value
self._gram_matrix_unnorm = value

+ 283
- 283
gklearn/preimage/median_preimage_generator_py.py
File diff suppressed because it is too large
View File


+ 109
- 109
gklearn/preimage/random_preimage_generator.py View File

@@ -26,43 +26,43 @@ class RandomPreimageGenerator(PreimageGenerator):
def __init__(self, dataset=None): def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset) PreimageGenerator.__init__(self, dataset=dataset)
# arguments to set. # arguments to set.
self.__k = 5 # number of nearest neighbors of phi in D_N.
self.__r_max = 10 # maximum number of iterations.
self.__l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}.
self.__alphas = None # weights of linear combinations of points in kernel space.
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__time_limit_in_sec = 0
self.__max_itrs = 20
self._k = 5 # number of nearest neighbors of phi in D_N.
self._r_max = 10 # maximum number of iterations.
self._l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}.
self._alphas = None # weights of linear combinations of points in kernel space.
self._parallel = True
self._n_jobs = multiprocessing.cpu_count()
self._time_limit_in_sec = 0
self._max_itrs = 20
# values to compute. # values to compute.
self.__runtime_generate_preimage = None
self.__runtime_total = None
self.__preimage = None
self.__best_from_dataset = None
self.__k_dis_preimage = None
self.__k_dis_dataset = None
self.__itrs = 0
self.__converged = False # @todo
self.__num_updates = 0
self._runtime_generate_preimage = None
self._runtime_total = None
self._preimage = None
self._best_from_dataset = None
self._k_dis_preimage = None
self._k_dis_dataset = None
self._itrs = 0
self._converged = False # @todo
self._num_updates = 0
# values that can be set or to be computed. # values that can be set or to be computed.
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None
self._gram_matrix_unnorm = None
self._runtime_precompute_gm = None


def set_options(self, **kwargs): def set_options(self, **kwargs):
self._kernel_options = kwargs.get('kernel_options', {}) self._kernel_options = kwargs.get('kernel_options', {})
self._graph_kernel = kwargs.get('graph_kernel', None) self._graph_kernel = kwargs.get('graph_kernel', None)
self._verbose = kwargs.get('verbose', 2) self._verbose = kwargs.get('verbose', 2)
self.__k = kwargs.get('k', 5)
self.__r_max = kwargs.get('r_max', 10)
self.__l = kwargs.get('l', 500)
self.__alphas = kwargs.get('alphas', None)
self.__parallel = kwargs.get('parallel', True)
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self.__max_itrs = kwargs.get('max_itrs', 20)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self._k = kwargs.get('k', 5)
self._r_max = kwargs.get('r_max', 10)
self._l = kwargs.get('l', 500)
self._alphas = kwargs.get('alphas', None)
self._parallel = kwargs.get('parallel', True)
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self._max_itrs = kwargs.get('max_itrs', 20)
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
def run(self): def run(self):
@@ -78,65 +78,65 @@ class RandomPreimageGenerator(PreimageGenerator):
start = time.time() start = time.time()
# 1. precompute gram matrix. # 1. precompute gram matrix.
if self.__gram_matrix_unnorm is None:
if self._gram_matrix_unnorm is None:
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
end_precompute_gm = time.time() end_precompute_gm = time.time()
self.__runtime_precompute_gm = end_precompute_gm - start
self._runtime_precompute_gm = end_precompute_gm - start
else: else:
if self.__runtime_precompute_gm is None:
if self._runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
if self._kernel_options['normalize']: if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
else: else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
end_precompute_gm = time.time() end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
start -= self._runtime_precompute_gm
# 2. compute k nearest neighbors of phi in D_N. # 2. compute k nearest neighbors of phi in D_N.
if self._verbose >= 2: if self._verbose >= 2:
print('\nstart computing k nearest neighbors of phi in D_N...\n') print('\nstart computing k nearest neighbors of phi in D_N...\n')
D_N = self._dataset.graphs D_N = self._dataset.graphs
if self.__alphas is None:
self.__alphas = [1 / len(D_N)] * len(D_N)
if self._alphas is None:
self._alphas = [1 / len(D_N)] * len(D_N)
k_dis_list = [] # distance between g_star and each graph. k_dis_list = [] # distance between g_star and each graph.
term3 = 0 term3 = 0
for i1, a1 in enumerate(self.__alphas):
for i2, a2 in enumerate(self.__alphas):
for i1, a1 in enumerate(self._alphas):
for i2, a2 in enumerate(self._alphas):
term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2]
for idx in range(len(D_N)): for idx in range(len(D_N)):
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self._alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
# sort. # sort.
sort_idx = np.argsort(k_dis_list) sort_idx = np.argsort(k_dis_list)
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances.
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self._k]] # the k shortest distances.
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N
self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple.
self.__k_dis_dataset = dis_gs[0]
self._best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple.
self._k_dis_dataset = dis_gs[0]
if self.__k_dis_dataset == 0: # get the exact pre-image.
if self._k_dis_dataset == 0: # get the exact pre-image.
end_generate_preimage = time.time() end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = self.__best_from_dataset.copy()
self.__k_dis_preimage = self.__k_dis_dataset
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self._runtime_total = end_generate_preimage - start
self._preimage = self._best_from_dataset.copy()
self._k_dis_preimage = self._k_dis_dataset
if self._verbose: if self._verbose:
print() print()
print('=============================================================================') print('=============================================================================')
print('The exact pre-image is found from the input dataset.') print('The exact pre-image is found from the input dataset.')
print('-----------------------------------------------------------------------------') print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Distance in kernel space for the best graph from dataset and for preimage:', self._k_dis_dataset)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('=============================================================================') print('=============================================================================')
print() print()
return return
dhat = dis_gs[0] # the nearest distance dhat = dis_gs[0] # the nearest distance
Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors
Gk = [D_N[ig].copy() for ig in sort_idx[0:self._k]] # the k nearest neighbors
Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk]
# 3. start iterations. # 3. start iterations.
@@ -146,12 +146,12 @@ class RandomPreimageGenerator(PreimageGenerator):
dihat_list = [] dihat_list = []
r = 0 r = 0
dis_of_each_itr = [dhat] dis_of_each_itr = [dhat]
if self.__parallel:
if self._parallel:
self._kernel_options['parallel'] = None self._kernel_options['parallel'] = None
self.__itrs = 0
self.__num_updates = 0
timer = Timer(self.__time_limit_in_sec)
while not self.__termination_criterion_met(timer, self.__itrs, r):
self._itrs = 0
self._num_updates = 0
timer = Timer(self._time_limit_in_sec)
while not self._termination_criterion_met(timer, self._itrs, r):
print('\n- r =', r) print('\n- r =', r)
found = False found = False
dis_bests = dis_gs + dihat_list dis_bests = dis_gs + dihat_list
@@ -173,7 +173,7 @@ class RandomPreimageGenerator(PreimageGenerator):
nb_modif = 1 nb_modif = 1
for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)):
nb_modif *= nb / (fdgs_max - idx) nb_modif *= nb / (fdgs_max - idx)
while fdgs_max < nb_vpairs_min and nb_modif < self.__l:
while fdgs_max < nb_vpairs_min and nb_modif < self._l:
fdgs_max += 1 fdgs_max += 1
nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max
nb_increase = int(fdgs_max - fdgs_max_old) nb_increase = int(fdgs_max - fdgs_max_old)
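The incremental products above appear to maintain nb_modif = C(nb_vpairs_min, fdgs_max), the number of distinct ways to pick fdgs_max vertex pairs to edit, so the while loop grows fdgs_max until at least l distinct candidate modifications exist (the factor (n - k)/(k + 1) applied after each increment matches C(n, k+1) = C(n, k) * (n - k)/(k + 1)). A self-contained check of the incremental formula:

from math import comb

n, k = 10, 3  # stand-ins for nb_vpairs_min and fdgs_max
nb_modif = 1
for idx, nb in enumerate(range(n, n - k, -1)):
    nb_modif *= nb / (k - idx)  # same update as the loop above
assert nb_modif == comb(n, k) == 120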
@@ -184,7 +184,7 @@ class RandomPreimageGenerator(PreimageGenerator):
for ig, gs in enumerate(Gs_nearest + gihat_list): for ig, gs in enumerate(Gs_nearest + gihat_list):
if self._verbose >= 2: if self._verbose >= 2:
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list))
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
gnew, dhat, found = self._generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
if found: if found:
r = 0 r = 0
@@ -194,51 +194,51 @@ class RandomPreimageGenerator(PreimageGenerator):
r += 1 r += 1
dis_of_each_itr.append(dhat) dis_of_each_itr.append(dhat)
self.__itrs += 1
self._itrs += 1
if self._verbose >= 2: if self._verbose >= 2:
print('Total number of iterations is', self.__itrs, '.')
print('The preimage is updated', self.__num_updates, 'times.')
print('Total number of iterations is', self._itrs, '.')
print('The preimage is updated', self._num_updates, 'times.')
print('The shortest distances for previous iterations are', dis_of_each_itr, '.') print('The shortest distances for previous iterations are', dis_of_each_itr, '.')
# get results and print. # get results and print.
end_generate_preimage = time.time() end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self.__k_dis_preimage = dhat
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self._runtime_total = end_generate_preimage - start
self._preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self._k_dis_preimage = dhat
if self._verbose: if self._verbose:
print() print()
print('=============================================================================') print('=============================================================================')
print('Finished generation of preimages.') print('Finished generation of preimages.')
print('-----------------------------------------------------------------------------') print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset)
print('Distance in kernel space for the preimage:', self.__k_dis_preimage)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating preimage:', self.__num_updates)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Distance in kernel space for the best graph from dataset:', self._k_dis_dataset)
print('Distance in kernel space for the preimage:', self._k_dis_preimage)
print('Total number of iterations for optimizing:', self._itrs)
print('Total number of updating preimage:', self._num_updates)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('=============================================================================') print('=============================================================================')
print() print()
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self.__parallel:
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
def _generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self._parallel:
gnew, dhat, found = self._generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
else: else:
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
gnew, dhat, found = self._generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
return gnew, dhat, found return gnew, dhat, found
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
def _generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None gnew = None
updated = False updated = False
for trial in range(0, self.__l):
for trial in range(0, self._l):
if self._verbose >= 2: if self._verbose >= 2:
print('---', trial + 1, 'trial out of', self.__l)
print('---', trial + 1, 'trial out of', self._l)


gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial)


# get the better graph preimage. # get the better graph preimage.
if dnew <= dhat: # @todo: the new distance is smaller or also equal? if dnew <= dhat: # @todo: the new distance is smaller or also equal?
@@ -257,14 +257,14 @@ class RandomPreimageGenerator(PreimageGenerator):
found = True # found better or equally good graph. found = True # found better or equally good graph.
if updated: if updated:
self.__num_updates += 1
self._num_updates += 1
return gnew, dhat, found return gnew, dhat, found
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
def _generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
len_itr = self.__l
len_itr = self._l
gnew_list = [None] * len_itr
dnew_list = [None] * len_itr
itr = range(0, len_itr)
@@ -295,7 +295,7 @@ class RandomPreimageGenerator(PreimageGenerator):
print('I am smaller!')
print('index (as in D_k U {gihat}) =', str(ig))
print('distance:', dhat, '->', dnew, '\n')
self.__num_updates += 1
self._num_updates += 1
else:
if self._verbose >= 2:
print('I am equal!')
@@ -308,11 +308,11 @@ class RandomPreimageGenerator(PreimageGenerator):
def _generate_graph_parallel(self, g_init, fdgs, term3, itr):
trial = itr
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial)
return trial, gtemp, dnew
def __do_trial(self, g_init, fdgs, term3, trial):
def _do_trial(self, g_init, fdgs, term3, trial):
# add and delete edges.
gtemp = g_init.copy()
seed = (trial + int(time.time())) % (2 ** 32 - 1)
@@ -339,51 +339,51 @@ class RandomPreimageGenerator(PreimageGenerator):
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernel_gtmp = 1
# @todo: not correct kernel value
gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self._alphas, gram_with_gtmp, term3=term3, withterm3=True)
return gtemp, dnew


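A side note on the normalization step in _do_trial above: dividing k(g, h_i) by sqrt(k(g, g) * k(h_i, h_i)) is the standard cosine normalization, which makes every self-kernel equal to 1 (the `# @todo: not correct kernel value` remark notwithstanding). A minimal NumPy sketch of the same computation, with hypothetical variable names rather than gklearn's API:

import numpy as np

def normalize_kernels_to_g(kernels_to_g, k_gg, gram_diag):
    # k_norm(g, h_i) = k(g, h_i) / sqrt(k(g, g) * k(h_i, h_i))
    return [k / np.sqrt(k_gg * d) for k, d in zip(kernels_to_g, gram_diag)]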
def get_results(self):
results = {}
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['k_dis_dataset'] = self.__k_dis_dataset
results['k_dis_preimage'] = self.__k_dis_preimage
results['itrs'] = self.__itrs
results['num_updates'] = self.__num_updates
results['runtime_precompute_gm'] = self._runtime_precompute_gm
results['runtime_generate_preimage'] = self._runtime_generate_preimage
results['runtime_total'] = self._runtime_total
results['k_dis_dataset'] = self._k_dis_dataset
results['k_dis_preimage'] = self._k_dis_preimage
results['itrs'] = self._itrs
results['num_updates'] = self._num_updates
return results




def __termination_criterion_met(self, timer, itr, r):
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
# if self.__state == AlgorithmState.TERMINATED:
# self.__state = AlgorithmState.INITIALIZED
def _termination_criterion_met(self, timer, itr, r):
if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
# if self._state == AlgorithmState.TERMINATED:
# self._state = AlgorithmState.INITIALIZED
return True
return (r >= self.__r_max if self.__r_max >= 0 else False)
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
return (r >= self._r_max if self._r_max >= 0 else False)
# return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)
@property
def preimage(self):
return self.__preimage
return self._preimage
@property
def best_from_dataset(self):
return self.__best_from_dataset
return self._best_from_dataset
@property
def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm
return self._gram_matrix_unnorm
@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value
self._gram_matrix_unnorm = value

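Context for this whole commit: inside a class body, Python mangles any identifier with two leading underscores (and at most one trailing underscore), so self.__preimage is actually stored as self._RandomPreimageGenerator__preimage. That makes the "private" members invisible under their written names to subclasses such as the generators deriving from PreimageGenerator, whereas a single leading underscore is a pure naming convention with no renaming. A minimal sketch of the difference, using hypothetical class names rather than gklearn code:

class Base:
    def __init__(self):
        self.__hidden = 1  # compiled to self._Base__hidden
        self._shared = 2   # convention only, no mangling

class Child(Base):
    def peek(self):
        # self.__hidden here would look up self._Child__hidden -> AttributeError
        return self._shared

print(Child().peek())          # 2
print(Child()._Base__hidden)   # 1, reachable only through the mangled name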
+ 10
- 10
gklearn/preimage/remove_best_graph.py View File

@@ -35,13 +35,13 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
else:
fn_output_detail, fn_output_summary = None, None
# 2. compute/load Gram matrix a priori.
print('2. computing/loading Gram matrix...')
gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)
gram_matrix_unnorm_list, time_precompute_gm_list = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)
sod_sm_list = []
sod_gm_list = []
@@ -82,7 +82,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
# 3. get the best graph and remove it from median set.
print('3. getting and removing the best graph...')
gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset]
best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
best_index, best_dis, best_graph = _get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index]
num_graphs -= 1
if num_graphs == 1:
@@ -294,7 +294,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
print('\ncomplete.\n')




def __get_best_graph(Gn, gram_matrix):
def _get_best_graph(Gn, gram_matrix):
k_dis_list = []
for idx in range(len(Gn)):
k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False))
@@ -313,7 +313,7 @@ def get_relations(sign):
return 'worse'




def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
if load_gm == 'auto':
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
@@ -325,7 +325,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
for dataset in datasets:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
time_precompute_gm_list.append(time_precompute_gm)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
@@ -333,7 +333,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
for dataset in datasets:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
time_precompute_gm_list.append(time_precompute_gm)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
@@ -346,7 +346,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
return gram_matrix_unnorm_list, time_precompute_gm_list




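The 'auto' branch of __get_gram_matrix above is a simple disk cache: compute the unnormalized Gram matrices once, persist them with np.savez, and reload the arrays on later runs. A minimal sketch of that pattern, with a hypothetical file name (object arrays need allow_pickle=True on load):

import os
import numpy as np

gm_path = 'gram_matrix_unnorm.MUTAG.Treelet.gm'  # np.savez appends '.npz'
if os.path.isfile(gm_path + '.npz'):
    gmfile = np.load(gm_path + '.npz', allow_pickle=True)
    gm_list = list(gmfile['gram_matrix_unnorm_list'])
    time_list = list(gmfile['run_time_list'])
else:
    gm_list, time_list = [], []  # fill by computing each kernel's Gram matrix
    np.savez(gm_path, gram_matrix_unnorm_list=gm_list, run_time_list=time_list)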
def __get_graph_kernel(dataset, kernel_options):
def _get_graph_kernel(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -358,7 +358,7 @@ def __get_graph_kernel(dataset, kernel_options):
return graph_kernel
def __compute_gram_matrix_unnorm(dataset, kernel_options):
def _compute_gram_matrix_unnorm(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -374,7 +374,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
return gram_matrix_unnorm, run_time
def __init_output_file(ds_name, gkernel, fit_method, dir_output):
def _init_output_file(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'


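One subtlety of this rename: for module-level helpers such as __get_best_graph, the double underscore never triggered name mangling in the first place (mangling only applies inside class bodies), so here the change is purely a matter of convention. Under either spelling the helper is already hidden from star-imports. A small illustration with a hypothetical module:

# helpers.py (hypothetical)
def _get_best_graph():
    return 'best'

def public_api():
    return _get_best_graph()

# client code: `from helpers import *` binds public_api but not
# _get_best_graph, unless helpers defines __all__ to say otherwise.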
+ 2
- 2
gklearn/preimage/utils.py View File

@@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
sod_sm_list = []
sod_gm_list = []
@@ -307,7 +307,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
print('\ncomplete.\n')


def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
def _init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'


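An aside on the directory handling in these _init_output_file helpers: the exists-then-create pair can race if two runs start at once. Since Python 3.2, os.makedirs accepts exist_ok, which does the same thing atomically; a possible simplification, not part of this commit:

import os

def _ensure_output_dir(dir_output):
    # same effect as `if not os.path.exists(dir_output): os.makedirs(dir_output)`,
    # but safe when several processes create the directory concurrently
    os.makedirs(dir_output, exist_ok=True)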
+ 244
- 244
gklearn/utils/dataset.py View File

@@ -16,54 +16,54 @@ class Dataset(object):
def __init__(self, filename=None, filename_targets=None, **kwargs):
if filename is None:
self.__graphs = None
self.__targets = None
self.__node_labels = None
self.__edge_labels = None
self.__node_attrs = None
self.__edge_attrs = None
self._graphs = None
self._targets = None
self._node_labels = None
self._edge_labels = None
self._node_attrs = None
self._edge_attrs = None
else:
self.load_dataset(filename, filename_targets=filename_targets, **kwargs)
self.__substructures = None
self.__node_label_dim = None
self.__edge_label_dim = None
self.__directed = None
self.__dataset_size = None
self.__total_node_num = None
self.__ave_node_num = None
self.__min_node_num = None
self.__max_node_num = None
self.__total_edge_num = None
self.__ave_edge_num = None
self.__min_edge_num = None
self.__max_edge_num = None
self.__ave_node_degree = None
self.__min_node_degree = None
self.__max_node_degree = None
self.__ave_fill_factor = None
self.__min_fill_factor = None
self.__max_fill_factor = None
self.__node_label_nums = None
self.__edge_label_nums = None
self.__node_attr_dim = None
self.__edge_attr_dim = None
self.__class_number = None
self._substructures = None
self._node_label_dim = None
self._edge_label_dim = None
self._directed = None
self._dataset_size = None
self._total_node_num = None
self._ave_node_num = None
self._min_node_num = None
self._max_node_num = None
self._total_edge_num = None
self._ave_edge_num = None
self._min_edge_num = None
self._max_edge_num = None
self._ave_node_degree = None
self._min_node_degree = None
self._max_node_degree = None
self._ave_fill_factor = None
self._min_fill_factor = None
self._max_fill_factor = None
self._node_label_nums = None
self._edge_label_nums = None
self._node_attr_dim = None
self._edge_attr_dim = None
self._class_number = None
def load_dataset(self, filename, filename_targets=None, **kwargs):
self.__graphs, self.__targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs']
self.clean_labels()
def load_graphs(self, graphs, targets=None):
# this has to be followed by set_labels().
self.__graphs = graphs
self.__targets = targets
self._graphs = graphs
self._targets = targets
# self.set_labels_attrs() # @todo
@@ -71,108 +71,108 @@ class Dataset(object):
current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'Acyclic':
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'AIDS':
ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Alkane':
ds_file = current_path + '../../datasets/Alkane/dataset.ds'
fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file, filename_targets=fn_targets)
self._graphs, self._targets, label_names = load_dataset(ds_file, filename_targets=fn_targets)
elif ds_name == 'COIL-DEL':
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'COIL-RAG':
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'COLORS-3':
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Cuneiform':
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'DD':
ds_file = current_path + '../../datasets/DD/DD_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'ENZYMES':
ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Fingerprint':
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'FRANKENSTEIN':
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-high': # node non-symb
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-low': # node non-symb
ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-med': # node non-symb
ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'MAO':
ds_file = current_path + '../../datasets/MAO/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Monoterpenoides':
ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'MUTAG':
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI1':
ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI109':
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'PAH':
ds_file = current_path + '../../datasets/PAH/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'SYNTHETIC':
pass
elif ds_name == 'SYNTHETICnew':
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Synthie':
pass
else:
raise Exception('The dataset name "', ds_name, '" is not pre-defined.')
self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs']
self.clean_labels()


def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
self.__node_labels = node_labels
self.__node_attrs = node_attrs
self.__edge_labels = edge_labels
self.__edge_attrs = edge_attrs
self._node_labels = node_labels
self._node_attrs = node_attrs
self._edge_labels = edge_labels
self._edge_attrs = edge_attrs


def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None):
# @todo: remove labels which have only one possible value.
if node_labels is None:
self.__node_labels = self.__graphs[0].graph['node_labels']
self._node_labels = self._graphs[0].graph['node_labels']
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
if node_attrs is None:
self.__node_attrs = self.__graphs[0].graph['node_attrs']
self._node_attrs = self._graphs[0].graph['node_attrs']
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
if edge_labels is None:
self.__edge_labels = self.__graphs[0].graph['edge_labels']
self._edge_labels = self._graphs[0].graph['edge_labels']
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
if edge_attrs is None:
self.__edge_attrs = self.__graphs[0].graph['edge_attrs']
self._edge_attrs = self._graphs[0].graph['edge_attrs']
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
@@ -291,145 +291,145 @@ class Dataset(object):
# dataset size
if 'dataset_size' in keys:
if self.__dataset_size is None:
self.__dataset_size = self.__get_dataset_size()
infos['dataset_size'] = self.__dataset_size
if self._dataset_size is None:
self._dataset_size = self._get_dataset_size()
infos['dataset_size'] = self._dataset_size
# graph node number
if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']):
all_node_nums = self.__get_all_node_nums()
all_node_nums = self._get_all_node_nums()


if 'total_node_num' in keys:
if self.__total_node_num is None:
self.__total_node_num = self.__get_total_node_num(all_node_nums)
infos['total_node_num'] = self.__total_node_num
if self._total_node_num is None:
self._total_node_num = self._get_total_node_num(all_node_nums)
infos['total_node_num'] = self._total_node_num
if 'ave_node_num' in keys:
if self.__ave_node_num is None:
self.__ave_node_num = self.__get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self.__ave_node_num
if self._ave_node_num is None:
self._ave_node_num = self._get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self._ave_node_num
if 'min_node_num' in keys:
if self.__min_node_num is None:
self.__min_node_num = self.__get_min_node_num(all_node_nums)
infos['min_node_num'] = self.__min_node_num
if self._min_node_num is None:
self._min_node_num = self._get_min_node_num(all_node_nums)
infos['min_node_num'] = self._min_node_num
if 'max_node_num' in keys:
if self.__max_node_num is None:
self.__max_node_num = self.__get_max_node_num(all_node_nums)
infos['max_node_num'] = self.__max_node_num
if self._max_node_num is None:
self._max_node_num = self._get_max_node_num(all_node_nums)
infos['max_node_num'] = self._max_node_num
# graph edge number
if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']):
all_edge_nums = self.__get_all_edge_nums()
all_edge_nums = self._get_all_edge_nums()


if 'total_edge_num' in keys:
if self.__total_edge_num is None:
self.__total_edge_num = self.__get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self.__total_edge_num
if self._total_edge_num is None:
self._total_edge_num = self._get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self._total_edge_num
if 'ave_edge_num' in keys:
if self.__ave_edge_num is None:
self.__ave_edge_num = self.__get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self.__ave_edge_num
if self._ave_edge_num is None:
self._ave_edge_num = self._get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self._ave_edge_num
if 'max_edge_num' in keys:
if self.__max_edge_num is None:
self.__max_edge_num = self.__get_max_edge_num(all_edge_nums)
infos['max_edge_num'] = self.__max_edge_num
if self._max_edge_num is None:
self._max_edge_num = self._get_max_edge_num(all_edge_nums)
infos['max_edge_num'] = self._max_edge_num


if 'min_edge_num' in keys:
if self.__min_edge_num is None:
self.__min_edge_num = self.__get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self.__min_edge_num
if self._min_edge_num is None:
self._min_edge_num = self._get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self._min_edge_num
# label number
if 'node_label_dim' in keys:
if self.__node_label_dim is None:
self.__node_label_dim = self.__get_node_label_dim()
infos['node_label_dim'] = self.__node_label_dim
if self._node_label_dim is None:
self._node_label_dim = self._get_node_label_dim()
infos['node_label_dim'] = self._node_label_dim
if 'node_label_nums' in keys:
if self.__node_label_nums is None:
self.__node_label_nums = {}
for node_label in self.__node_labels:
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label)
infos['node_label_nums'] = self.__node_label_nums
if self._node_label_nums is None:
self._node_label_nums = {}
for node_label in self._node_labels:
self._node_label_nums[node_label] = self._get_node_label_num(node_label)
infos['node_label_nums'] = self._node_label_nums
if 'edge_label_dim' in keys:
if self.__edge_label_dim is None:
self.__edge_label_dim = self.__get_edge_label_dim()
infos['edge_label_dim'] = self.__edge_label_dim
if self._edge_label_dim is None:
self._edge_label_dim = self._get_edge_label_dim()
infos['edge_label_dim'] = self._edge_label_dim
if 'edge_label_nums' in keys:
if self.__edge_label_nums is None:
self.__edge_label_nums = {}
for edge_label in self.__edge_labels:
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label)
infos['edge_label_nums'] = self.__edge_label_nums
if self._edge_label_nums is None:
self._edge_label_nums = {}
for edge_label in self._edge_labels:
self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label)
infos['edge_label_nums'] = self._edge_label_nums
if 'directed' in keys or 'substructures' in keys:
if self.__directed is None:
self.__directed = self.__is_directed()
infos['directed'] = self.__directed
if self._directed is None:
self._directed = self._is_directed()
infos['directed'] = self._directed
# node degree
if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']):
all_node_degrees = self.__get_all_node_degrees()
all_node_degrees = self._get_all_node_degrees()
if 'ave_node_degree' in keys:
if self.__ave_node_degree is None:
self.__ave_node_degree = self.__get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self.__ave_node_degree
if self._ave_node_degree is None:
self._ave_node_degree = self._get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self._ave_node_degree
if 'max_node_degree' in keys:
if self.__max_node_degree is None:
self.__max_node_degree = self.__get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self.__max_node_degree
if self._max_node_degree is None:
self._max_node_degree = self._get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self._max_node_degree
if 'min_node_degree' in keys:
if self.__min_node_degree is None:
self.__min_node_degree = self.__get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self.__min_node_degree
if self._min_node_degree is None:
self._min_node_degree = self._get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self._min_node_degree
# fill factor
if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']):
all_fill_factors = self.__get_all_fill_factors()
all_fill_factors = self._get_all_fill_factors()
if 'ave_fill_factor' in keys:
if self.__ave_fill_factor is None:
self.__ave_fill_factor = self.__get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self.__ave_fill_factor
if self._ave_fill_factor is None:
self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self._ave_fill_factor
if 'max_fill_factor' in keys:
if self.__max_fill_factor is None:
self.__max_fill_factor = self.__get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self.__max_fill_factor
if self._max_fill_factor is None:
self._max_fill_factor = self._get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self._max_fill_factor
if 'min_fill_factor' in keys:
if self.__min_fill_factor is None:
self.__min_fill_factor = self.__get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self.__min_fill_factor
if self._min_fill_factor is None:
self._min_fill_factor = self._get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self._min_fill_factor
if 'substructures' in keys:
if self.__substructures is None:
self.__substructures = self.__get_substructures()
infos['substructures'] = self.__substructures
if self._substructures is None:
self._substructures = self._get_substructures()
infos['substructures'] = self._substructures
if 'class_number' in keys:
if self.__class_number is None:
self.__class_number = self.__get_class_number()
infos['class_number'] = self.__class_number
if self._class_number is None:
self._class_number = self._get_class_number()
infos['class_number'] = self._class_number
if 'node_attr_dim' in keys:
if self.__node_attr_dim is None:
self.__node_attr_dim = self.__get_node_attr_dim()
infos['node_attr_dim'] = self.__node_attr_dim
if self._node_attr_dim is None:
self._node_attr_dim = self._get_node_attr_dim()
infos['node_attr_dim'] = self._node_attr_dim
if 'edge_attr_dim' in keys:
if self.__edge_attr_dim is None:
self.__edge_attr_dim = self.__get_edge_attr_dim()
infos['edge_attr_dim'] = self.__edge_attr_dim
if self._edge_attr_dim is None:
self._edge_attr_dim = self._get_edge_attr_dim()
infos['edge_attr_dim'] = self._edge_attr_dim
# entropy of degree distribution.
@@ -438,14 +438,14 @@ class Dataset(object):
base = params['all_degree_entropy']['base']
else:
base = None
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base)
infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base)
if 'ave_degree_entropy' in keys:
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']):
base = params['ave_degree_entropy']['base']
else:
base = None
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base))
infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base))
return infos
@@ -457,12 +457,12 @@ class Dataset(object):
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
node_labels = [item for item in node_labels if item in self.__node_labels]
edge_labels = [item for item in edge_labels if item in self.__edge_labels]
node_attrs = [item for item in node_attrs if item in self.__node_attrs]
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs]
node_labels = [item for item in node_labels if item in self._node_labels]
edge_labels = [item for item in edge_labels if item in self._edge_labels]
node_attrs = [item for item in node_attrs if item in self._node_attrs]
edge_attrs = [item for item in edge_attrs if item in self._edge_attrs]


for g in self.__graphs:
for g in self._graphs:
for nd in g.nodes():
for nl in node_labels:
del g.nodes[nd][nl]
@@ -474,99 +474,99 @@ class Dataset(object):
for ea in edge_attrs:
del g.edges[ed][ea]
if len(node_labels) > 0:
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
self._node_labels = [nl for nl in self._node_labels if nl not in node_labels]
if len(edge_labels) > 0:
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
self._edge_labels = [el for el in self._edge_labels if el not in edge_labels]
if len(node_attrs) > 0:
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
self._node_attrs = [na for na in self._node_attrs if na not in node_attrs]
if len(edge_attrs) > 0:
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs]
def clean_labels(self):
labels = []
for name in self.__node_labels:
for name in self._node_labels:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_node_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for nd in G.nodes():
del G.nodes[nd][name]
self.__node_labels = labels
self._node_labels = labels


labels = []
for name in self.__edge_labels:
for name in self._edge_labels:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_edge_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for ed in G.edges():
del G.edges[ed][name]
self.__edge_labels = labels
self._edge_labels = labels


labels = []
for name in self.__node_attrs:
for name in self._node_attrs:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_node_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for nd in G.nodes():
del G.nodes[nd][name]
self.__node_attrs = labels
self._node_attrs = labels


labels = []
for name in self.__edge_attrs:
for name in self._edge_attrs:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_edge_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for ed in G.edges():
del G.edges[ed][name]
self.__edge_attrs = labels
self._edge_attrs = labels
def cut_graphs(self, range_):
self.__graphs = [self.__graphs[i] for i in range_]
if self.__targets is not None:
self.__targets = [self.__targets[i] for i in range_]
self._graphs = [self._graphs[i] for i in range_]
if self._targets is not None:
self._targets = [self._targets[i] for i in range_]
self.clean_labels()




def trim_dataset(self, edge_required=False):
if edge_required:
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)]
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)]
else:
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if nx.number_of_nodes(g) != 0]
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if nx.number_of_nodes(g) != 0]
idx = [p[0] for p in trimed_pairs]
self.__graphs = [p[1] for p in trimed_pairs]
self.__targets = [self.__targets[i] for i in idx]
self._graphs = [p[1] for p in trimed_pairs]
self._targets = [self._targets[i] for i in idx]
self.clean_labels()
def copy(self):
dataset = Dataset()
graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None
target = self.__targets.copy() if self.__targets is not None else None
node_labels = self.__node_labels.copy() if self.__node_labels is not None else None
node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None
edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None
edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None
graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None
target = self._targets.copy() if self._targets is not None else None
node_labels = self._node_labels.copy() if self._node_labels is not None else None
node_attrs = self._node_attrs.copy() if self._node_attrs is not None else None
edge_labels = self._edge_labels.copy() if self._edge_labels is not None else None
edge_attrs = self._edge_attrs.copy() if self._edge_attrs is not None else None
dataset.load_graphs(graphs, target)
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
# @todo: clean_labels and add other class members?
@@ -575,7 +575,7 @@ class Dataset(object):
def get_all_node_labels(self):
node_labels = []
for g in self.__graphs:
for g in self._graphs:
for n in g.nodes():
nl = tuple(g.nodes[n].items())
if nl not in node_labels:
@@ -585,7 +585,7 @@ class Dataset(object):
def get_all_edge_labels(self):
edge_labels = []
for g in self.__graphs:
for g in self._graphs:
for e in g.edges():
el = tuple(g.edges[e].items())
if el not in edge_labels:
@@ -593,93 +593,93 @@ class Dataset(object):
return edge_labels
def __get_dataset_size(self):
return len(self.__graphs)
def _get_dataset_size(self):
return len(self._graphs)
def __get_all_node_nums(self):
return [nx.number_of_nodes(G) for G in self.__graphs]
def _get_all_node_nums(self):
return [nx.number_of_nodes(G) for G in self._graphs]
def __get_total_node_nums(self, all_node_nums):
def _get_total_node_nums(self, all_node_nums):
return np.sum(all_node_nums)
def __get_ave_node_num(self, all_node_nums):
def _get_ave_node_num(self, all_node_nums):
return np.mean(all_node_nums)
def __get_min_node_num(self, all_node_nums):
def _get_min_node_num(self, all_node_nums):
return np.amin(all_node_nums)
def __get_max_node_num(self, all_node_nums):
def _get_max_node_num(self, all_node_nums):
return np.amax(all_node_nums)
def __get_all_edge_nums(self):
return [nx.number_of_edges(G) for G in self.__graphs]
def _get_all_edge_nums(self):
return [nx.number_of_edges(G) for G in self._graphs]
def __get_total_edge_nums(self, all_edge_nums):
def _get_total_edge_nums(self, all_edge_nums):
return np.sum(all_edge_nums)
def __get_ave_edge_num(self, all_edge_nums):
def _get_ave_edge_num(self, all_edge_nums):
return np.mean(all_edge_nums)
def __get_min_edge_num(self, all_edge_nums):
def _get_min_edge_num(self, all_edge_nums):
return np.amin(all_edge_nums)
def __get_max_edge_num(self, all_edge_nums):
def _get_max_edge_num(self, all_edge_nums):
return np.amax(all_edge_nums)
def __get_node_label_dim(self):
return len(self.__node_labels)
def _get_node_label_dim(self):
return len(self._node_labels)
def __get_node_label_num(self, node_label):
def _get_node_label_num(self, node_label):
nl = set()
for G in self.__graphs:
for G in self._graphs:
nl = nl | set(nx.get_node_attributes(G, node_label).values())
return len(nl)
def __get_edge_label_dim(self):
return len(self.__edge_labels)
def _get_edge_label_dim(self):
return len(self._edge_labels)
def __get_edge_label_num(self, edge_label):
def _get_edge_label_num(self, edge_label):
el = set()
for G in self.__graphs:
for G in self._graphs:
el = el | set(nx.get_edge_attributes(G, edge_label).values())
return len(el)
def __is_directed(self):
return nx.is_directed(self.__graphs[0])
def _is_directed(self):
return nx.is_directed(self._graphs[0])
def __get_all_node_degrees(self):
return [np.mean(list(dict(G.degree()).values())) for G in self.__graphs]
def _get_all_node_degrees(self):
return [np.mean(list(dict(G.degree()).values())) for G in self._graphs]
def __get_ave_node_degree(self, all_node_degrees):
def _get_ave_node_degree(self, all_node_degrees):
return np.mean(all_node_degrees)
def __get_max_node_degree(self, all_node_degrees):
def _get_max_node_degree(self, all_node_degrees):
return np.amax(all_node_degrees)
def __get_min_node_degree(self, all_node_degrees):
def _get_min_node_degree(self, all_node_degrees):
return np.amin(all_node_degrees)
def __get_all_fill_factors(self):
def _get_all_fill_factors(self):
"""Get fill factor, the number of non-zero entries in the adjacency matrix. """Get fill factor, the number of non-zero entries in the adjacency matrix.


Returns
@@ -687,24 +687,24 @@ class Dataset(object):
list[float]
List of fill factors for all graphs.
"""
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self.__graphs]
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs]


def __get_ave_fill_factor(self, all_fill_factors):
def _get_ave_fill_factor(self, all_fill_factors):
return np.mean(all_fill_factors)
def __get_max_fill_factor(self, all_fill_factors):
def _get_max_fill_factor(self, all_fill_factors):
return np.amax(all_fill_factors)
def __get_min_fill_factor(self, all_fill_factors):
def _get_min_fill_factor(self, all_fill_factors):
return np.amin(all_fill_factors)
def __get_substructures(self):
def _get_substructures(self):
subs = set()
for G in self.__graphs:
for G in self._graphs:
degrees = list(dict(G.degree()).values())
if any(i == 2 for i in degrees):
subs.add('linear')
@@ -713,8 +713,8 @@ class Dataset(object):
if 'linear' in subs and 'non linear' in subs:
break


if self.__directed:
for G in self.__graphs:
if self._directed:
for G in self._graphs:
if len(list(nx.find_cycle(G))) > 0:
subs.add('cyclic')
break
@@ -737,19 +737,19 @@ class Dataset(object):
return subs
def __get_class_num(self):
return len(set(self.__targets))
def _get_class_num(self):
return len(set(self._targets))
def __get_node_attr_dim(self):
return len(self.__node_attrs)
def _get_node_attr_dim(self):
return len(self._node_attrs)
def __get_edge_attr_dim(self):
return len(self.__edge_attrs)
def _get_edge_attr_dim(self):
return len(self._edge_attrs)


def __compute_all_degree_entropy(self, base=None):
def _compute_all_degree_entropy(self, base=None):
"""Compute the entropy of degree distribution of each graph. """Compute the entropy of degree distribution of each graph.


Parameters
@@ -765,7 +765,7 @@ class Dataset(object):
from gklearn.utils.stats import entropy
degree_entropy = []
for g in self.__graphs:
for g in self._graphs:
degrees = list(dict(g.degree()).values())
en = entropy(degrees, base=base)
degree_entropy.append(en)
@@ -774,32 +774,32 @@ class Dataset(object):
@property
def graphs(self):
return self.__graphs
return self._graphs




@property
def targets(self):
return self.__targets
return self._targets
@property
def node_labels(self):
return self.__node_labels
return self._node_labels




@property
def edge_labels(self):
return self.__edge_labels
return self._edge_labels
@property
def node_attrs(self):
return self.__node_attrs
return self._node_attrs
@property
def edge_attrs(self):
return self.__edge_attrs
return self._edge_attrs
def split_dataset_by_target(dataset):


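Dataset pairs each renamed attribute with a read-only property, so the public surface (dataset.graphs, dataset.targets, and so on) is unchanged by this commit; only the backing names move from mangled __ to conventional _. A minimal sketch of the pattern, illustrative rather than the full class:

class DatasetSketch:
    def __init__(self, graphs=None):
        self._graphs = graphs  # subclasses can read and extend this directly

    @property
    def graphs(self):
        return self._graphs  # read-only view for callers

ds = DatasetSketch(graphs=['g0', 'g1'])
print(ds.graphs)  # ['g0', 'g1']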
+ 4
- 4
gklearn/utils/graph_files.py View File

@@ -692,7 +692,7 @@ def load_from_ds(filename, filename_targets):
# remove the '#'s in file names
g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
_append_label_names(label_names, l_names)
y.append(float(tmp[1]))
else: # targets in a separate file
for i in range(0, len(content)):
@@ -700,7 +700,7 @@ def load_from_ds(filename, filename_targets):
# remove the '#'s in file names
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
_append_label_names(label_names, l_names)
with open(filename_targets) as fnt:
content_y = fnt.read().splitlines()
@@ -745,13 +745,13 @@ def load_from_xml(filename, dir_dataset=None):
mol_class = graph.attrib['class']
g, l_names = load_gxl(dir_dataset + '/' + mol_filename)
data.append(g)
__append_label_names(label_names, l_names)
_append_label_names(label_names, l_names)
y.append(mol_class)
return data, y, label_names




def __append_label_names(label_names, new_names):
def _append_label_names(label_names, new_names):
for key, val in label_names.items():
label_names[key] += [name for name in new_names[key] if name not in val]


+ 2
- 2
gklearn/utils/knn.py View File

@@ -73,7 +73,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t
y_all = dataset.targets
# compute kernel distances.
dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset)
dis_mat = _compute_kernel_distances(dataset, kernel_options, trainset=trainset)
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
@@ -121,7 +121,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t
return results
def __compute_kernel_distances(dataset, kernel_options, trainset=None):
def _compute_kernel_distances(dataset, kernel_options, trainset=None):
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,


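The kernel distances used by knn_cv come from the usual identity between a kernel and the metric it induces in feature space: d(g, h)^2 = k(g, g) - 2*k(g, h) + k(h, h). A minimal sketch turning a Gram matrix into a full distance matrix (an assumed stand-alone helper, not the gklearn signature):

import numpy as np

def gram_to_distances(K):
    # d(i, j)^2 = K[i, i] - 2 K[i, j] + K[j, j]; clip guards against
    # tiny negative values from floating-point round-off
    diag = np.diag(K)
    d2 = diag[:, None] - 2.0 * K + diag[None, :]
    return np.sqrt(np.clip(d2, 0.0, None))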
+ 5
- 5
gklearn/utils/timer.py View File

@@ -23,8 +23,8 @@ class Timer(object):
time_limit_in_sec : float
The time limit in seconds.
"""
self.__time_limit_in_sec = time_limit_in_sec
self.__start_time = time.time()
self._time_limit_in_sec = time_limit_in_sec
self._start_time = time.time()
def expired(self):
@@ -34,7 +34,7 @@ class Timer(object):
------
Boolean true if the time limit has expired and false otherwise.
"""
if self.__time_limit_in_sec > 0:
runtime = time.time() - self.__start_time
return runtime >= self.__time_limit_in_sec
if self._time_limit_in_sec > 0:
runtime = time.time() - self._start_time
return runtime >= self._time_limit_in_sec
return False

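For reference, Timer is the wall-clock budget polled by _termination_criterion_met earlier in this commit: construct it with a limit in seconds and call expired() inside the loop; a non-positive limit disables the check, as the code above shows. A usage sketch, assuming the constructor takes time_limit_in_sec as the docstring indicates:

from gklearn.utils.timer import Timer

timer = Timer(time_limit_in_sec=300)  # five-minute budget; <= 0 means no limit
itr, max_itrs = 0, 100
while not timer.expired() and itr < max_itrs:
    itr += 1  # one optimization step would run here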