
Refactor: deprecate the use of "__" for "private" members and methods; use "_" instead.

Branch: v0.2.x
jajupmochi committed 4 years ago · commit bc5b8b0d25
21 changed files with 1593 additions and 1592 deletions
  1. +51 -51 gklearn/kernels/common_walk.py
  2. +7 -7 gklearn/kernels/graph_kernel.py
  3. +45 -45 gklearn/kernels/marginalized.py
  4. +80 -80 gklearn/kernels/path_up_to_h.py
  5. +4 -4 gklearn/kernels/random_walk_meta.py
  6. +33 -33 gklearn/kernels/shortest_path.py
  7. +56 -56 gklearn/kernels/structural_sp.py
  8. +85 -85 gklearn/kernels/treelet.py
  9. +41 -41 gklearn/kernels/weisfeiler_lehman.py
  10. +2 -2 gklearn/preimage/generate_random_preimages_by_class.py
  11. +25 -25 gklearn/preimage/kernel_knn_cv.py
  12. +284 -283 gklearn/preimage/median_preimage_generator.py
  13. +221 -221 gklearn/preimage/median_preimage_generator_cml.py
  14. +283 -283 gklearn/preimage/median_preimage_generator_py.py
  15. +109 -109 gklearn/preimage/random_preimage_generator.py
  16. +10 -10 gklearn/preimage/remove_best_graph.py
  17. +2 -2 gklearn/preimage/utils.py
  18. +244 -244 gklearn/utils/dataset.py
  19. +4 -4 gklearn/utils/graph_files.py
  20. +2 -2 gklearn/utils/knn.py
  21. +5 -5 gklearn/utils/timer.py
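Before the per-file hunks, a note on why the rename in the commit message is not cosmetic: a double leading underscore triggers Python's name mangling, so an attribute like self.__weight set inside CommonWalk is actually stored as _CommonWalk__weight and is invisible under its plain name to subclasses, while a single underscore is a pure convention with no runtime effect. A minimal standalone sketch of the difference (illustrative only, not gklearn code):

class Base:
    def __init__(self):
        self.__weight = 1   # name-mangled: stored as _Base__weight
        self._depth = 10    # single underscore: plain attribute, convention only


class Child(Base):
    def show(self):
        print(self._depth)          # fine: no mangling on a single underscore
        try:
            print(self.__weight)    # mangles to _Child__weight -> AttributeError
        except AttributeError as e:
            print(e)
        print(self._Base__weight)   # the mangled name is the only way in


Child().show()

This is exactly the situation the refactor removes: with _weight instead of __weight, subclasses and mixins can reach the attribute directly.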

+51 -51 gklearn/kernels/common_walk.py

@@ -26,18 +26,18 @@ class CommonWalk(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__weight = kwargs.get('weight', 1)
self.__compute_method = kwargs.get('compute_method', None)
self.__ds_infos = kwargs.get('ds_infos', {})
self.__compute_method = self.__compute_method.lower()
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._weight = kwargs.get('weight', 1)
self._compute_method = kwargs.get('compute_method', None)
self._ds_infos = kwargs.get('ds_infos', {})
self._compute_method = self._compute_method.lower()


def _compute_gm_series(self):
self.__check_graphs(self._graphs)
self.__add_dummy_labels(self._graphs)
if not self.__ds_infos['directed']: # convert
self._check_graphs(self._graphs)
self._add_dummy_labels(self._graphs)
if not self._ds_infos['directed']: # convert
self._graphs = [G.to_directed() for G in self._graphs]
# compute Gram matrix.
@@ -51,15 +51,15 @@ class CommonWalk(GraphKernel):
iterator = itr
# direct product graph method - exponential
if self.__compute_method == 'exp':
if self._compute_method == 'exp':
for i, j in iterator:
kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
# direct product graph method - geometric
elif self.__compute_method == 'geo':
elif self._compute_method == 'geo':
for i, j in iterator:
kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
@@ -67,9 +67,9 @@ class CommonWalk(GraphKernel):
def _compute_gm_imap_unordered(self):
self.__check_graphs(self._graphs)
self.__add_dummy_labels(self._graphs)
if not self.__ds_infos['directed']: # convert
self._check_graphs(self._graphs)
self._add_dummy_labels(self._graphs)
if not self._ds_infos['directed']: # convert
self._graphs = [G.to_directed() for G in self._graphs]
# compute Gram matrix.
@@ -80,10 +80,10 @@ class CommonWalk(GraphKernel):
# G_gn = gn_toshare
# direct product graph method - exponential
if self.__compute_method == 'exp':
if self._compute_method == 'exp':
do_fun = self._wrapper_kernel_do_exp
# direct product graph method - geometric
elif self.__compute_method == 'geo':
elif self._compute_method == 'geo':
do_fun = self._wrapper_kernel_do_geo
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
@@ -93,9 +93,9 @@ class CommonWalk(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
self.__check_graphs(g_list + [g1])
self.__add_dummy_labels(g_list + [g1])
if not self.__ds_infos['directed']: # convert
self._check_graphs(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
if not self._ds_infos['directed']: # convert
g1 = g1.to_directed()
g_list = [G.to_directed() for G in g_list]
@@ -107,23 +107,23 @@ class CommonWalk(GraphKernel):
iterator = range(len(g_list))
# direct product graph method - exponential
if self.__compute_method == 'exp':
if self._compute_method == 'exp':
for i in iterator:
kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
kernel_list[i] = kernel
# direct product graph method - geometric
elif self.__compute_method == 'geo':
elif self._compute_method == 'geo':
for i in iterator:
kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
kernel_list[i] = kernel
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__check_graphs(g_list + [g1])
self.__add_dummy_labels(g_list + [g1])
if not self.__ds_infos['directed']: # convert
self._check_graphs(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
if not self._ds_infos['directed']: # convert
g1 = g1.to_directed()
g_list = [G.to_directed() for G in g_list]
@@ -136,10 +136,10 @@ class CommonWalk(GraphKernel):
# G_g_list = g_list_toshare
# direct product graph method - exponential
if self.__compute_method == 'exp':
if self._compute_method == 'exp':
do_fun = self._wrapper_kernel_list_do_exp
# direct product graph method - geometric
elif self.__compute_method == 'geo':
elif self._compute_method == 'geo':
do_fun = self._wrapper_kernel_list_do_geo
def func_assign(result, var_to_assign):
@@ -154,31 +154,31 @@ class CommonWalk(GraphKernel):
def _wrapper_kernel_list_do_exp(self, itr):
return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)
return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)


def _wrapper_kernel_list_do_geo(self, itr):
return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)
def _compute_single_kernel_series(self, g1, g2):
self.__check_graphs([g1] + [g2])
self.__add_dummy_labels([g1] + [g2])
if not self.__ds_infos['directed']: # convert
self._check_graphs([g1] + [g2])
self._add_dummy_labels([g1] + [g2])
if not self._ds_infos['directed']: # convert
g1 = g1.to_directed()
g2 = g2.to_directed()
# direct product graph method - exponential
if self.__compute_method == 'exp':
kernel = self.__kernel_do_exp(g1, g2, self.__weight)
if self._compute_method == 'exp':
kernel = self._kernel_do_exp(g1, g2, self._weight)
# direct product graph method - geometric
elif self.__compute_method == 'geo':
kernel = self.__kernel_do_geo(g1, g2, self.__weight)
elif self._compute_method == 'geo':
kernel = self._kernel_do_geo(g1, g2, self._weight)

return kernel
def __kernel_do_exp(self, g1, g2, beta):
def _kernel_do_exp(self, g1, g2, beta):
"""Compute common walk graph kernel between 2 graphs using exponential
series.
@@ -195,7 +195,7 @@ class CommonWalk(GraphKernel):
The common walk Kernel between 2 graphs.
"""
# get tensor product / direct product
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
# return 0 if the direct product graph has no more than 1 node.
if nx.number_of_nodes(gp) < 2:
return 0
@@ -227,10 +227,10 @@ class CommonWalk(GraphKernel):
def _wrapper_kernel_do_exp(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)
def __kernel_do_geo(self, g1, g2, gamma):
def _kernel_do_geo(self, g1, g2, gamma):
"""Compute common walk graph kernel between 2 graphs using geometric
series.
@@ -247,7 +247,7 @@ class CommonWalk(GraphKernel):
The common walk Kernel between 2 graphs.
"""
# get tensor product / direct product
gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
# return 0 if the direct product graph has no more than 1 node.
if nx.number_of_nodes(gp) < 2:
return 0
@@ -262,24 +262,24 @@ class CommonWalk(GraphKernel):
def _wrapper_kernel_do_geo(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)
def __check_graphs(self, Gn):
def _check_graphs(self, Gn):
for g in Gn:
if nx.number_of_nodes(g) == 1:
raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')
def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]
def _init_worker_gm(gn_toshare):
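Worth a gloss, since the hunks above only show call sites: the exponential variant follows the classic direct-product construction, i.e. build the tensor (direct) product of the two graphs and sum the entries of exp(beta * A), where A is the product graph's adjacency matrix. A minimal standalone sketch under the assumption of node labels only (the label name 'atom' is made up, and scipy's expm stands in for whatever the elided _kernel_do_exp body actually does):

import networkx as nx
import numpy as np
from scipy.linalg import expm  # stand-in; the library's own exp-series code is elided here


def common_walk_exp(g1, g2, beta, node_label='atom'):  # 'atom' is an assumed label name
    # Direct (tensor) product: nodes are label-matching node pairs,
    # edges pair up edges of g1 and g2 in both orientations.
    gp = nx.Graph()
    gp.add_nodes_from((u, v) for u in g1 for v in g2
                      if g1.nodes[u][node_label] == g2.nodes[v][node_label])
    for u1, u2 in g1.edges():
        for v1, v2 in g2.edges():
            if (u1, v1) in gp and (u2, v2) in gp:
                gp.add_edge((u1, v1), (u2, v2))
            if (u1, v2) in gp and (u2, v1) in gp:
                gp.add_edge((u1, v2), (u2, v1))
    if gp.number_of_nodes() < 2:  # mirrors the "no more than 1 node" guard above
        return 0.0
    A = nx.to_numpy_array(gp)
    return float(expm(beta * A).sum())  # sum walk weights over all start/end pairs

The geometric variant differs only in the matrix function: a geometric series in gamma * A instead of the matrix exponential.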


+7 -7 gklearn/kernels/graph_kernel.py

@@ -37,7 +37,7 @@ class GraphKernel(object):
raise Exception('The graph list given is empty. No computation was performed.')
else:
self._graphs = [g.copy() for g in graphs[0]]
self._gram_matrix = self.__compute_gram_matrix()
self._gram_matrix = self._compute_gram_matrix()
self._gram_matrix_unnorm = np.copy(self._gram_matrix)
if self._normalize:
self._gram_matrix = self.normalize_gm(self._gram_matrix)
@@ -45,17 +45,17 @@ class GraphKernel(object):
elif len(graphs) == 2:
if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy())
kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy())
return kernel, self._run_time
elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
g1 = graphs[0].copy()
g_list = [g.copy() for g in graphs[1]]
kernel_list = self.__compute_kernel_list(g1, g_list)
kernel_list = self._compute_kernel_list(g1, g_list)
return kernel_list, self._run_time
elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
g1 = graphs[1].copy()
g_list = [g.copy() for g in graphs[0]]
kernel_list = self.__compute_kernel_list(g1, g_list)
kernel_list = self._compute_kernel_list(g1, g_list)
return kernel_list, self._run_time
else:
raise Exception('Cannot detect graphs.')
@@ -99,7 +99,7 @@ class GraphKernel(object):
return dis_mat, dis_max, dis_min, dis_mean
def __compute_gram_matrix(self):
def _compute_gram_matrix(self):
start_time = time.time()
if self._parallel == 'imap_unordered':
@@ -125,7 +125,7 @@ class GraphKernel(object):
pass
def __compute_kernel_list(self, g1, g_list):
def _compute_kernel_list(self, g1, g_list):
start_time = time.time()
if self._parallel == 'imap_unordered':
@@ -151,7 +151,7 @@ class GraphKernel(object):
pass
def __compute_single_kernel(self, g1, g2):
def _compute_single_kernel(self, g1, g2):
start_time = time.time()
kernel = self._compute_single_kernel_series(g1, g2)


+45 -45 gklearn/kernels/marginalized.py

@@ -33,25 +33,25 @@ class Marginalized(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__p_quit = kwargs.get('p_quit', 0.5)
self.__n_iteration = kwargs.get('n_iteration', 10)
self.__remove_totters = kwargs.get('remove_totters', False)
self.__ds_infos = kwargs.get('ds_infos', {})
self.__n_iteration = int(self.__n_iteration)
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._p_quit = kwargs.get('p_quit', 0.5)
self._n_iteration = kwargs.get('n_iteration', 10)
self._remove_totters = kwargs.get('remove_totters', False)
self._ds_infos = kwargs.get('ds_infos', {})
self._n_iteration = int(self._n_iteration)


def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
if self.__remove_totters:
if self._remove_totters:
if self._verbose >= 2:
iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
else:
iterator = self._graphs
# @todo: this may not work.
self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -63,7 +63,7 @@ class Marginalized(GraphKernel):
else:
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
kernel = self._kernel_do(self._graphs[i], self._graphs[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel # @todo: no directed graph considered?
@@ -71,9 +71,9 @@ class Marginalized(GraphKernel):
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
if self.__remove_totters:
if self._remove_totters:
pool = Pool(self._n_jobs)
itr = range(0, len(self._graphs))
if len(self._graphs) < 100 * self._n_jobs:
@@ -105,16 +105,16 @@ class Marginalized(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
if self.__remove_totters:
g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
if self._remove_totters:
g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
if self._verbose >= 2:
iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
else:
iterator = g_list
# @todo: this may not work.
g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -123,17 +123,17 @@ class Marginalized(GraphKernel):
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(g1, g_list[i])
kernel = self._kernel_do(g1, g_list[i])
kernel_list[i] = kernel
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
if self.__remove_totters:
g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
if self._remove_totters:
g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
pool = Pool(self._n_jobs)
itr = range(0, len(g_list))
if len(g_list) < 100 * self._n_jobs:
@@ -171,19 +171,19 @@ class Marginalized(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__kernel_do(G_g1, G_g_list[itr])
return itr, self._kernel_do(G_g1, G_g_list[itr])
def _compute_single_kernel_series(self, g1, g2):
self.__add_dummy_labels([g1] + [g2])
if self.__remove_totters:
g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
kernel = self.__kernel_do(g1, g2)
self._add_dummy_labels([g1] + [g2])
if self._remove_totters:
g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
g2 = untotterTransformation(g2, self._node_labels, self._edge_labels)
kernel = self._kernel_do(g1, g2)
return kernel
def __kernel_do(self, g1, g2):
def _kernel_do(self, g1, g2):
"""Compute marginalized graph kernel between 2 graphs.
Parameters
@@ -205,7 +205,7 @@ class Marginalized(GraphKernel):
p_init_G1 = 1 / num_nodes_G1
p_init_G2 = 1 / num_nodes_G2
q = self.__p_quit * self.__p_quit
q = self._p_quit * self._p_quit
r1 = q
# # initial R_inf
@@ -260,36 +260,36 @@ class Marginalized(GraphKernel):
if len(g2[node2]) > 0:
R_inf[(node1, node2)] = r1
else:
R_inf[(node1, node2)] = self.__p_quit
R_inf[(node1, node2)] = self._p_quit
else:
if len(g2[node2]) > 0:
R_inf[(node1, node2)] = self.__p_quit
R_inf[(node1, node2)] = self._p_quit
else:
R_inf[(node1, node2)] = 1
# compute all transition probability first.
t_dict = {}
if self.__n_iteration > 1:
if self._n_iteration > 1:
for node1 in g1.nodes():
neighbor_n1 = g1[node1]
# the transition probability distribution in the random walks
# generating step (uniform distribution over the vertices adjacent
# to the current vertex)
if len(neighbor_n1) > 0:
p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
p_trans_n1 = (1 - self._p_quit) / len(neighbor_n1)
for node2 in g2.nodes():
neighbor_n2 = g2[node2]
if len(neighbor_n2) > 0:
p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
p_trans_n2 = (1 - self._p_quit) / len(neighbor_n2)
for neighbor1 in neighbor_n1:
for neighbor2 in neighbor_n2:
t_dict[(node1, node2, neighbor1, neighbor2)] = \
p_trans_n1 * p_trans_n2 * \
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels))
# Compute R_inf with a simple iterative method
for i in range(2, self.__n_iteration + 1):
for i in range(2, self._n_iteration + 1):
R_inf_old = R_inf.copy()
# Compute R_inf for each pair of nodes
@@ -311,7 +311,7 @@ class Marginalized(GraphKernel):
# add elements of R_inf up and compute kernel.
for (n1, n2), value in R_inf.items():
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels))
kernel += s * value # ref [1] equation (6)
return kernel
@@ -320,19 +320,19 @@ class Marginalized(GraphKernel):
def _wrapper_kernel_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do(G_gn[i], G_gn[j])
return i, j, self._kernel_do(G_gn[i], G_gn[j])

def _wrapper_untotter(self, i):
return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels) # @todo: this may not work.
return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels) # @todo: this may not work.
def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]
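For orientation, the elided body of that R_inf loop implements the fixed-point iteration of the marginalized kernel (Kashima et al.): each sweep recomputes R_inf for a node pair as the termination mass q plus label-matched transition mass flowing in from neighbouring pairs. A stripped-down sketch of one sweep, under the simplifying assumptions of a single node-label name and no edge labels (the real t_dict above also applies a delta kernel on edge labels):

def r_inf_sweep(g1, g2, R_old, p_quit, node_label='atom'):  # 'atom' assumed
    """One sweep of the R_inf fixed point, node labels only (edge labels ignored)."""
    q = p_quit * p_quit  # termination mass, as in the hunk above
    R_new = dict(R_old)
    for n1 in g1.nodes():
        for n2 in g2.nodes():
            nbrs1, nbrs2 = list(g1[n1]), list(g2[n2])
            if not nbrs1 or not nbrs2:
                continue  # boundary pairs keep their initial values
            p1 = (1 - p_quit) / len(nbrs1)  # uniform transition probability
            p2 = (1 - p_quit) / len(nbrs2)
            acc = sum(p1 * p2 * R_old[(m1, m2)]
                      for m1 in nbrs1 for m2 in nbrs2
                      if g1.nodes[m1][node_label] == g2.nodes[m2][node_label])
            R_new[(n1, n2)] = q + acc
    return R_new

R_old here must be pre-seeded over all node pairs with the initial values the hunk shows (q, p_quit, or 1, depending on which side has neighbours).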

+80 -80 gklearn/kernels/path_up_to_h.py

@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__depth = int(kwargs.get('depth', 10))
self.__k_func = kwargs.get('k_func', 'MinMax')
self.__compute_method = kwargs.get('compute_method', 'trie')
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._depth = int(kwargs.get('depth', 10))
self._k_func = kwargs.get('k_func', 'MinMax')
self._compute_method = kwargs.get('compute_method', 'trie')
self._ds_infos = kwargs.get('ds_infos', {})


def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
from itertools import combinations_with_replacement
itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

if self.__compute_method == 'trie':
all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
if self._compute_method == 'trie':
all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
for i, j in iterator_kernel:
kernel = self.__kernel_do_trie(all_paths[i], all_paths[j])
kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
else:
all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
for i, j in iterator_kernel:
kernel = self.__kernel_do_naive(all_paths[i], all_paths[j])
kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
# get all paths of all graphs before computing kernels to save time,
# but this may cost a lot of memory for large datasets.
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
else:
chunksize = 100
all_paths = [[] for _ in range(len(self._graphs))]
if self.__compute_method == 'trie' and self.__k_func is not None:
if self._compute_method == 'trie' and self._k_func is not None:
get_ps_fun = self._wrapper_find_all_path_as_trie
elif self.__compute_method != 'trie' and self.__k_func is not None:
elif self._compute_method != 'trie' and self._k_func is not None:
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
else:
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
if self.__compute_method == 'trie' and self.__k_func is not None:
if self._compute_method == 'trie' and self._k_func is not None:
def init_worker(trie_toshare):
global G_trie
G_trie = trie_toshare
do_fun = self._wrapper_kernel_do_trie
elif self.__compute_method != 'trie' and self.__k_func is not None:
elif self._compute_method != 'trie' and self._k_func is not None:
def init_worker(plist_toshare):
global G_plist
G_plist = plist_toshare
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def init_worker(plist_toshare):
global G_plist
G_plist = plist_toshare
do_fun = self.__wrapper_kernel_do_kernelless # @todo: what is this?
do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this?
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)
@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
if self._verbose >= 2:
iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
kernel_list = [None] * len(g_list)

if self.__compute_method == 'trie':
paths_g1 = self.__find_all_path_as_trie(g1)
paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps]
if self._compute_method == 'trie':
paths_g1 = self._find_all_path_as_trie(g1)
paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
for i in iterator_kernel:
kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i])
kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
kernel_list[i] = kernel
else:
paths_g1 = self.__find_all_paths_until_length(g1)
paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps]
paths_g1 = self._find_all_paths_until_length(g1)
paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
for i in iterator_kernel:
kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i])
kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
kernel_list[i] = kernel
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
# get all paths of all graphs before computing kernels to save time,
# but this may cost a lot of memory for large datasets.
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
else:
chunksize = 100
paths_g_list = [[] for _ in range(len(g_list))]
if self.__compute_method == 'trie' and self.__k_func is not None:
paths_g1 = self.__find_all_path_as_trie(g1)
if self._compute_method == 'trie' and self._k_func is not None:
paths_g1 = self._find_all_path_as_trie(g1)
get_ps_fun = self._wrapper_find_all_path_as_trie
elif self.__compute_method != 'trie' and self.__k_func is not None:
paths_g1 = self.__find_all_paths_until_length(g1)
elif self._compute_method != 'trie' and self._k_func is not None:
paths_g1 = self._find_all_paths_until_length(g1)
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
else:
paths_g1 = self.__find_all_paths_until_length(g1)
paths_g1 = self._find_all_paths_until_length(g1)
get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
if self._verbose >= 2:
iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def _wrapper_kernel_list_do(self, itr):
if self.__compute_method == 'trie' and self.__k_func is not None:
return itr, self.__kernel_do_trie(G_p1, G_plist[itr])
elif self.__compute_method != 'trie' and self.__k_func is not None:
return itr, self.__kernel_do_naive(G_p1, G_plist[itr])
if self._compute_method == 'trie' and self._k_func is not None:
return itr, self._kernel_do_trie(G_p1, G_plist[itr])
elif self._compute_method != 'trie' and self._k_func is not None:
return itr, self._kernel_do_naive(G_p1, G_plist[itr])
else:
return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr])
return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])
def _compute_single_kernel_series(self, g1, g2):
self.__add_dummy_labels([g1] + [g2])
if self.__compute_method == 'trie':
paths_g1 = self.__find_all_path_as_trie(g1)
paths_g2 = self.__find_all_path_as_trie(g2)
kernel = self.__kernel_do_trie(paths_g1, paths_g2)
self._add_dummy_labels([g1] + [g2])
if self._compute_method == 'trie':
paths_g1 = self._find_all_path_as_trie(g1)
paths_g2 = self._find_all_path_as_trie(g2)
kernel = self._kernel_do_trie(paths_g1, paths_g2)
else:
paths_g1 = self.__find_all_paths_until_length(g1)
paths_g2 = self.__find_all_paths_until_length(g2)
kernel = self.__kernel_do_naive(paths_g1, paths_g2)
paths_g1 = self._find_all_paths_until_length(g1)
paths_g2 = self._find_all_paths_until_length(g2)
kernel = self._kernel_do_naive(paths_g1, paths_g2)
return kernel

def __kernel_do_trie(self, trie1, trie2):
def _kernel_do_trie(self, trie1, trie2):
"""Compute path graph kernels up to depth d between 2 graphs using trie.
Parameters
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
kernel : float
Path kernel up to h between 2 graphs.
"""
if self.__k_func == 'tanimoto':
if self._k_func == 'tanimoto':
# traverse all paths in graph1 and search them in graph2. Depth-first
# search is applied.
def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
# print(setlist)
kernel = setlist[0] / setlist[1]
elif self.__k_func == 'MinMax': # MinMax kernel
elif self._k_func == 'MinMax': # MinMax kernel
# traverse all paths in graph1 and search them in graph2. Depth-first
# search is applied.
def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def _wrapper_kernel_do_trie(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j])
return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])
def __kernel_do_naive(self, paths1, paths2):
def _kernel_do_naive(self, paths1, paths2):
"""Compute path graph kernels up to depth d between 2 graphs naively.
Parameters
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
"""
all_paths = list(set(paths1 + paths2))
if self.__k_func == 'tanimoto':
if self._k_func == 'tanimoto':
length_union = len(set(paths1 + paths2))
kernel = (len(set(paths1)) + len(set(paths2)) -
length_union) / length_union
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
# kernel_uv = np.dot(vector1, vector2)
# kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)
elif self.__k_func == 'MinMax': # MinMax kernel
elif self._k_func == 'MinMax': # MinMax kernel
path_count1 = Counter(paths1)
path_count2 = Counter(paths2)
vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
kernel = np.sum(np.minimum(vector1, vector2)) / \
np.sum(np.maximum(vector1, vector2))
elif self.__k_func is None: # no sub-kernel used; compare paths directly.
elif self._k_func is None: # no sub-kernel used; compare paths directly.
path_count1 = Counter(paths1)
path_count2 = Counter(paths2)
vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def _wrapper_kernel_do_naive(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j])
return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])
def __find_all_path_as_trie(self, G):
def _find_all_path_as_trie(self, G):
# all_path = find_all_paths_until_length(G, length, ds_attrs,
# node_label=node_label,
# edge_label=edge_label)
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
# them. Depth-first search is applied. Notice the reverse of each path is
# also stored to the trie.
def traverseGraph(root, ptrie, G, pcurrent=[]):
if len(pcurrent) < self.__depth + 1:
if len(pcurrent) < self._depth + 1:
for neighbor in G[root]:
if neighbor not in pcurrent:
pcurrent.append(neighbor)
plstr = self.__paths2labelseqs([pcurrent], G)
plstr = self._paths2labelseqs([pcurrent], G)
ptrie.insertWord(plstr[0])
traverseGraph(neighbor, ptrie, G, pcurrent)
del pcurrent[-1]
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
ptrie = Trie()
path_l = [[n] for n in G.nodes] # paths of length l
path_l_str = self.__paths2labelseqs(path_l, G)
path_l_str = self._paths2labelseqs(path_l, G)
for p in path_l_str:
ptrie.insertWord(p)
for n in G.nodes:
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def _wrapper_find_all_path_as_trie(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, self.__find_all_path_as_trie(g)
return i, self._find_all_path_as_trie(g)
# @todo: (can be removed maybe) this method finds paths repetitively; it could be faster.
def __find_all_paths_until_length(self, G, tolabelseqs=True):
def _find_all_paths_until_length(self, G, tolabelseqs=True):
"""Find all paths no longer than a certain maximum length in a graph. A
recursive depth-first search is applied.
@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
"""
# path_l = [tuple([n]) for n in G.nodes] # paths of length l
# all_paths = path_l[:]
# for l in range(1, self.__depth + 1):
# for l in range(1, self._depth + 1):
# path_l_new = []
# for path in path_l:
# for neighbor in G[path[-1]]:
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
path_l = [[n] for n in G.nodes] # paths of length l
all_paths = [p.copy() for p in path_l]
for l in range(1, self.__depth + 1):
for l in range(1, self._depth + 1):
path_lplus1 = []
for path in path_l:
for neighbor in G[path[-1]]:
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
all_paths += path_lplus1
path_l = [p.copy() for p in path_lplus1]
# for i in range(0, self.__depth + 1):
# for i in range(0, self._depth + 1):
# new_paths = find_all_paths(G, i)
# if new_paths == []:
# break
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
# consider labels
# print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
# print()
return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
def __paths2labelseqs(self, plist, G):
if len(self.__node_labels) > 0:
if len(self.__edge_labels) > 0:
def _paths2labelseqs(self, plist, G):
if len(self._node_labels) > 0:
if len(self._edge_labels) > 0:
path_strs = []
for path in plist:
pths_tmp = []
for idx, node in enumerate(path[:-1]):
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels))
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
path_strs.append(tuple(pths_tmp))
else:
path_strs = []
for path in plist:
pths_tmp = []
for node in path:
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
path_strs.append(tuple(pths_tmp))
return path_strs
else:
if len(self.__edge_labels) > 0:
if len(self._edge_labels) > 0:
path_strs = []
for path in plist:
if len(path) == 1:
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
else:
pths_tmp = []
for idx, node in enumerate(path[:-1]):
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
path_strs.append(tuple(pths_tmp))
return path_strs
else:
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
# return [tuple([len(path)]) for path in all_paths]
def __add_dummy_labels(self, Gn):
if self.__k_func is not None:
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_labels(self, Gn):
if self._k_func is not None:
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]
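The two sub-kernels branched on throughout this file reduce to simple set and multiset comparisons once every path has been turned into a label sequence: tanimoto compares path sets (|intersection| / |union|, computed in the naive code via inclusion-exclusion on the union size), and MinMax compares path multisets. A self-contained sketch over two lists of hashable path label sequences, matching the formulas in the _kernel_do_naive hunk:

from collections import Counter

def tanimoto(paths1, paths2):
    s1, s2 = set(paths1), set(paths2)
    union = len(s1 | s2)
    return (len(s1) + len(s2) - union) / union  # = |intersection| / |union|

def minmax(paths1, paths2):
    c1, c2 = Counter(paths1), Counter(paths2)
    keys = set(c1) | set(c2)
    return (sum(min(c1[k], c2[k]) for k in keys)
            / sum(max(c1[k], c2[k]) for k in keys))

print(tanimoto(['ab', 'bc'], ['ab', 'cd']))        # 1 common of 3 total -> 0.333...
print(minmax(['ab', 'ab', 'bc'], ['ab', 'cd']))    # min-sum 1 / max-sum 4 -> 0.25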

+4 -4 gklearn/kernels/random_walk_meta.py

@@ -76,11 +76,11 @@ class RandomWalkMeta(GraphKernel):
def _add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]
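_add_dummy_labels, repeated across these kernel classes, backfills a constant label when a dataset is unlabelled so that the delta kernels always have something to compare; every node (or edge) then matches every other one, and the kernel degenerates to a purely structural comparison. A standalone illustration of the same trick ('dummy' stands in for SpecialLabel.DUMMY):

import networkx as nx

DUMMY = 'dummy'  # stand-in for SpecialLabel.DUMMY

g = nx.path_graph(3)                   # a graph with no labels at all
nx.set_node_attributes(g, '0', DUMMY)  # the same call used in the hunks above
print(dict(g.nodes(data=True)))
# {0: {'dummy': '0'}, 1: {'dummy': '0'}, 2: {'dummy': '0'}}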

+33 -33 gklearn/kernels/shortest_path.py

@@ -26,11 +26,11 @@ class ShortestPath(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__node_attrs = kwargs.get('node_attrs', [])
self.__edge_weight = kwargs.get('edge_weight', None)
self.__node_kernels = kwargs.get('node_kernels', None)
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._node_attrs = kwargs.get('node_attrs', [])
self._edge_weight = kwargs.get('edge_weight', None)
self._node_kernels = kwargs.get('node_kernels', None)
self._ds_infos = kwargs.get('ds_infos', {})


def _compute_gm_series(self):
@@ -39,7 +39,7 @@ class ShortestPath(GraphKernel):
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
else:
iterator = self._graphs
self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -51,7 +51,7 @@ class ShortestPath(GraphKernel):
else:
iterator = itr
for i, j in iterator:
kernel = self.__sp_do(self._graphs[i], self._graphs[j])
kernel = self._sp_do(self._graphs[i], self._graphs[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
# get shortest path graphs of g1 and each graph in g_list.
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
else:
iterator = g_list
g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -106,7 +106,7 @@ class ShortestPath(GraphKernel):
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__sp_do(g1, g_list[i])
kernel = self._sp_do(g1, g_list[i])
kernel_list[i] = kernel
return kernel_list
@@ -114,7 +114,7 @@ class ShortestPath(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list):
# get shortest path graphs of g1 and each graph in g_list.
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
pool = Pool(self._n_jobs)
get_sp_graphs_fun = self._wrapper_get_sp_graphs
itr = zip(g_list, range(0, len(g_list)))
@@ -151,55 +151,55 @@ class ShortestPath(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__sp_do(G_g1, G_gl[itr])
return itr, self._sp_do(G_g1, G_gl[itr])
def _compute_single_kernel_series(self, g1, g2):
g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
kernel = self.__sp_do(g1, g2)
g1 = getSPGraph(g1, edge_weight=self._edge_weight)
g2 = getSPGraph(g2, edge_weight=self._edge_weight)
kernel = self._sp_do(g1, g2)
return kernel
def _wrapper_get_sp_graphs(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, getSPGraph(g, edge_weight=self.__edge_weight)
return i, getSPGraph(g, edge_weight=self._edge_weight)
def __sp_do(self, g1, g2):
def _sp_do(self, g1, g2):
kernel = 0
# compute shortest path matrices first, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if len(self.__node_labels) > 0:
if len(self._node_labels) > 0:
# node symb and non-symb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['mix']
if len(self._node_attrs) > 0:
kn = self._node_kernels['mix']
for n1, n2 in product(
g1.nodes(data=True), g2.nodes(data=True)):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
kn = self.__node_kernels['symb']
kn = self._node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_labels = [n1[1][nl] for nl in self._node_labels]
n2_labels = [n2[1][nl] for nl in self._node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
else:
# node non-symb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['nsymb']
if len(self._node_attrs) > 0:
kn = self._node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
n1_attrs = [n1[1][na] for na in self._node_attrs]
n2_attrs = [n2[1][na] for na in self._node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
# node unlabeled
else:
@@ -210,7 +210,7 @@ class ShortestPath(GraphKernel):
return kernel
# compute graph kernels
if self.__ds_infos['directed']:
if self._ds_infos['directed']:
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
if e1[2]['cost'] == e2[2]['cost']:
nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
@@ -261,4 +261,4 @@ class ShortestPath(GraphKernel):
def _wrapper_sp_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__sp_do(G_gs[i], G_gs[j])
return i, j, self._sp_do(G_gs[i], G_gs[j])
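The 'mix'/'symb'/'nsymb' lookups in _sp_do assume node_kernels is a dict of three callables, selected by what the two graphs carry: symbolic labels, continuous attributes, or both. A hedged sketch of a compatible dict, with a delta kernel for symbolic labels and a Gaussian for attributes; gklearn ships its own kernel functions, and these stand-ins only match the call signatures visible in the hunk:

import numpy as np

def k_symb(labels1, labels2):                # delta kernel on label tuples
    return 1.0 if labels1 == labels2 else 0.0

def k_nsymb(attrs1, attrs2, gamma=1.0):      # Gaussian (RBF) kernel on attributes
    d = np.asarray(attrs1, float) - np.asarray(attrs2, float)
    return float(np.exp(-gamma * np.dot(d, d)))

def k_mix(labels1, labels2, attrs1, attrs2):  # signature used by the 'mix' branch
    return k_symb(labels1, labels2) * k_nsymb(attrs1, attrs2)

node_kernels = {'symb': k_symb, 'nsymb': k_nsymb, 'mix': k_mix}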

+56 -56 gklearn/kernels/structural_sp.py

@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__node_attrs = kwargs.get('node_attrs', [])
self.__edge_attrs = kwargs.get('edge_attrs', [])
self.__edge_weight = kwargs.get('edge_weight', None)
self.__node_kernels = kwargs.get('node_kernels', None)
self.__edge_kernels = kwargs.get('edge_kernels', None)
self.__compute_method = kwargs.get('compute_method', 'naive')
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._node_attrs = kwargs.get('node_attrs', [])
self._edge_attrs = kwargs.get('edge_attrs', [])
self._edge_weight = kwargs.get('edge_weight', None)
self._node_kernels = kwargs.get('node_kernels', None)
self._edge_kernels = kwargs.get('edge_kernels', None)
self._compute_method = kwargs.get('compute_method', 'naive')
self._ds_infos = kwargs.get('ds_infos', {})


def _compute_gm_series(self):
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel):
iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
else:
iterator = self._graphs
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for g in iterator:
splist.append(self.__get_sps_as_trie(g))
splist.append(self._get_sps_as_trie(g))
else:
for g in iterator:
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -60,14 +60,14 @@ class StructuralSP(GraphKernel):
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for i, j in iterator:
kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel
else:
for i, j in iterator:
kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
# if(kernel > 1):
# print("error here ")
gram_matrix[i][j] = kernel
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel):
else:
chunksize = 100
# get shortest path graphs of self._graphs
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
get_sps_fun = self._wrapper_get_sps_trie
else:
get_sps_fun = self._wrapper_get_sps_naive
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel):
global G_spl, G_gs
G_spl = spl_toshare
G_gs = gs_toshare
if self.__compute_method == 'trie':
do_fun = self.__wrapper_ssp_do_trie
if self._compute_method == 'trie':
do_fun = self._wrapper_ssp_do_trie
else:
do_fun = self._wrapper_ssp_do_naive
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
@@ -119,18 +119,18 @@ class StructuralSP(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
# get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = []
if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
else:
iterator = g_list
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for g in iterator:
splist.append(self.__get_sps_as_trie(g))
splist.append(self._get_sps_as_trie(g))
else:
for g in iterator:
splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -138,13 +138,13 @@ class StructuralSP(GraphKernel):
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
for i in iterator:
kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i])
kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
kernel_list[i] = kernel
else:
for i in iterator:
kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i])
kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
kernel_list[i] = kernel
return kernel_list
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list):
# get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = [None] * len(g_list)
pool = Pool(self._n_jobs)
itr = zip(g_list, range(0, len(g_list)))
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel):
else:
chunksize = 100
# get shortest path graphs of g_list
if self.__compute_method == 'trie':
if self._compute_method == 'trie':
get_sps_fun = self._wrapper_get_sps_trie
else:
get_sps_fun = self._wrapper_get_sps_naive
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel):
G_spl = spl_toshare
G_g1 = g1_toshare
G_gl = gl_toshare
if self.__compute_method == 'trie':
do_fun = self.__wrapper_ssp_do_trie
if self._compute_method == 'trie':
do_fun = self._wrapper_ssp_do_trie
else:
do_fun = self._wrapper_kernel_list_do
def func_assign(result, var_to_assign):
@@ -199,36 +199,36 @@ class StructuralSP(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])

def _compute_single_kernel_series(self, g1, g2):
sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed'])
if self.__compute_method == 'trie':
kernel = self.__ssp_do_trie(g1, g2, sp1, sp2)
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
if self._compute_method == 'trie':
kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
else:
kernel = self.__ssp_do_naive(g1, g2, sp1, sp2)
kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
return kernel
def _wrapper_get_sps_naive(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])
return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])
def __ssp_do_naive(self, g1, g2, spl1, spl2):
def _ssp_do_naive(self, g1, g2, spl1, spl2):
kernel = 0
# First, compute shortest path matrices, method borrowed from FCSP.
vk_dict = self.__get_all_node_kernels(g1, g2)
vk_dict = self._get_all_node_kernels(g1, g2)
# Then, compute kernels between all pairs of edges, which is an
# extension of the FCSP idea. It suits sparse graphs, which is the most
# common case we encounter. For dense graphs, this would be slow.
ek_dict = self.__get_all_edge_kernels(g1, g2)
ek_dict = self._get_all_edge_kernels(g1, g2)
# compute graph kernels
if vk_dict:
@@ -314,27 +314,27 @@ class StructuralSP(GraphKernel):
def _wrapper_ssp_do_naive(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
def __get_all_node_kernels(self, g1, g2):
def _get_all_node_kernels(self, g1, g2):
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
def __get_all_edge_kernels(self, g1, g2):
def _get_all_edge_kernels(self, g1, g2):
# compute kernels between all pairs of edges, which is an extension of
# the FCSP idea. It suits sparse graphs, which is the most common case
# we encounter. For dense graphs, this would be slow.
ek_dict = {} # dict of edge kernels
if len(self.__edge_labels) > 0:
if len(self._edge_labels) > 0:
# edge symb and non-symb labeled
if len(self.__edge_attrs) > 0:
ke = self.__edge_kernels['mix']
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['mix']
for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
e1_labels = [e1[2][el] for el in self.__edge_labels]
e2_labels = [e2[2][el] for el in self.__edge_labels]
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -342,11 +342,11 @@ class StructuralSP(GraphKernel):
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
# edge symb labeled
else:
ke = self.__edge_kernels['symb']
ke = self._edge_kernels['symb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
e1_labels = [e1[2][el] for el in self.__edge_labels]
e2_labels = [e2[2][el] for el in self.__edge_labels]
e1_labels = [e1[2][el] for el in self._edge_labels]
e2_labels = [e2[2][el] for el in self._edge_labels]
ek_temp = ke(e1_labels, e2_labels)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -354,12 +354,12 @@ class StructuralSP(GraphKernel):
ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
else:
# edge non-symb labeled
if len(self.__edge_attrs) > 0:
ke = self.__edge_kernels['nsymb']
if len(self._edge_attrs) > 0:
ke = self._edge_kernels['nsymb']
for e1 in g1.edges(data=True):
for e2 in g2.edges(data=True):
e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
ek_temp = ke(e1_attrs, e2_attrs)
ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
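A side note on the four assignments per edge pair above: because the graphs are undirected, the kernel value is registered under every node ordering of both edges, so later lookups by ((u, v), (x, y)) succeed regardless of orientation. In miniature, with hypothetical values:

ek_dict = {}
e1, e2, ek_temp = (0, 1), (5, 7), 0.8
for a in (e1, e1[::-1]):
    for b in (e2, e2[::-1]):
        ek_dict[(a, b)] = ek_temp  # all four orientations share one value
print(((1, 0), (7, 5)) in ek_dict)  # True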


+85 -85 gklearn/kernels/treelet.py

@@ -28,16 +28,16 @@ class Treelet(GraphKernel):
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__sub_kernel = kwargs.get('sub_kernel', None)
self.__ds_infos = kwargs.get('ds_infos', {})
if self.__sub_kernel is None:
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._sub_kernel = kwargs.get('sub_kernel', None)
self._ds_infos = kwargs.get('ds_infos', {})
if self._sub_kernel is None:
raise Exception('Sub kernel not set.')


def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
@@ -47,7 +47,7 @@ class Treelet(GraphKernel):
else:
iterator = self._graphs
for g in iterator:
canonkeys.append(self.__get_canonkeys(g))
canonkeys.append(self._get_canonkeys(g))
# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -59,7 +59,7 @@ class Treelet(GraphKernel):
else:
iterator = itr
for i, j in iterator:
kernel = self.__kernel_do(canonkeys[i], canonkeys[j])
kernel = self._kernel_do(canonkeys[i], canonkeys[j])
gram_matrix[i][j] = kernel
gram_matrix[j][i] = kernel # @todo: no directed graph considered?
@@ -67,7 +67,7 @@ class Treelet(GraphKernel):
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
self._add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
@@ -103,18 +103,18 @@ class Treelet(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_1 = self._get_canonkeys(g1)
canonkeys_list = []
if self._verbose >= 2:
iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout)
else:
iterator = g_list
for g in iterator:
canonkeys_list.append(self.__get_canonkeys(g))
canonkeys_list.append(self._get_canonkeys(g))
# compute kernel list.
kernel_list = [None] * len(g_list)
@@ -123,18 +123,18 @@ class Treelet(GraphKernel):
else:
iterator = range(len(g_list))
for i in iterator:
kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i])
kernel = self._kernel_do(canonkeys_1, canonkeys_list[i])
kernel_list[i] = kernel
return kernel_list
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
self._add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_1 = self._get_canonkeys(g1)
canonkeys_list = [[] for _ in range(len(g_list))]
pool = Pool(self._n_jobs)
itr = zip(g_list, range(0, len(g_list)))
@@ -173,18 +173,18 @@ class Treelet(GraphKernel):
def _wrapper_kernel_list_do(self, itr):
return itr, self.__kernel_do(G_ck_1, G_ck_list[itr])
return itr, self._kernel_do(G_ck_1, G_ck_list[itr])
def _compute_single_kernel_series(self, g1, g2):
self.__add_dummy_labels([g1] + [g2])
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_2 = self.__get_canonkeys(g2)
kernel = self.__kernel_do(canonkeys_1, canonkeys_2)
self._add_dummy_labels([g1] + [g2])
canonkeys_1 = self._get_canonkeys(g1)
canonkeys_2 = self._get_canonkeys(g2)
kernel = self._kernel_do(canonkeys_1, canonkeys_2)
return kernel
def __kernel_do(self, canonkey1, canonkey2):
def _kernel_do(self, canonkey1, canonkey2):
"""Compute treelet graph kernel between 2 graphs.
Parameters
@@ -200,17 +200,17 @@ class Treelet(GraphKernel):
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])
kernel = self.__sub_kernel(vector1, vector2)
kernel = self._sub_kernel(vector1, vector2)
return kernel
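
_kernel_do above reduces each graph to a vector of treelet counts over the canonical keys the two graphs share, then delegates to a pluggable sub-kernel. A self-contained sketch, assuming a Gaussian sub-kernel (gaussian_sub_kernel and treelet_kernel are illustrative names, not the library's):

    import numpy as np

    def gaussian_sub_kernel(v1, v2, gamma=1.0):
        d = v1 - v2
        return np.exp(-gamma * d.dot(d))

    def treelet_kernel(canonkey1, canonkey2, sub_kernel=gaussian_sub_kernel):
        keys = set(canonkey1) & set(canonkey2)  # treelets present in both graphs
        v1 = np.array([canonkey1[k] for k in keys], dtype=float)
        v2 = np.array([canonkey2[k] for k in keys], dtype=float)
        return sub_kernel(v1, v2)

    # e.g. two graphs sharing the single-node ('0') and single-edge ('1') treelets:
    print(treelet_kernel({'0': 5, '1': 4, '2': 3}, {'0': 6, '1': 7}))
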
def _wrapper_kernel_do(self, itr):
i = itr[0]
j = itr[1]
return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j])
return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j])
def __get_canonkeys(self, G):
def _get_canonkeys(self, G):
"""Generate canonical keys of all treelets in a graph.
Parameters
@@ -236,7 +236,7 @@ class Treelet(GraphKernel):
patterns['0'] = list(G.nodes())
canonkey['0'] = nx.number_of_nodes(G)
for i in range(1, 6): # for i in range(1, 6):
patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed'])
patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed'])
canonkey[str(i)] = len(patterns[str(i)])
# n-star patterns
@@ -330,11 +330,11 @@ class Treelet(GraphKernel):
### pattern obtained in the structural analysis section above, which is a
### string corresponding to a unique treelet. A dictionary is built to keep
### track of the amount of every treelet.
if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0:
if len(self._node_labels) > 0 or len(self._edge_labels) > 0:
canonkey_l = {} # canonical key, a dictionary which keeps track of the count of every treelet.
# linear patterns
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels))
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels))
for key in canonkey_t:
canonkey_l[('0', key)] = canonkey_t[key]
@@ -343,9 +343,9 @@ class Treelet(GraphKernel):
for pattern in patterns[str(i)]:
canonlist = []
for idx, node in enumerate(pattern[:-1]):
canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels))
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels))
canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels))
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels))
canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
treelet.append(tuple([str(i)] + canonkey_t))
canonkey_l.update(Counter(treelet))
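
Since an unrooted path reads the same from either end, the comparison above keeps whichever of the label sequence and its reverse is lexicographically smaller, so both traversal directions yield one canonical key. A tiny illustration:

    seq = [('C',), ('single',), ('O',)]  # node/edge labels along one traversal
    canon = seq if seq < seq[::-1] else seq[::-1]
    rev = seq[::-1]                      # the same path walked the other way
    canon_rev = rev if rev < rev[::-1] else rev[::-1]
    assert canon == canon_rev            # both directions map to one key
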
@@ -356,13 +356,13 @@ class Treelet(GraphKernel):
for pattern in patterns[str(i) + 'star']:
canonlist = []
for leaf in pattern[1:]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['d' if i == 5 else str(i * 2)] +
[tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
[tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ canonlist)
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
@@ -372,17 +372,17 @@ class Treelet(GraphKernel):
for pattern in patterns['7']:
canonlist = []
for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['7']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)])
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)])
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
@@ -391,38 +391,38 @@ class Treelet(GraphKernel):
for pattern in patterns['11']:
canonlist = []
for leaf in pattern[1:4]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['b']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)])
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)])
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
# pattern 10
treelet = []
for pattern in patterns['10']:
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]
canonlist = []
for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist.append(tuple((nlabels, elabels)))
canonlist.sort()
canonkey0 = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['a']
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
+ canonkey4 + canonkey0)
treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet))
@@ -432,15 +432,15 @@ class Treelet(GraphKernel):
for pattern in patterns['12']:
canonlist0 = []
for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
canonlist0.append(tuple((nlabels, elabels)))
canonlist0.sort()
canonlist0 = list(chain.from_iterable(canonlist0))
canonlist3 = []
for leaf in pattern[4:6]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels)
elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels)
canonlist3.append(tuple((nlabels, elabels)))
canonlist3.sort()
canonlist3 = list(chain.from_iterable(canonlist3))
@@ -448,14 +448,14 @@ class Treelet(GraphKernel):
# 2 possible keys can be generated from 2 nodes with extended label 3,
# select the one with the lower lexicographic order.
canonkey_t1 = tuple(['c']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+ canonlist3)
canonkey_t2 = tuple(['c']
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)]
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
+ canonlist0)
treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
canonkey_l.update(Counter(treelet))
@@ -463,24 +463,24 @@ class Treelet(GraphKernel):
# pattern 9
treelet = []
for pattern in patterns['9']:
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels),
tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)]
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels),
tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)]
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels),
tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)]
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels),
tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)]
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels),
tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)]
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)]
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels),
tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)]
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels),
tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
if prekey2 + canonkey2 < prekey3 + canonkey3:
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
+ prekey2 + prekey3 + canonkey2 + canonkey3
else:
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
+ prekey3 + prekey2 + canonkey3 + canonkey2
treelet.append(tuple(['9']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ canonkey_t))
canonkey_l.update(Counter(treelet))
@@ -492,15 +492,15 @@ class Treelet(GraphKernel):
def _wrapper_get_canonkeys(self, itr_item):
g = itr_item[0]
i = itr_item[1]
return i, self.__get_canonkeys(g)
return i, self._get_canonkeys(g)
def __add_dummy_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__edge_labels = [SpecialLabel.DUMMY]
self._edge_labels = [SpecialLabel.DUMMY]

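Every hunk in this commit is the same mechanical rename, and the motivation is Python's name mangling: inside a class body, __name is rewritten to _ClassName__name, so subclasses (e.g. WLSubtree extending WeisfeilerLehman later in this diff) cannot reach the attribute under its written name, while a single leading underscore only signals "internal" and is inherited normally. A minimal sketch of the difference:

    class Base:
        def __init__(self):
            self.__secret = 1  # mangled: actually stored as _Base__secret
            self._shared = 2   # stored as-is

    class Child(Base):
        def peek(self):
            try:
                return self.__secret  # looks up _Child__secret -> AttributeError
            except AttributeError:
                return self._shared   # single underscore is inherited normally

    print(Child().peek())  # 2
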
+ 41
- 41
gklearn/kernels/weisfeiler_lehman.py View File

@@ -25,11 +25,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def __init__(self, **kwargs):
GraphKernel.__init__(self)
self.__node_labels = kwargs.get('node_labels', [])
self.__edge_labels = kwargs.get('edge_labels', [])
self.__height = int(kwargs.get('height', 0))
self.__base_kernel = kwargs.get('base_kernel', 'subtree')
self.__ds_infos = kwargs.get('ds_infos', {})
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._height = int(kwargs.get('height', 0))
self._base_kernel = kwargs.get('base_kernel', 'subtree')
self._ds_infos = kwargs.get('ds_infos', {})


def _compute_gm_series(self):
@@ -37,23 +37,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
import warnings
warnings.warn('A part of the computation is parallelized.')
self.__add_dummy_node_labels(self._graphs)
self._add_dummy_node_labels(self._graphs)
# for WL subtree kernel
if self.__base_kernel == 'subtree':
gram_matrix = self.__subtree_kernel_do(self._graphs)
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do(self._graphs)
# for WL shortest path kernel
elif self.__base_kernel == 'sp':
gram_matrix = self.__sp_kernel_do(self._graphs)
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do(self._graphs)
# for WL edge kernel
elif self.__base_kernel == 'edge':
gram_matrix = self.__edge_kernel_do(self._graphs)
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do(self._graphs)
# for user defined base kernel
else:
gram_matrix = self.__user_kernel_do(self._graphs)
gram_matrix = self._user_kernel_do(self._graphs)
return gram_matrix
@@ -70,23 +70,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
import warnings
warnings.warn('A part of the computation is parallelized.')
self.__add_dummy_node_labels(g_list + [g1])
self._add_dummy_node_labels(g_list + [g1])
# for WL subtree kernel
if self.__base_kernel == 'subtree':
gram_matrix = self.__subtree_kernel_do(g_list + [g1])
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do(g_list + [g1])
# for WL shortest path kernel
elif self.__base_kernel == 'sp':
gram_matrix = self.__sp_kernel_do(g_list + [g1])
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do(g_list + [g1])
# for WL edge kernel
elif self.__base_kernel == 'edge':
gram_matrix = self.__edge_kernel_do(g_list + [g1])
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do(g_list + [g1])
# for user defined base kernel
else:
gram_matrix = self.__user_kernel_do(g_list + [g1])
gram_matrix = self._user_kernel_do(g_list + [g1])
return list(gram_matrix[-1][0:-1])
@@ -103,28 +103,28 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better.
self.__add_dummy_node_labels([g1] + [g2])
self._add_dummy_node_labels([g1] + [g2])

# for WL subtree kernel
if self.__base_kernel == 'subtree':
gram_matrix = self.__subtree_kernel_do([g1] + [g2])
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do([g1] + [g2])
# for WL shortest path kernel
elif self.__base_kernel == 'sp':
gram_matrix = self.__sp_kernel_do([g1] + [g2])
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do([g1] + [g2])
# for WL edge kernel
elif self.__base_kernel == 'edge':
gram_matrix = self.__edge_kernel_do([g1] + [g2])
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do([g1] + [g2])
# for user defined base kernel
else:
gram_matrix = self.__user_kernel_do([g1] + [g2])
gram_matrix = self._user_kernel_do([g1] + [g2])
return gram_matrix[0][1]
def __subtree_kernel_do(self, Gn):
def _subtree_kernel_do(self, Gn):
"""Compute Weisfeiler-Lehman kernels between graphs.
Parameters
@@ -146,17 +146,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for G in Gn:
# set all labels into a tuple.
for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self.__node_labels)
G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
# get the set of original labels
labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
# number of occurrences of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori)))
# Compute subtree kernel with the 0th iteration and add it to the final kernel.
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
# iterate each height
for h in range(1, self.__height + 1):
for h in range(1, self._height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
# all_labels_ori = set() # all unique original labels in all graphs in this iteration
@@ -199,12 +199,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
all_num_of_each_label.append(dict(Counter(labels_comp)))
# Compute subtree kernel with h iterations and add it to the final kernel
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
return gram_matrix

def __compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn):
def _compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn):
"""Compute Gram matrix using the base kernel.
"""
if self._parallel == 'imap_unordered':
@@ -218,12 +218,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
elif self._parallel is None:
for i in range(len(gram_matrix)):
for j in range(i, len(gram_matrix)):
gram_matrix[i][j] = self.__compute_subtree_kernel(all_num_of_each_label[i],
gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
all_num_of_each_label[j], gram_matrix[i][j])
gram_matrix[j][i] = gram_matrix[i][j]
def __compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
"""Compute the subtree kernel.
"""
labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
@@ -240,7 +240,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
i = itr[0]
j = itr[1]
return i, j, self.__compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
def _wl_spkernel_do(Gn, node_label, edge_label, height):
@@ -469,11 +469,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
return gram_matrix
def __add_dummy_node_labels(self, Gn):
if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
def _add_dummy_node_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.__node_labels = [SpecialLabel.DUMMY]
self._node_labels = [SpecialLabel.DUMMY]
class WLSubtree(WeisfeilerLehman):


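_subtree_kernel_do above packs each node's labels into a tuple, counts label occurrences, adds a base-kernel term, then repeats for each height after compressing (label, neighbor labels) signatures into new labels. A compact sketch of that scheme under simplifying assumptions (a single integer node label and a histogram dot product as the base kernel; wl_relabel_once and wl_subtree_kernel are illustrative names, not gklearn API):

    from collections import Counter
    import networkx as nx

    def wl_relabel_once(graphs, labels_per_graph):
        # one WL compression step: (label, sorted neighbor labels) -> new int label;
        # the table is shared across graphs so compressed labels stay comparable.
        table = {}
        relabeled = []
        for G, labels in zip(graphs, labels_per_graph):
            new_labels = {}
            for v in G.nodes():
                sig = (labels[v], tuple(sorted(labels[u] for u in G.neighbors(v))))
                new_labels[v] = table.setdefault(sig, len(table))
            relabeled.append(new_labels)
        return relabeled

    def wl_subtree_kernel(G1, G2, height=2):
        labels = [{v: d.get('label', 0) for v, d in G.nodes(data=True)}
                  for G in (G1, G2)]
        k = 0
        for _ in range(height + 1):
            c1, c2 = Counter(labels[0].values()), Counter(labels[1].values())
            k += sum(c1[lab] * c2[lab] for lab in c1.keys() & c2.keys())
            labels = wl_relabel_once((G1, G2), labels)
        return k

    print(wl_subtree_kernel(nx.path_graph(4), nx.cycle_graph(4)))  # 24
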
+ 2
- 2
gklearn/preimage/generate_random_preimages_by_class.py View File

@@ -31,7 +31,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], dir_save)

dis_k_dataset_list = []
@@ -166,7 +166,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav
print('\ncomplete.\n')

def __init_output_file_preimage(ds_name, gkernel, dir_output):
def _init_output_file_preimage(ds_name, gkernel, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
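
The helper above checks for the output directory and builds the CSV filenames; os.makedirs(dir_output, exist_ok=True) collapses the exists-check and creation into one call. A sketch of the same setup (the summary filename is an assumption, since only the detail name is visible here):

    import os

    def init_output_files(ds_name, gkernel, dir_output):
        os.makedirs(dir_output, exist_ok=True)
        fn_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
        fn_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'  # assumed
        return fn_detail, fn_summary
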


+ 25
- 25
gklearn/preimage/kernel_knn_cv.py View File

@@ -33,35 +33,35 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
else:
fn_output_detail, fn_output_summary = None, None
# 2. compute/load Gram matrix a priori.
print('2. computing/loading Gram matrix...')
gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
gram_matrix_unnorm, time_precompute_gm = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all)
# 3. perform k-nn CV.
print('3. performing k-nn CV...')
if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random':
__kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
elif train_examples == 'best-dataset':
__kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
elif train_examples == 'trainset':
__kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)
_kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary)

print('\ncomplete.\n')
def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
Gn = dataset_all.graphs
y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']

# get shuffles.
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)
accuracies = [[], [], []]
for trial in range(len(train_indices)):
@@ -89,11 +89,11 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
mge_options['update_order'] = True
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0
set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
set_median, gen_median_uo = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
mge_options['update_order'] = False
mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy()
mpg_options['runtime_precompute_gm'] = 0
_, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
_, gen_median = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options)
medians[0].append(set_median)
medians[1].append(gen_median)
medians[2].append(gen_median_uo)
@@ -104,10 +104,10 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in G_app], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_medians = []
for g in G_app:
kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
@@ -161,13 +161,13 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne
f_summary.close()
def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
Gn = dataset_all.graphs
y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']

# get shuffles.
train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size)
accuracies = []
for trial in range(len(train_indices)):
@@ -204,10 +204,10 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
# compute dis_mat between medians.
dataset = dataset_all.copy()
dataset.load_graphs([g.copy() for g in best_graphs], targets=None)
gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy())
gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy())
# compute the entire Gram matrix.
graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy())
graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy())
kernels_to_best_graphs = []
for g in best_graphs:
kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy())
@@ -259,7 +259,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g
f_summary.close()
def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
def _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary):
y_all = dataset_all.targets
n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size']
@@ -268,7 +268,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
dis_mat, _, _, _ = compute_distance_matrix(gram_matrix)

# get shuffles.
train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size)
train_indices, test_indices, _, _ = _get_shuffles(y_all, n_splits, test_size)
accuracies = []
for trial in range(len(train_indices)):
@@ -317,7 +317,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options,
f_summary.close()
def __get_shuffles(y_all, n_splits, test_size):
def _get_shuffles(y_all, n_splits, test_size):
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
train_indices = [[] for _ in range(n_splits)]
test_indices = [[] for _ in range(n_splits)]
@@ -335,7 +335,7 @@ def __get_shuffles(y_all, n_splits, test_size):
return train_indices, test_indices, train_nums, keys
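
Only the head and tail of _get_shuffles are visible; given that it returns per-class training counts (train_nums) and the class keys, it plausibly draws the ShuffleSplit partitions separately within each target class. A hedged reconstruction under that assumption, not the original body:

    from sklearn.model_selection import ShuffleSplit
    import numpy as np

    def get_shuffles(y_all, n_splits, test_size):
        rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
        train_indices = [[] for _ in range(n_splits)]
        test_indices = [[] for _ in range(n_splits)]
        keys = sorted(set(y_all))
        train_nums = []  # training-set size per class (assumption)
        for key in keys:
            idx = np.where(np.asarray(y_all) == key)[0]
            for i, (tr, te) in enumerate(rs.split(idx)):
                train_indices[i].extend(idx[tr].tolist())
                test_indices[i].extend(idx[te].tolist())
            train_nums.append(len(tr))
        return train_indices, test_indices, train_nums, keys
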
def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
def _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options):
mpg = MedianPreimageGenerator()
mpg.dataset = dataset.copy()
mpg.set_options(**mpg_options.copy())
@@ -346,7 +346,7 @@ def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_option
return mpg.set_median, mpg.gen_median


def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
if load_gm == 'auto':
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
@@ -355,10 +355,10 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
gram_matrix_unnorm = gmfile['gram_matrix_unnorm']
time_precompute_gm = float(gmfile['run_time'])
else:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
elif not load_gm:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm)
else:
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
@@ -369,7 +369,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all):
return gram_matrix_unnorm, time_precompute_gm


def __get_graph_kernel(dataset, kernel_options):
def _get_graph_kernel(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -381,7 +381,7 @@ def __get_graph_kernel(dataset, kernel_options):
return graph_kernel
def __compute_gram_matrix_unnorm(dataset, kernel_options):
def _compute_gram_matrix_unnorm(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -397,7 +397,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
return gram_matrix_unnorm, run_time
def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
def _init_output_file_knn(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv'


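_get_gram_matrix above is a compute-or-load cache: with load_gm='auto' it reuses a saved .gm.npz archive when present and otherwise recomputes and saves. The same pattern in isolation, with the filename handling simplified (compute_fn and cache_path are placeholders, and cache_path is assumed to already end in '.npz'; the original builds the name from ds_name and the kernel name):

    import os
    import numpy as np

    def get_gram_matrix(compute_fn, cache_path, load_gm='auto'):
        if load_gm == 'auto' and os.path.isfile(cache_path):
            archive = np.load(cache_path)
            return archive['gram_matrix_unnorm'], float(archive['run_time'])
        gram, run_time = compute_fn()  # e.g. the unnormalized kernel computation
        np.savez(cache_path, gram_matrix_unnorm=gram, run_time=run_time)
        return gram, run_time
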
+ 284
- 283
gklearn/preimage/median_preimage_generator.py
File diff suppressed because it is too large
View File


+ 221
- 221
gklearn/preimage/median_preimage_generator_cml.py View File

@@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
### arguments to set.
self.__mge = None
self.__ged_options = {}
self.__mge_options = {}
# self.__fit_method = 'k-graphs'
self.__init_method = 'random'
self.__init_ecc = None
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__ds_name = None
self._mge = None
self._ged_options = {}
self._mge_options = {}
# self._fit_method = 'k-graphs'
self._init_method = 'random'
self._init_ecc = None
self._parallel = True
self._n_jobs = multiprocessing.cpu_count()
self._ds_name = None
# for cml.
self.__time_limit_in_sec = 0
self.__max_itrs = 100
self.__max_itrs_without_update = 3
self.__epsilon_residual = 0.01
self.__epsilon_ec = 0.1
self.__allow_zeros = True
# self.__triangle_rule = True
self._time_limit_in_sec = 0
self._max_itrs = 100
self._max_itrs_without_update = 3
self._epsilon_residual = 0.01
self._epsilon_ec = 0.1
self._allow_zeros = True
# self._triangle_rule = True
### values to compute.
self.__runtime_optimize_ec = None
self.__runtime_generate_preimage = None
self.__runtime_total = None
self.__set_median = None
self.__gen_median = None
self.__best_from_dataset = None
self.__sod_set_median = None
self.__sod_gen_median = None
self.__k_dis_set_median = None
self.__k_dis_gen_median = None
self.__k_dis_dataset = None
self.__node_label_costs = None
self.__edge_label_costs = None
self._runtime_optimize_ec = None
self._runtime_generate_preimage = None
self._runtime_total = None
self._set_median = None
self._gen_median = None
self._best_from_dataset = None
self._sod_set_median = None
self._sod_gen_median = None
self._k_dis_set_median = None
self._k_dis_gen_median = None
self._k_dis_dataset = None
self._node_label_costs = None
self._edge_label_costs = None
# for cml.
self.__itrs = 0
self.__converged = False
self.__num_updates_ecs = 0
self._itrs = 0
self._converged = False
self._num_updates_ecs = 0
### values that can be set or to be computed.
self.__edit_cost_constants = []
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None
self._edit_cost_constants = []
self._gram_matrix_unnorm = None
self._runtime_precompute_gm = None

def set_options(self, **kwargs):
self._kernel_options = kwargs.get('kernel_options', {})
self._graph_kernel = kwargs.get('graph_kernel', None)
self._verbose = kwargs.get('verbose', 2)
self.__ged_options = kwargs.get('ged_options', {})
self.__mge_options = kwargs.get('mge_options', {})
# self.__fit_method = kwargs.get('fit_method', 'k-graphs')
self.__init_method = kwargs.get('init_method', 'random')
self.__init_ecc = kwargs.get('init_ecc', None)
self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
self.__parallel = kwargs.get('parallel', True)
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self.__ds_name = kwargs.get('ds_name', None)
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self.__max_itrs = kwargs.get('max_itrs', 100)
self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self.__allow_zeros = kwargs.get('allow_zeros', True)
# self.__triangle_rule = kwargs.get('triangle_rule', True)
self._ged_options = kwargs.get('ged_options', {})
self._mge_options = kwargs.get('mge_options', {})
# self._fit_method = kwargs.get('fit_method', 'k-graphs')
self._init_method = kwargs.get('init_method', 'random')
self._init_ecc = kwargs.get('init_ecc', None)
self._edit_cost_constants = kwargs.get('edit_cost_constants', [])
self._parallel = kwargs.get('parallel', True)
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self._ds_name = kwargs.get('ds_name', None)
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self._max_itrs = kwargs.get('max_itrs', 100)
self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
self._epsilon_residual = kwargs.get('epsilon_residual', 0.01)
self._epsilon_ec = kwargs.get('epsilon_ec', 0.1)
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self._allow_zeros = kwargs.get('allow_zeros', True)
# self._triangle_rule = kwargs.get('triangle_rule', True)
def run(self):
@@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
start = time.time()
# 1. precompute gram matrix.
if self.__gram_matrix_unnorm is None:
if self._gram_matrix_unnorm is None:
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
end_precompute_gm = time.time()
self.__runtime_precompute_gm = end_precompute_gm - start
self._runtime_precompute_gm = end_precompute_gm - start
else:
if self.__runtime_precompute_gm is None:
if self._runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
start -= self._runtime_precompute_gm
# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
# if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset':
# start = time.time()
# self.__runtime_precompute_gm = 0
# self._runtime_precompute_gm = 0
# end_precompute_gm = start
# 2. optimize edit cost constants.
self.__optimize_edit_cost_vector()
self._optimize_edit_cost_vector()
end_optimize_ec = time.time()
self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm
self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm
# 3. compute set median and gen median using optimized edit costs.
if self._verbose >= 2:
print('\nstart computing set median and gen median using optimized edit costs...\n')
self.__gmg_bcu()
self._gmg_bcu()
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self.__runtime_total = end_generate_preimage - start
self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec
self._runtime_total = end_generate_preimage - start
if self._verbose >= 2:
print('medians computed.')
print('SOD of the set median: ', self.__sod_set_median)
print('SOD of the generalized median: ', self.__sod_gen_median)
print('SOD of the set median: ', self._sod_set_median)
print('SOD of the generalized median: ', self._sod_gen_median)
# 4. compute kernel distances to the true median.
if self._verbose >= 2:
print('\nstart computing distances to true median....\n')
self.__compute_distances_to_true_median()
self._compute_distances_to_true_median()

# 5. print out results.
if self._verbose:
@@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
print('================================================================================')
print('Finished generation of preimages.')
print('--------------------------------------------------------------------------------')
print('The optimized edit costs:', self.__edit_cost_constants)
print('SOD of the set median:', self.__sod_set_median)
print('SOD of the generalized median:', self.__sod_gen_median)
print('Distance in kernel space for set median:', self.__k_dis_set_median)
print('Distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to optimize edit costs:', self.__runtime_optimize_ec)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating edit costs:', self.__num_updates_ecs)
print('Is optimization of edit costs converged:', self.__converged)
print('The optimized edit costs:', self._edit_cost_constants)
print('SOD of the set median:', self._sod_set_median)
print('SOD of the generalized median:', self._sod_gen_median)
print('Distance in kernel space for set median:', self._k_dis_set_median)
print('Distance in kernel space for generalized median:', self._k_dis_gen_median)
print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to optimize edit costs:', self._runtime_optimize_ec)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('Total number of iterations for optimizing:', self._itrs)
print('Total number of updating edit costs:', self._num_updates_ecs)
print('Is optimization of edit costs converged:', self._converged)
print('================================================================================')
print()


def get_results(self):
results = {}
results['edit_cost_constants'] = self.__edit_cost_constants
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_optimize_ec'] = self.__runtime_optimize_ec
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['sod_set_median'] = self.__sod_set_median
results['sod_gen_median'] = self.__sod_gen_median
results['k_dis_set_median'] = self.__k_dis_set_median
results['k_dis_gen_median'] = self.__k_dis_gen_median
results['k_dis_dataset'] = self.__k_dis_dataset
results['itrs'] = self.__itrs
results['converged'] = self.__converged
results['num_updates_ecc'] = self.__num_updates_ecs
results['edit_cost_constants'] = self._edit_cost_constants
results['runtime_precompute_gm'] = self._runtime_precompute_gm
results['runtime_optimize_ec'] = self._runtime_optimize_ec
results['runtime_generate_preimage'] = self._runtime_generate_preimage
results['runtime_total'] = self._runtime_total
results['sod_set_median'] = self._sod_set_median
results['sod_gen_median'] = self._sod_gen_median
results['k_dis_set_median'] = self._k_dis_set_median
results['k_dis_gen_median'] = self._k_dis_gen_median
results['k_dis_dataset'] = self._k_dis_dataset
results['itrs'] = self._itrs
results['converged'] = self._converged
results['num_updates_ecc'] = self._num_updates_ecs
results['mge'] = {}
results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents()
results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased()
results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased()
results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents()
return results

def __optimize_edit_cost_vector(self):
def _optimize_edit_cost_vector(self):
"""Learn edit cost vector.
"""
# Initialize label costs randomly.
if self.__init_method == 'random':
if self._init_method == 'random':
# Initialize label costs.
self.__initialize_label_costs()
self._initialize_label_costs()
# Optimize edit cost matrices.
self.__optimize_ecm_by_kernel_distances()
self._optimize_ecm_by_kernel_distances()
# Initialize all label costs with the same value.
elif self.__init_method == 'uniform': # random
elif self._init_method == 'uniform': # random
pass
elif self.__fit_method == 'random': # random
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = random.sample(range(1, 1000), 3)
self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'LETTER2':
elif self._fit_method == 'random': # random
if self._ged_options['edit_cost'] == 'LETTER':
self._edit_cost_constants = random.sample(range(1, 1000), 3)
self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants]
elif self._ged_options['edit_cost'] == 'LETTER2':
random.seed(time.time())
self.__edit_cost_constants = random.sample(range(1, 1000), 5)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
self._edit_cost_constants = random.sample(range(1, 1000), 5)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
self._edit_cost_constants = random.sample(range(1, 1000), 6)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
if self._dataset.node_attrs == []:
self.__edit_cost_constants[2] = 0
self._edit_cost_constants[2] = 0
if self._dataset.edge_attrs == []:
self.__edit_cost_constants[5] = 0
self._edit_cost_constants[5] = 0
else:
self.__edit_cost_constants = random.sample(range(1, 1000), 6)
self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants]
self._edit_cost_constants = random.sample(range(1, 1000), 6)
self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants]
if self._verbose >= 2:
print('edit cost constants used:', self.__edit_cost_constants)
elif self.__fit_method == 'expert': # expert
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__edit_cost_constants = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
print('edit cost constants used:', self._edit_cost_constants)
elif self._fit_method == 'expert': # expert
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._edit_cost_constants = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
self.__edit_cost_constants = [3, 3, 1, 3, 3, 1]
self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
else:
self.__edit_cost_constants = self.__init_ecc
elif self.__fit_method == 'k-graphs':
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__init_ecc = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
self.__init_ecc = [0, 0, 1, 1, 1, 0]
self._edit_cost_constants = self._init_ecc
elif self._fit_method == 'k-graphs':
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._init_ecc = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC':
self._init_ecc = [0, 0, 1, 1, 1, 0]
if self._dataset.node_attrs == []:
self.__init_ecc[2] = 0
self._init_ecc[2] = 0
if self._dataset.edge_attrs == []:
self.__init_ecc[5] = 0
self._init_ecc[5] = 0
else:
self.__init_ecc = [3, 3, 1, 3, 3, 1]
self._init_ecc = [3, 3, 1, 3, 3, 1]
# optimize on the k-graph subset.
self.__optimize_ecm_by_kernel_distances()
elif self.__fit_method == 'whole-dataset':
if self.__init_ecc is None:
if self.__ged_options['edit_cost'] == 'LETTER':
self.__init_ecc = [0.9, 1.7, 0.75]
elif self.__ged_options['edit_cost'] == 'LETTER2':
self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
self._optimize_ecm_by_kernel_distances()
elif self._fit_method == 'whole-dataset':
if self._init_ecc is None:
if self._ged_options['edit_cost'] == 'LETTER':
self._init_ecc = [0.9, 1.7, 0.75]
elif self._ged_options['edit_cost'] == 'LETTER2':
self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
else:
self.__init_ecc = [3, 3, 1, 3, 3, 1]
self._init_ecc = [3, 3, 1, 3, 3, 1]
# optimize on the whole set.
self.__optimize_ecc_by_kernel_distances()
elif self.__fit_method == 'precomputed':
self._optimize_ecc_by_kernel_distances()
elif self._fit_method == 'precomputed':
pass
def __initialize_label_costs(self):
self.__initialize_node_label_costs()
self.__initialize_edge_label_costs()
def _initialize_label_costs(self):
self._initialize_node_label_costs()
self._initialize_edge_label_costs()
def __initialize_node_label_costs(self):
def _initialize_node_label_costs(self):
# Get list of node labels.
nls = self._dataset.get_all_node_labels()
# Generate random costs.
nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls))
rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl)
rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
self.__node_label_costs = rand_costs
self._node_label_costs = rand_costs


def __initialize_edge_label_costs(self):
def _initialize_edge_label_costs(self):
# Get list of edge labels.
els = self._dataset.get_all_edge_labels()
# Generate random costs.
nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els))
rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el)
rand_costs /= np.max(rand_costs) # @todo: maybe not needed.
self.__edge_label_costs = rand_costs
self._edge_label_costs = rand_costs
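
The vector lengths above come from counting, for an alphabet of n labels, one substitution cost per unordered label pair plus one insertion and one deletion cost per label, i.e. n(n - 1)/2 + 2n costs in total. For instance, 3 node labels give 3 + 6 = 9 random costs, which are then rescaled into (0, 1] by dividing by their maximum.
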
def __optimize_ecm_by_kernel_distances(self):
def _optimize_ecm_by_kernel_distances(self):
# compute distances in feature space.
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
dis_k_vec = []
@@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
dis_k_vec = np.array(dis_k_vec)
# Set GEDEnv options.
# graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
# self.__edit_cost_constants = self.__init_ecc
options = self.__ged_options.copy()
options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed.
# graphs = [self._clean_graph(g) for g in self._dataset.graphs]
# self._edit_cost_constants = self._init_ecc
options = self._ged_options.copy()
options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed.
options['node_labels'] = self._dataset.node_labels
options['edge_labels'] = self._dataset.edge_labels
# options['node_attrs'] = self._dataset.node_attrs
# options['edge_attrs'] = self._dataset.edge_attrs
options['node_label_costs'] = self.__node_label_costs
options['edge_label_costs'] = self.__edge_label_costs
options['node_label_costs'] = self._node_label_costs
options['edge_label_costs'] = self._edge_label_costs
# Learn cost matrices.
# Initialize cost learner.
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec)
cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo
cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec)
# Run cost learner.
cml.update(dis_k_vec, self._dataset.graphs, options)
# Get results.
results = cml.get_results()
self.__converged = results['converged']
self.__itrs = results['itrs']
self.__num_updates_ecs = results['num_updates_ecs']
self._converged = results['converged']
self._itrs = results['itrs']
self._num_updates_ecs = results['num_updates_ecs']
cost_list = results['cost_list']
self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)]
self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):]
self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)]
self._edge_label_costs = cost_list[-1][len(self._node_label_costs):]

def __gmg_bcu(self):
def _gmg_bcu(self):
"""
The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).

@@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
# Set up the ged environment.
ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable.
# gedlibpy.restart_env()
ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants)
graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants)
graphs = [self._clean_graph(g) for g in self._dataset.graphs]
for g in graphs:
ged_env.add_nx_graph(g, '')
graph_ids = ged_env.get_all_graph_ids()
node_labels = ged_env.get_all_node_labels()
edge_labels = ged_env.get_all_edge_labels()
node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels))
node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels))
edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels))
ged_env.set_label_costs(node_label_costs, edge_label_costs)
set_median_id = ged_env.add_graph('set_median')
gen_median_id = ged_env.add_graph('gen_median')
ged_env.init(init_type=self.__ged_options['init_option'])
ged_env.init(init_type=self._ged_options['init_option'])
# Set up the median graph estimator.
self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options)
options = self.__mge_options.copy()
self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost']))
self._mge.set_refine_method(self._ged_options['method'], self._ged_options)
options = self._mge_options.copy()
if not 'seed' in options:
options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage.
options['parallel'] = self.__parallel
options['parallel'] = self._parallel
# Select the GED algorithm.
self.__mge.set_options(mge_options_to_string(options))
self.__mge.set_label_names(node_labels=self._dataset.node_labels,
self._mge.set_options(mge_options_to_string(options))
self._mge.set_label_names(node_labels=self._dataset.node_labels,
edge_labels=self._dataset.edge_labels,
node_attrs=self._dataset.node_attrs,
edge_attrs=self._dataset.edge_attrs)
ged_options = self.__ged_options.copy()
if self.__parallel:
ged_options = self._ged_options.copy()
if self._parallel:
ged_options['threads'] = 1
self.__mge.set_init_method(ged_options['method'], ged_options)
self.__mge.set_descent_method(ged_options['method'], ged_options)
self._mge.set_init_method(ged_options['method'], ged_options)
self._mge.set_descent_method(ged_options['method'], ged_options)
# Run the estimator.
self.__mge.run(graph_ids, set_median_id, gen_median_id)
self._mge.run(graph_ids, set_median_id, gen_median_id)
# Get SODs.
self.__sod_set_median = self.__mge.get_sum_of_distances('initialized')
self.__sod_gen_median = self.__mge.get_sum_of_distances('converged')
self._sod_set_median = self._mge.get_sum_of_distances('initialized')
self._sod_gen_median = self._mge.get_sum_of_distances('converged')
# Get median graphs.
self.__set_median = ged_env.get_nx_graph(set_median_id)
self.__gen_median = ged_env.get_nx_graph(gen_median_id)
self._set_median = ged_env.get_nx_graph(set_median_id)
self._gen_median = ged_env.get_nx_graph(gen_median_id)
def __compute_distances_to_true_median(self):
def _compute_distances_to_true_median(self):
# compute distance in kernel space for set median.
kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options)
kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize
kernel_sm = 1
# @todo: not correct kernel value
gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1)
self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_sm, withterm3=False)
# compute distance in kernel space for generalized median.
kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options)
kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize
kernel_gm = 1
gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1)
self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)),
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False)
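The division above is the standard cosine normalization of kernel values, k_norm(g, h) = k(g, h) / sqrt(k(g, g) * k(h, h)), which is why each self-kernel is then set to 1. A minimal numpy sketch of the same step (names are illustrative, not from gklearn):

import numpy as np

def normalize_kernels_to_g(kernels_to_g, self_kernel, gram_diag):
    # k_norm(g, g_i) = k(g, g_i) / sqrt(k(g, g) * k(g_i, g_i))
    return [k / np.sqrt(self_kernel * d) for k, d in zip(kernels_to_g, gram_diag)]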
@@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
[1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
gram_with_gm, withterm3=False))
idx_k_dis_median_set_min = np.argmin(k_dis_median_set)
self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()
if self._verbose >= 2:
print()
print('distance in kernel space for set median:', self.__k_dis_set_median)
print('distance in kernel space for generalized median:', self.__k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
print('distance in kernel space for set median:', self._k_dis_set_median)
print('distance in kernel space for generalized median:', self._k_dis_gen_median)
print('minimum distance in kernel space for each graph in median set:', self._k_dis_dataset)
print('distance in kernel space for each graph in median set:', k_dis_median_set)
# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated.
"""
Cleans node and edge labels and attributes of the given graph.
"""
@@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator):
@property
def mge(self):
return self.__mge
return self._mge
@property
def ged_options(self):
return self.__ged_options
return self._ged_options

@ged_options.setter
def ged_options(self, value):
self.__ged_options = value
self._ged_options = value

@property
def mge_options(self):
return self.__mge_options
return self._mge_options

@mge_options.setter
def mge_options(self, value):
self.__mge_options = value
self._mge_options = value


@property
def fit_method(self):
return self.__fit_method
return self._fit_method

@fit_method.setter
def fit_method(self, value):
self.__fit_method = value
self._fit_method = value
@property
def init_ecc(self):
return self.__init_ecc
return self._init_ecc

@init_ecc.setter
def init_ecc(self, value):
self.__init_ecc = value
self._init_ecc = value
@property
def set_median(self):
return self.__set_median
return self._set_median


@property
def gen_median(self):
return self.__gen_median
return self._gen_median
@property
def best_from_dataset(self):
return self.__best_from_dataset
return self._best_from_dataset
@property
def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm
return self._gram_matrix_unnorm
@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value
self._gram_matrix_unnorm = value
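The "__" to "_" change in these accessors is not purely cosmetic: Python name-mangles double-underscore attributes, so inside MedianPreimageGeneratorCML a reference to self.__mge is compiled to self._MedianPreimageGeneratorCML__mge, which subclasses and external helpers cannot reach under the written name. A minimal sketch of the behavior (class names here are illustrative, not from gklearn):

class Base:
    def __init__(self):
        self.__x = 1  # stored as _Base__x (name-mangled)
        self._y = 2   # stored as _y (plain convention)

class Child(Base):
    def read(self):
        # self.__x here would be compiled to self._Child__x and raise
        # AttributeError; the single-underscore member is visible as written.
        return self._y

c = Child()
print(c.read())     # 2
print(c._Base__x)   # 1, the mangled name actually stored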

+ 283
- 283
gklearn/preimage/median_preimage_generator_py.py
File diff suppressed because it is too large


+ 109
- 109
gklearn/preimage/random_preimage_generator.py

@@ -26,43 +26,43 @@ class RandomPreimageGenerator(PreimageGenerator):
def __init__(self, dataset=None):
PreimageGenerator.__init__(self, dataset=dataset)
# arguments to set.
self.__k = 5 # number of nearest neighbors of phi in D_N.
self.__r_max = 10 # maximum number of iterations.
self.__l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}.
self.__alphas = None # weights of linear combinations of points in kernel space.
self.__parallel = True
self.__n_jobs = multiprocessing.cpu_count()
self.__time_limit_in_sec = 0
self.__max_itrs = 20
self._k = 5 # number of nearest neighbors of phi in D_N.
self._r_max = 10 # maximum number of iterations.
self._l = 500 # number of graphs generated for each graph in D_k U {g_i_hat}.
self._alphas = None # weights of linear combinations of points in kernel space.
self._parallel = True
self._n_jobs = multiprocessing.cpu_count()
self._time_limit_in_sec = 0
self._max_itrs = 20
# values to compute.
self.__runtime_generate_preimage = None
self.__runtime_total = None
self.__preimage = None
self.__best_from_dataset = None
self.__k_dis_preimage = None
self.__k_dis_dataset = None
self.__itrs = 0
self.__converged = False # @todo
self.__num_updates = 0
self._runtime_generate_preimage = None
self._runtime_total = None
self._preimage = None
self._best_from_dataset = None
self._k_dis_preimage = None
self._k_dis_dataset = None
self._itrs = 0
self._converged = False # @todo
self._num_updates = 0
# values that can be set or to be computed.
self.__gram_matrix_unnorm = None
self.__runtime_precompute_gm = None
self._gram_matrix_unnorm = None
self._runtime_precompute_gm = None

def set_options(self, **kwargs):
self._kernel_options = kwargs.get('kernel_options', {})
self._graph_kernel = kwargs.get('graph_kernel', None)
self._verbose = kwargs.get('verbose', 2)
self.__k = kwargs.get('k', 5)
self.__r_max = kwargs.get('r_max', 10)
self.__l = kwargs.get('l', 500)
self.__alphas = kwargs.get('alphas', None)
self.__parallel = kwargs.get('parallel', True)
self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self.__max_itrs = kwargs.get('max_itrs', 20)
self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
self._k = kwargs.get('k', 5)
self._r_max = kwargs.get('r_max', 10)
self._l = kwargs.get('l', 500)
self._alphas = kwargs.get('alphas', None)
self._parallel = kwargs.get('parallel', True)
self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
self._max_itrs = kwargs.get('max_itrs', 20)
self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
def run(self):
@@ -78,65 +78,65 @@ class RandomPreimageGenerator(PreimageGenerator):
start = time.time()
# 1. precompute gram matrix.
if self.__gram_matrix_unnorm is None:
if self._gram_matrix_unnorm is None:
gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
end_precompute_gm = time.time()
self.__runtime_precompute_gm = end_precompute_gm - start
self._runtime_precompute_gm = end_precompute_gm - start
else:
if self.__runtime_precompute_gm is None:
if self._runtime_precompute_gm is None:
raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm
if self._kernel_options['normalize']:
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm))
else:
self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm)
end_precompute_gm = time.time()
start -= self.__runtime_precompute_gm
start -= self._runtime_precompute_gm
# 2. compute k nearest neighbors of phi in D_N.
if self._verbose >= 2:
print('\nstart computing k nearest neighbors of phi in D_N...\n')
D_N = self._dataset.graphs
if self.__alphas is None:
self.__alphas = [1 / len(D_N)] * len(D_N)
if self._alphas is None:
self._alphas = [1 / len(D_N)] * len(D_N)
k_dis_list = [] # distance between g_star and each graph.
term3 = 0
for i1, a1 in enumerate(self.__alphas):
for i2, a2 in enumerate(self.__alphas):
for i1, a1 in enumerate(self._alphas):
for i2, a2 in enumerate(self._alphas):
term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2]
for idx in range(len(D_N)):
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self._alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True))
# sort.
sort_idx = np.argsort(k_dis_list)
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances.
dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self._k]] # the k shortest distances.
nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N
self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple.
self.__k_dis_dataset = dis_gs[0]
self._best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple.
self._k_dis_dataset = dis_gs[0]
if self.__k_dis_dataset == 0: # get the exact pre-image.
if self._k_dis_dataset == 0: # get the exact pre-image.
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = self.__best_from_dataset.copy()
self.__k_dis_preimage = self.__k_dis_dataset
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self._runtime_total = end_generate_preimage - start
self._preimage = self._best_from_dataset.copy()
self._k_dis_preimage = self._k_dis_dataset
if self._verbose:
print()
print('=============================================================================')
print('The exact pre-image is found from the input dataset.')
print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Distance in kernel space for the best graph from dataset and for preimage:', self._k_dis_dataset)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('=============================================================================')
print()
return
dhat = dis_gs[0] # the nearest distance
Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors
Gk = [D_N[ig].copy() for ig in sort_idx[0:self._k]] # the k nearest neighbors
Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk]
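compute_k_dis, used above for every candidate, evaluates the usual kernel-space distance between a graph g and the weighted mean sum_i alpha_i * phi(g_i): d(g)^2 = k(g, g) - 2 * sum_i alpha_i * k(g, g_i) + sum_ij alpha_i * alpha_j * k(g_i, g_j). The precomputed term3 is the double sum, and withterm3 lets callers drop it when it is constant across candidates, since dropping it shifts all squared distances equally and leaves argmin comparisons unaffected. A rough sketch under that reading, not the gklearn implementation itself:

import numpy as np

def kernel_distance(i, idx_list, alphas, gram, term3=0.0, withterm3=True):
    # squared distance from row i of `gram` to the alpha-weighted mean
    term1 = gram[i, i]                                      # k(g, g)
    term2 = 2.0 * sum(a * gram[i, j] for a, j in zip(alphas, idx_list))
    d2 = term1 - term2 + (term3 if withterm3 else 0.0)
    return np.sqrt(max(d2, 0.0))  # clamp tiny negatives from rounding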
# 3. start iterations.
@@ -146,12 +146,12 @@ class RandomPreimageGenerator(PreimageGenerator):
dihat_list = []
r = 0
dis_of_each_itr = [dhat]
if self.__parallel:
if self._parallel:
self._kernel_options['parallel'] = None
self.__itrs = 0
self.__num_updates = 0
timer = Timer(self.__time_limit_in_sec)
while not self.__termination_criterion_met(timer, self.__itrs, r):
self._itrs = 0
self._num_updates = 0
timer = Timer(self._time_limit_in_sec)
while not self._termination_criterion_met(timer, self._itrs, r):
print('\n- r =', r)
found = False
dis_bests = dis_gs + dihat_list
@@ -173,7 +173,7 @@ class RandomPreimageGenerator(PreimageGenerator):
nb_modif = 1
for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)):
nb_modif *= nb / (fdgs_max - idx)
while fdgs_max < nb_vpairs_min and nb_modif < self.__l:
while fdgs_max < nb_vpairs_min and nb_modif < self._l:
fdgs_max += 1
nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max
nb_increase = int(fdgs_max - fdgs_max_old)
@@ -184,7 +184,7 @@ class RandomPreimageGenerator(PreimageGenerator):
for ig, gs in enumerate(Gs_nearest + gihat_list):
if self._verbose >= 2:
print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list))
gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
gnew, dhat, found = self._generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3)
if found:
r = 0
@@ -194,51 +194,51 @@ class RandomPreimageGenerator(PreimageGenerator):
r += 1
dis_of_each_itr.append(dhat)
self.__itrs += 1
self._itrs += 1
if self._verbose >= 2:
print('Total number of iterations is', self.__itrs, '.')
print('The preimage is updated', self.__num_updates, 'times.')
print('Total number of iterations is', self._itrs, '.')
print('The preimage is updated', self._num_updates, 'times.')
print('The shortest distances for previous iterations are', dis_of_each_itr, '.')
# get results and print.
end_generate_preimage = time.time()
self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self.__runtime_total = end_generate_preimage - start
self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self.__k_dis_preimage = dhat
self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm
self._runtime_total = end_generate_preimage - start
self._preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
self._k_dis_preimage = dhat
if self._verbose:
print()
print('=============================================================================')
print('Finished generation of preimages.')
print('-----------------------------------------------------------------------------')
print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset)
print('Distance in kernel space for the preimage:', self.__k_dis_preimage)
print('Total number of iterations for optimizing:', self.__itrs)
print('Total number of updating preimage:', self.__num_updates)
print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
print('Time to generate pre-images:', self.__runtime_generate_preimage)
print('Total time:', self.__runtime_total)
print('Distance in kernel space for the best graph from dataset:', self._k_dis_dataset)
print('Distance in kernel space for the preimage:', self._k_dis_preimage)
print('Total number of iterations for optimizing:', self._itrs)
print('Total number of updating preimage:', self._num_updates)
print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm)
print('Time to generate pre-images:', self._runtime_generate_preimage)
print('Total time:', self._runtime_total)
print('=============================================================================')
print()
def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self.__parallel:
gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
def _generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3):
if self._parallel:
gnew, dhat, found = self._generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3)
else:
gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
gnew, dhat, found = self._generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3)
return gnew, dhat, found
def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
def _generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
updated = False
for trial in range(0, self.__l):
for trial in range(0, self._l):
if self._verbose >= 2:
print('---', trial + 1, 'trial out of', self.__l)
print('---', trial + 1, 'trial out of', self._l)

gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial)

# get the better graph preimage.
if dnew <= dhat: # @todo: the new distance is smaller or also equal?
@@ -257,14 +257,14 @@ class RandomPreimageGenerator(PreimageGenerator):
found = True # found better or equally good graph.
if updated:
self.__num_updates += 1
self._num_updates += 1
return gnew, dhat, found
def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
def _generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3):
gnew = None
len_itr = self.__l
len_itr = self._l
gnew_list = [None] * len_itr
dnew_list = [None] * len_itr
itr = range(0, len_itr)
@@ -295,7 +295,7 @@ class RandomPreimageGenerator(PreimageGenerator):
print('I am smaller!')
print('index (as in D_k U {gihat}) =', str(ig))
print('distance:', dhat, '->', dnew, '\n')
self.__num_updates += 1
self._num_updates += 1
else:
if self._verbose >= 2:
print('I am equal!')
@@ -308,11 +308,11 @@ class RandomPreimageGenerator(PreimageGenerator):
def _generate_graph_parallel(self, g_init, fdgs, term3, itr):
trial = itr
gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial)
gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial)
return trial, gtemp, dnew
def __do_trial(self, g_init, fdgs, term3, trial):
def _do_trial(self, g_init, fdgs, term3, trial):
# add and delete edges.
gtemp = g_init.copy()
seed = (trial + int(time.time())) % (2 ** 32 - 1)
@@ -339,51 +339,51 @@ class RandomPreimageGenerator(PreimageGenerator):
kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options)
kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options)
if self._kernel_options['normalize']:
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize
kernel_gtmp = 1
# @todo: not correct kernel value
gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True)
dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self._alphas, gram_with_gtmp, term3=term3, withterm3=True)
return gtemp, dnew

def get_results(self):
results = {}
results['runtime_precompute_gm'] = self.__runtime_precompute_gm
results['runtime_generate_preimage'] = self.__runtime_generate_preimage
results['runtime_total'] = self.__runtime_total
results['k_dis_dataset'] = self.__k_dis_dataset
results['k_dis_preimage'] = self.__k_dis_preimage
results['itrs'] = self.__itrs
results['num_updates'] = self.__num_updates
results['runtime_precompute_gm'] = self._runtime_precompute_gm
results['runtime_generate_preimage'] = self._runtime_generate_preimage
results['runtime_total'] = self._runtime_total
results['k_dis_dataset'] = self._k_dis_dataset
results['k_dis_preimage'] = self._k_dis_preimage
results['itrs'] = self._itrs
results['num_updates'] = self._num_updates
return results


def __termination_criterion_met(self, timer, itr, r):
if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
# if self.__state == AlgorithmState.TERMINATED:
# self.__state = AlgorithmState.INITIALIZED
def _termination_criterion_met(self, timer, itr, r):
if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False):
# if self._state == AlgorithmState.TERMINATED:
# self._state = AlgorithmState.INITIALIZED
return True
return (r >= self.__r_max if self.__r_max >= 0 else False)
# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
return (r >= self._r_max if self._r_max >= 0 else False)
# return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False)
@property
def preimage(self):
return self.__preimage
return self._preimage
@property
def best_from_dataset(self):
return self.__best_from_dataset
return self._best_from_dataset
@property
def gram_matrix_unnorm(self):
return self.__gram_matrix_unnorm
return self._gram_matrix_unnorm
@gram_matrix_unnorm.setter
def gram_matrix_unnorm(self, value):
self.__gram_matrix_unnorm = value
self._gram_matrix_unnorm = value
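Taken together, the renamed public surface of this class is set_options, run, get_results and the read-only properties; a typical call sequence looks roughly like the following (the import path matches the file name above, while dataset and graph_kernel are placeholders prepared elsewhere):

from gklearn.preimage.random_preimage_generator import RandomPreimageGenerator

rpg = RandomPreimageGenerator(dataset=dataset)  # placeholder Dataset object
rpg.set_options(graph_kernel=graph_kernel,      # placeholder gklearn kernel
                kernel_options={'normalize': True},
                k=5, r_max=10, l=500, max_itrs=20,
                time_limit_in_sec=0, parallel=False, verbose=2)
rpg.run()
results = rpg.get_results()
print(results['k_dis_preimage'], results['num_updates'])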

+ 10
- 10
gklearn/preimage/remove_best_graph.py

@@ -35,13 +35,13 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
else:
fn_output_detail, fn_output_summary = None, None
# 2. compute/load Gram matrix a priori.
print('2. computing/loading Gram matrix...')
gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)
gram_matrix_unnorm_list, time_precompute_gm_list = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets)
sod_sm_list = []
sod_gm_list = []
@@ -82,7 +82,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
# 3. get the best graph and remove it from median set.
print('3. getting and removing the best graph...')
gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset]
best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
best_index, best_dis, best_graph = _get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy()))
median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index]
num_graphs -= 1
if num_graphs == 1:
@@ -294,7 +294,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt
print('\ncomplete.\n')


def __get_best_graph(Gn, gram_matrix):
def _get_best_graph(Gn, gram_matrix):
k_dis_list = []
for idx in range(len(Gn)):
k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False))
@@ -313,7 +313,7 @@ def get_relations(sign):
return 'worse'


def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
if load_gm == 'auto':
gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
@@ -325,7 +325,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
for dataset in datasets:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
time_precompute_gm_list.append(time_precompute_gm)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
@@ -333,7 +333,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
gram_matrix_unnorm_list = []
time_precompute_gm_list = []
for dataset in datasets:
gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options)
gram_matrix_unnorm_list.append(gram_matrix_unnorm)
time_precompute_gm_list.append(time_precompute_gm)
np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list)
@@ -346,7 +346,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets):
return gram_matrix_unnorm_list, time_precompute_gm_list
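The 'auto' branch above is a load-or-compute cache keyed on the dataset and kernel names; the pattern, reduced to its core (the file name is assumed to carry the .npz suffix for illustration, and allow_pickle is needed because the saved lists hold matrices of different shapes):

import os
import numpy as np

def load_or_compute_gm(fname, compute_all):
    if os.path.isfile(os.path.abspath(fname)):
        data = np.load(fname, allow_pickle=True)
        return data['gram_matrix_unnorm_list'], data['run_time_list']
    gm_list, time_list = compute_all()  # fall back to computing from scratch
    np.savez(fname, gram_matrix_unnorm_list=gm_list, run_time_list=time_list)
    return gm_list, time_list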


def __get_graph_kernel(dataset, kernel_options):
def _get_graph_kernel(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -358,7 +358,7 @@ def __get_graph_kernel(dataset, kernel_options):
return graph_kernel
def __compute_gram_matrix_unnorm(dataset, kernel_options):
def _compute_gram_matrix_unnorm(dataset, kernel_options):
from gklearn.utils.utils import get_graph_kernel_by_name
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
@@ -374,7 +374,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options):
return gram_matrix_unnorm, run_time
def __init_output_file(ds_name, gkernel, fit_method, dir_output):
def _init_output_file(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'


+ 2
- 2
gklearn/preimage/utils.py

@@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
if save_results:
# create result files.
print('creating output files...')
fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save)
sod_sm_list = []
sod_gm_list = []
@@ -307,7 +307,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
print('\ncomplete.\n')

def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
def _init_output_file_preimage(ds_name, gkernel, fit_method, dir_output):
if not os.path.exists(dir_output):
os.makedirs(dir_output)
# fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'


+ 244
- 244
gklearn/utils/dataset.py

@@ -16,54 +16,54 @@ class Dataset(object):
def __init__(self, filename=None, filename_targets=None, **kwargs):
if filename is None:
self.__graphs = None
self.__targets = None
self.__node_labels = None
self.__edge_labels = None
self.__node_attrs = None
self.__edge_attrs = None
self._graphs = None
self._targets = None
self._node_labels = None
self._edge_labels = None
self._node_attrs = None
self._edge_attrs = None
else:
self.load_dataset(filename, filename_targets=filename_targets, **kwargs)
self.__substructures = None
self.__node_label_dim = None
self.__edge_label_dim = None
self.__directed = None
self.__dataset_size = None
self.__total_node_num = None
self.__ave_node_num = None
self.__min_node_num = None
self.__max_node_num = None
self.__total_edge_num = None
self.__ave_edge_num = None
self.__min_edge_num = None
self.__max_edge_num = None
self.__ave_node_degree = None
self.__min_node_degree = None
self.__max_node_degree = None
self.__ave_fill_factor = None
self.__min_fill_factor = None
self.__max_fill_factor = None
self.__node_label_nums = None
self.__edge_label_nums = None
self.__node_attr_dim = None
self.__edge_attr_dim = None
self.__class_number = None
self._substructures = None
self._node_label_dim = None
self._edge_label_dim = None
self._directed = None
self._dataset_size = None
self._total_node_num = None
self._ave_node_num = None
self._min_node_num = None
self._max_node_num = None
self._total_edge_num = None
self._ave_edge_num = None
self._min_edge_num = None
self._max_edge_num = None
self._ave_node_degree = None
self._min_node_degree = None
self._max_node_degree = None
self._ave_fill_factor = None
self._min_fill_factor = None
self._max_fill_factor = None
self._node_label_nums = None
self._edge_label_nums = None
self._node_attr_dim = None
self._edge_attr_dim = None
self._class_number = None
def load_dataset(self, filename, filename_targets=None, **kwargs):
self.__graphs, self.__targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs)
self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs']
self.clean_labels()
def load_graphs(self, graphs, targets=None):
# this has to be followed by set_labels().
self.__graphs = graphs
self.__targets = targets
self._graphs = graphs
self._targets = targets
# self.set_labels_attrs() # @todo
@@ -71,108 +71,108 @@ class Dataset(object):
current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
if ds_name == 'Acyclic':
ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'AIDS':
ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Alkane':
ds_file = current_path + '../../datasets/Alkane/dataset.ds'
fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file, filename_targets=fn_targets)
self._graphs, self._targets, label_names = load_dataset(ds_file, filename_targets=fn_targets)
elif ds_name == 'COIL-DEL':
ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'COIL-RAG':
ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'COLORS-3':
ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Cuneiform':
ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'DD':
ds_file = current_path + '../../datasets/DD/DD_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'ENZYMES':
ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Fingerprint':
ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'FRANKENSTEIN':
ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-high': # node non-symb
ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-low': # node non-symb
ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Letter-med': # node non-symb
ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'MAO':
ds_file = current_path + '../../datasets/MAO/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Monoterpenoides':
ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'MUTAG':
ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI1':
ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'NCI109':
ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'PAH':
ds_file = current_path + '../../datasets/PAH/dataset.ds'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'SYNTHETIC':
pass
elif ds_name == 'SYNTHETICnew':
ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
self.__graphs, self.__targets, label_names = load_dataset(ds_file)
self._graphs, self._targets, label_names = load_dataset(ds_file)
elif ds_name == 'Synthie':
pass
else:
raise Exception('The dataset name "' + ds_name + '" is not pre-defined.')
self.__node_labels = label_names['node_labels']
self.__node_attrs = label_names['node_attrs']
self.__edge_labels = label_names['edge_labels']
self.__edge_attrs = label_names['edge_attrs']
self._node_labels = label_names['node_labels']
self._node_attrs = label_names['node_attrs']
self._edge_labels = label_names['edge_labels']
self._edge_attrs = label_names['edge_attrs']
self.clean_labels()

def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]):
self.__node_labels = node_labels
self.__node_attrs = node_attrs
self.__edge_labels = edge_labels
self.__edge_attrs = edge_attrs
self._node_labels = node_labels
self._node_attrs = node_attrs
self._edge_labels = edge_labels
self._edge_attrs = edge_attrs

def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None):
# @todo: remove labels which have only one possible value.
if node_labels is None:
self.__node_labels = self.__graphs[0].graph['node_labels']
self._node_labels = self._graphs[0].graph['node_labels']
# # graphs are considered node unlabeled if all nodes have the same label.
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False})
if node_attrs is None:
self.__node_attrs = self.__graphs[0].graph['node_attrs']
self._node_attrs = self._graphs[0].graph['node_attrs']
# for G in Gn:
# for n in G.nodes(data=True):
# if 'attributes' in n[1]:
# return len(n[1]['attributes'])
# return 0
if edge_labels is None:
self.__edge_labels = self.__graphs[0].graph['edge_labels']
self._edge_labels = self._graphs[0].graph['edge_labels']
# # graphs are considered edge unlabeled if all edges have the same label.
# infos.update({'edge_labeled': is_el if edge_label_num > 1 else False})
if edge_attrs is None:
self.__edge_attrs = self.__graphs[0].graph['edge_attrs']
self._edge_attrs = self._graphs[0].graph['edge_attrs']
# for G in Gn:
# if nx.number_of_edges(G) > 0:
# for e in G.edges(data=True):
@@ -291,145 +291,145 @@ class Dataset(object):
# dataset size
if 'dataset_size' in keys:
if self.__dataset_size is None:
self.__dataset_size = self.__get_dataset_size()
infos['dataset_size'] = self.__dataset_size
if self._dataset_size is None:
self._dataset_size = self._get_dataset_size()
infos['dataset_size'] = self._dataset_size
# graph node number
if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']):
all_node_nums = self.__get_all_node_nums()
all_node_nums = self._get_all_node_nums()

if 'total_node_num' in keys:
if self.__total_node_num is None:
self.__total_node_num = self.__get_total_node_num(all_node_nums)
infos['total_node_num'] = self.__total_node_num
if self._total_node_num is None:
self._total_node_num = self._get_total_node_num(all_node_nums)
infos['total_node_num'] = self._total_node_num
if 'ave_node_num' in keys:
if self.__ave_node_num is None:
self.__ave_node_num = self.__get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self.__ave_node_num
if self._ave_node_num is None:
self._ave_node_num = self._get_ave_node_num(all_node_nums)
infos['ave_node_num'] = self._ave_node_num
if 'min_node_num' in keys:
if self.__min_node_num is None:
self.__min_node_num = self.__get_min_node_num(all_node_nums)
infos['min_node_num'] = self.__min_node_num
if self._min_node_num is None:
self._min_node_num = self._get_min_node_num(all_node_nums)
infos['min_node_num'] = self._min_node_num
if 'max_node_num' in keys:
if self.__max_node_num is None:
self.__max_node_num = self.__get_max_node_num(all_node_nums)
infos['max_node_num'] = self.__max_node_num
if self._max_node_num is None:
self._max_node_num = self._get_max_node_num(all_node_nums)
infos['max_node_num'] = self._max_node_num
# graph edge number
if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']):
all_edge_nums = self.__get_all_edge_nums()
all_edge_nums = self._get_all_edge_nums()

if 'total_edge_num' in keys:
if self.__total_edge_num is None:
self.__total_edge_num = self.__get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self.__total_edge_num
if self._total_edge_num is None:
self._total_edge_num = self._get_total_edge_num(all_edge_nums)
infos['total_edge_num'] = self._total_edge_num
if 'ave_edge_num' in keys:
if self.__ave_edge_num is None:
self.__ave_edge_num = self.__get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self.__ave_edge_num
if self._ave_edge_num is None:
self._ave_edge_num = self._get_ave_edge_num(all_edge_nums)
infos['ave_edge_num'] = self._ave_edge_num
if 'max_edge_num' in keys:
if self.__max_edge_num is None:
self.__max_edge_num = self.__get_max_edge_num(all_edge_nums)
infos['max_edge_num'] = self.__max_edge_num
if self._max_edge_num is None:
self._max_edge_num = self._get_max_edge_num(all_edge_nums)
infos['max_edge_num'] = self._max_edge_num

if 'min_edge_num' in keys:
if self.__min_edge_num is None:
self.__min_edge_num = self.__get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self.__min_edge_num
if self._min_edge_num is None:
self._min_edge_num = self._get_min_edge_num(all_edge_nums)
infos['min_edge_num'] = self._min_edge_num
# label number
if 'node_label_dim' in keys:
if self.__node_label_dim is None:
self.__node_label_dim = self.__get_node_label_dim()
infos['node_label_dim'] = self.__node_label_dim
if self._node_label_dim is None:
self._node_label_dim = self._get_node_label_dim()
infos['node_label_dim'] = self._node_label_dim
if 'node_label_nums' in keys:
if self.__node_label_nums is None:
self.__node_label_nums = {}
for node_label in self.__node_labels:
self.__node_label_nums[node_label] = self.__get_node_label_num(node_label)
infos['node_label_nums'] = self.__node_label_nums
if self._node_label_nums is None:
self._node_label_nums = {}
for node_label in self._node_labels:
self._node_label_nums[node_label] = self._get_node_label_num(node_label)
infos['node_label_nums'] = self._node_label_nums
if 'edge_label_dim' in keys:
if self.__edge_label_dim is None:
self.__edge_label_dim = self.__get_edge_label_dim()
infos['edge_label_dim'] = self.__edge_label_dim
if self._edge_label_dim is None:
self._edge_label_dim = self._get_edge_label_dim()
infos['edge_label_dim'] = self._edge_label_dim
if 'edge_label_nums' in keys:
if self.__edge_label_nums is None:
self.__edge_label_nums = {}
for edge_label in self.__edge_labels:
self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label)
infos['edge_label_nums'] = self.__edge_label_nums
if self._edge_label_nums is None:
self._edge_label_nums = {}
for edge_label in self._edge_labels:
self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label)
infos['edge_label_nums'] = self._edge_label_nums
if 'directed' in keys or 'substructures' in keys:
if self.__directed is None:
self.__directed = self.__is_directed()
infos['directed'] = self.__directed
if self._directed is None:
self._directed = self._is_directed()
infos['directed'] = self._directed
# node degree
if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']):
all_node_degrees = self.__get_all_node_degrees()
all_node_degrees = self._get_all_node_degrees()
if 'ave_node_degree' in keys:
if self.__ave_node_degree is None:
self.__ave_node_degree = self.__get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self.__ave_node_degree
if self._ave_node_degree is None:
self._ave_node_degree = self._get_ave_node_degree(all_node_degrees)
infos['ave_node_degree'] = self._ave_node_degree
if 'max_node_degree' in keys:
if self.__max_node_degree is None:
self.__max_node_degree = self.__get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self.__max_node_degree
if self._max_node_degree is None:
self._max_node_degree = self._get_max_node_degree(all_node_degrees)
infos['max_node_degree'] = self._max_node_degree
if 'min_node_degree' in keys:
if self.__min_node_degree is None:
self.__min_node_degree = self.__get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self.__min_node_degree
if self._min_node_degree is None:
self._min_node_degree = self._get_min_node_degree(all_node_degrees)
infos['min_node_degree'] = self._min_node_degree
# fill factor
if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']):
all_fill_factors = self.__get_all_fill_factors()
all_fill_factors = self._get_all_fill_factors()
if 'ave_fill_factor' in keys:
if self.__ave_fill_factor is None:
self.__ave_fill_factor = self.__get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self.__ave_fill_factor
if self._ave_fill_factor is None:
self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors)
infos['ave_fill_factor'] = self._ave_fill_factor
if 'max_fill_factor' in keys:
if self.__max_fill_factor is None:
self.__max_fill_factor = self.__get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self.__max_fill_factor
if self._max_fill_factor is None:
self._max_fill_factor = self._get_max_fill_factor(all_fill_factors)
infos['max_fill_factor'] = self._max_fill_factor
if 'min_fill_factor' in keys:
if self.__min_fill_factor is None:
self.__min_fill_factor = self.__get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self.__min_fill_factor
if self._min_fill_factor is None:
self._min_fill_factor = self._get_min_fill_factor(all_fill_factors)
infos['min_fill_factor'] = self._min_fill_factor
if 'substructures' in keys:
if self.__substructures is None:
self.__substructures = self.__get_substructures()
infos['substructures'] = self.__substructures
if self._substructures is None:
self._substructures = self._get_substructures()
infos['substructures'] = self._substructures
if 'class_number' in keys:
if self.__class_number is None:
self.__class_number = self.__get_class_number()
infos['class_number'] = self.__class_number
if self._class_number is None:
self._class_number = self._get_class_number()
infos['class_number'] = self._class_number
if 'node_attr_dim' in keys:
if self.__node_attr_dim is None:
self.__node_attr_dim = self.__get_node_attr_dim()
infos['node_attr_dim'] = self.__node_attr_dim
if self._node_attr_dim is None:
self._node_attr_dim = self._get_node_attr_dim()
infos['node_attr_dim'] = self._node_attr_dim
if 'edge_attr_dim' in keys:
if self.__edge_attr_dim is None:
self.__edge_attr_dim = self.__get_edge_attr_dim()
infos['edge_attr_dim'] = self.__edge_attr_dim
if self._edge_attr_dim is None:
self._edge_attr_dim = self._get_edge_attr_dim()
infos['edge_attr_dim'] = self._edge_attr_dim
# entropy of degree distribution.
@@ -438,14 +438,14 @@ class Dataset(object):
base = params['all_degree_entropy']['base']
else:
base = None
infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base)
infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base)
if 'ave_degree_entropy' in keys:
if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']):
base = params['ave_degree_entropy']['base']
else:
base = None
infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base))
infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base))
return infos
@@ -457,12 +457,12 @@ class Dataset(object):
def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
node_labels = [item for item in node_labels if item in self.__node_labels]
edge_labels = [item for item in edge_labels if item in self.__edge_labels]
node_attrs = [item for item in node_attrs if item in self.__node_attrs]
edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs]
node_labels = [item for item in node_labels if item in self._node_labels]
edge_labels = [item for item in edge_labels if item in self._edge_labels]
node_attrs = [item for item in node_attrs if item in self._node_attrs]
edge_attrs = [item for item in edge_attrs if item in self._edge_attrs]

for g in self.__graphs:
for g in self._graphs:
for nd in g.nodes():
for nl in node_labels:
del g.nodes[nd][nl]
@@ -474,99 +474,99 @@ class Dataset(object):
for ea in edge_attrs:
del g.edges[ed][ea]
if len(node_labels) > 0:
self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels]
self._node_labels = [nl for nl in self._node_labels if nl not in node_labels]
if len(edge_labels) > 0:
self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels]
self._edge_labels = [el for el in self._edge_labels if el not in edge_labels]
if len(node_attrs) > 0:
self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs]
self._node_attrs = [na for na in self._node_attrs if na not in node_attrs]
if len(edge_attrs) > 0:
self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs]
self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs]
def clean_labels(self):
labels = []
for name in self.__node_labels:
for name in self._node_labels:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_node_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for nd in G.nodes():
del G.nodes[nd][name]
self.__node_labels = labels
self._node_labels = labels

labels = []
for name in self.__edge_labels:
for name in self._edge_labels:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_edge_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for ed in G.edges():
del G.edges[ed][name]
self.__edge_labels = labels
self._edge_labels = labels

labels = []
for name in self.__node_attrs:
for name in self._node_attrs:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_node_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for nd in G.nodes():
del G.nodes[nd][name]
self.__node_attrs = labels
self._node_attrs = labels

labels = []
for name in self.__edge_attrs:
for name in self._edge_attrs:
label = set()
for G in self.__graphs:
for G in self._graphs:
label = label | set(nx.get_edge_attributes(G, name).values())
if len(label) > 1:
labels.append(name)
break
if len(label) < 2:
for G in self.__graphs:
for G in self._graphs:
for ed in G.edges():
del G.edges[ed][name]
self.__edge_attrs = labels
self._edge_attrs = labels
def cut_graphs(self, range_):
self.__graphs = [self.__graphs[i] for i in range_]
if self.__targets is not None:
self.__targets = [self.__targets[i] for i in range_]
self._graphs = [self._graphs[i] for i in range_]
if self._targets is not None:
self._targets = [self._targets[i] for i in range_]
self.clean_labels()


def trim_dataset(self, edge_required=False):
if edge_required:
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)]
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)]
else:
trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if nx.number_of_nodes(g) != 0]
trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if nx.number_of_nodes(g) != 0]
idx = [p[0] for p in trimed_pairs]
self.__graphs = [p[1] for p in trimed_pairs]
self.__targets = [self.__targets[i] for i in idx]
self._graphs = [p[1] for p in trimed_pairs]
self._targets = [self._targets[i] for i in idx]
self.clean_labels()
def copy(self):
dataset = Dataset()
graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None
target = self.__targets.copy() if self.__targets is not None else None
node_labels = self.__node_labels.copy() if self.__node_labels is not None else None
node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None
edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None
edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None
graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None
target = self._targets.copy() if self._targets is not None else None
node_labels = self._node_labels.copy() if self._node_labels is not None else None
node_attrs = self._node_attrs.copy() if self._node_attrs is not None else None
edge_labels = self._edge_labels.copy() if self._edge_labels is not None else None
edge_attrs = self._edge_attrs.copy() if self._edge_attrs is not None else None
dataset.load_graphs(graphs, target)
dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs)
# @todo: clean_labels and add other class members?
@@ -575,7 +575,7 @@ class Dataset(object):
def get_all_node_labels(self):
node_labels = []
for g in self.__graphs:
for g in self._graphs:
for n in g.nodes():
nl = tuple(g.nodes[n].items())
if nl not in node_labels:
@@ -585,7 +585,7 @@ class Dataset(object):
def get_all_edge_labels(self):
edge_labels = []
for g in self.__graphs:
for g in self._graphs:
for e in g.edges():
el = tuple(g.edges[e].items())
if el not in edge_labels:
@@ -593,93 +593,93 @@ class Dataset(object):
return edge_labels
def __get_dataset_size(self):
return len(self.__graphs)
def _get_dataset_size(self):
return len(self._graphs)
def __get_all_node_nums(self):
return [nx.number_of_nodes(G) for G in self.__graphs]
def _get_all_node_nums(self):
return [nx.number_of_nodes(G) for G in self._graphs]
def __get_total_node_nums(self, all_node_nums):
def _get_total_node_num(self, all_node_nums):
return np.sum(all_node_nums)
def __get_ave_node_num(self, all_node_nums):
def _get_ave_node_num(self, all_node_nums):
return np.mean(all_node_nums)
def __get_min_node_num(self, all_node_nums):
def _get_min_node_num(self, all_node_nums):
return np.amin(all_node_nums)
def __get_max_node_num(self, all_node_nums):
def _get_max_node_num(self, all_node_nums):
return np.amax(all_node_nums)
def __get_all_edge_nums(self):
return [nx.number_of_edges(G) for G in self.__graphs]
def _get_all_edge_nums(self):
return [nx.number_of_edges(G) for G in self._graphs]
def __get_total_edge_nums(self, all_edge_nums):
def _get_total_edge_num(self, all_edge_nums):
return np.sum(all_edge_nums)
def __get_ave_edge_num(self, all_edge_nums):
def _get_ave_edge_num(self, all_edge_nums):
return np.mean(all_edge_nums)
def __get_min_edge_num(self, all_edge_nums):
def _get_min_edge_num(self, all_edge_nums):
return np.amin(all_edge_nums)
def __get_max_edge_num(self, all_edge_nums):
def _get_max_edge_num(self, all_edge_nums):
return np.amax(all_edge_nums)
def __get_node_label_dim(self):
return len(self.__node_labels)
def _get_node_label_dim(self):
return len(self._node_labels)
def __get_node_label_num(self, node_label):
def _get_node_label_num(self, node_label):
nl = set()
for G in self.__graphs:
for G in self._graphs:
nl = nl | set(nx.get_node_attributes(G, node_label).values())
return len(nl)
def __get_edge_label_dim(self):
return len(self.__edge_labels)
def _get_edge_label_dim(self):
return len(self._edge_labels)
def __get_edge_label_num(self, edge_label):
def _get_edge_label_num(self, edge_label):
el = set()
for G in self.__graphs:
for G in self._graphs:
el = el | set(nx.get_edge_attributes(G, edge_label).values())
return len(el)
def __is_directed(self):
return nx.is_directed(self.__graphs[0])
def _is_directed(self):
return nx.is_directed(self._graphs[0])
def __get_all_node_degrees(self):
return [np.mean(list(dict(G.degree()).values())) for G in self.__graphs]
def _get_all_node_degrees(self):
return [np.mean(list(dict(G.degree()).values())) for G in self._graphs]
def __get_ave_node_degree(self, all_node_degrees):
def _get_ave_node_degree(self, all_node_degrees):
return np.mean(all_node_degrees)
def __get_max_node_degree(self, all_node_degrees):
def _get_max_node_degree(self, all_node_degrees):
return np.amax(all_node_degrees)
def __get_min_node_degree(self, all_node_degrees):
def _get_min_node_degree(self, all_node_degrees):
return np.amin(all_node_degrees)
def __get_all_fill_factors(self):
def _get_all_fill_factors(self):
"""Get fill factor, the number of non-zero entries in the adjacency matrix.

Returns
@@ -687,24 +687,24 @@ class Dataset(object):
list[float]
List of fill factors for all graphs.
"""
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self.__graphs]
return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs]

def __get_ave_fill_factor(self, all_fill_factors):
def _get_ave_fill_factor(self, all_fill_factors):
return np.mean(all_fill_factors)
def __get_max_fill_factor(self, all_fill_factors):
def _get_max_fill_factor(self, all_fill_factors):
return np.amax(all_fill_factors)
def __get_min_fill_factor(self, all_fill_factors):
def _get_min_fill_factor(self, all_fill_factors):
return np.amin(all_fill_factors)
def __get_substructures(self):
def _get_substructures(self):
subs = set()
for G in self.__graphs:
for G in self._graphs:
degrees = list(dict(G.degree()).values())
if any(i == 2 for i in degrees):
subs.add('linear')
@@ -713,8 +713,8 @@ class Dataset(object):
if 'linear' in subs and 'non linear' in subs:
break

if self.__directed:
for G in self.__graphs:
if self._directed:
for G in self._graphs:
if len(list(nx.find_cycle(G))) > 0:
subs.add('cyclic')
break
@@ -737,19 +737,19 @@ class Dataset(object):
return subs
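
One caveat worth noting about the cycle check above: nx.find_cycle raises NetworkXNoCycle on an acyclic graph rather than returning an empty list, so a standalone version of this test would typically wrap the call — a hedged sketch:

import networkx as nx

def has_cycle(G):
    # nx.find_cycle raises NetworkXNoCycle when no cycle exists,
    # so wrap the call rather than testing the returned list's length.
    try:
        nx.find_cycle(G)
        return True
    except nx.NetworkXNoCycle:
        return False
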
def __get_class_num(self):
return len(set(self.__targets))
def _get_class_num(self):
return len(set(self._targets))
def __get_node_attr_dim(self):
return len(self.__node_attrs)
def _get_node_attr_dim(self):
return len(self._node_attrs)
def __get_edge_attr_dim(self):
return len(self.__edge_attrs)
def _get_edge_attr_dim(self):
return len(self._edge_attrs)

def __compute_all_degree_entropy(self, base=None):
def _compute_all_degree_entropy(self, base=None):
"""Compute the entropy of degree distribution of each graph.

Parameters
@@ -765,7 +765,7 @@ class Dataset(object):
from gklearn.utils.stats import entropy
degree_entropy = []
for g in self.__graphs:
for g in self._graphs:
degrees = list(dict(g.degree()).values())
en = entropy(degrees, base=base)
degree_entropy.append(en)
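
The entropy helper comes from gklearn.utils.stats; as an illustrative stand-in (not the library's actual implementation), a Shannon entropy over the empirical degree distribution could look like:

import numpy as np

def degree_entropy(degrees, base=None):
    # Shannon entropy of the empirical degree distribution.
    _, counts = np.unique(degrees, return_counts=True)
    probs = counts / counts.sum()
    logs = np.log(probs) if base is None else np.log(probs) / np.log(base)
    return float(-np.sum(probs * logs))
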
@@ -774,32 +774,32 @@ class Dataset(object):
@property
def graphs(self):
return self.__graphs
return self._graphs


@property
def targets(self):
return self.__targets
return self._targets
@property
def node_labels(self):
return self.__node_labels
return self._node_labels


@property
def edge_labels(self):
return self.__edge_labels
return self._edge_labels
@property
def node_attrs(self):
return self.__node_attrs
return self._node_attrs
@property
def edge_attrs(self):
return self.__edge_attrs
return self._edge_attrs
def split_dataset_by_target(dataset):


+ 4
- 4
gklearn/utils/graph_files.py View File

@@ -692,7 +692,7 @@ def load_from_ds(filename, filename_targets):
# remove the '#'s in file names
g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
_append_label_names(label_names, l_names)
y.append(float(tmp[1]))
else: # targets in a separate file
for i in range(0, len(content)):
@@ -700,7 +700,7 @@ def load_from_ds(filename, filename_targets):
# remove the '#'s in file names
g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1))
data.append(g)
__append_label_names(label_names, l_names)
_append_label_names(label_names, l_names)
with open(filename_targets) as fnt:
content_y = fnt.read().splitlines()
@@ -745,13 +745,13 @@ def load_from_xml(filename, dir_dataset=None):
mol_class = graph.attrib['class']
g, l_names = load_gxl(dir_dataset + '/' + mol_filename)
data.append(g)
__append_label_names(label_names, l_names)
_append_label_names(label_names, l_names)
y.append(mol_class)
return data, y, label_names


def __append_label_names(label_names, new_names):
def _append_label_names(label_names, new_names):
for key, val in label_names.items():
label_names[key] += [name for name in new_names[key] if name not in val]
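
The merge semantics of _append_label_names, shown on toy dicts with hypothetical label values:

label_names = {'node_labels': ['atom'], 'edge_labels': []}
new_names = {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond']}
_append_label_names(label_names, new_names)
# 'charge' and 'bond' are appended; the duplicate 'atom' is skipped:
# label_names == {'node_labels': ['atom', 'charge'], 'edge_labels': ['bond']}
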


+ 2
- 2
gklearn/utils/knn.py View File

@@ -73,7 +73,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t
y_all = dataset.targets
# compute kernel distances.
dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset)
dis_mat = _compute_kernel_distances(dataset, kernel_options, trainset=trainset)
rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0)
@@ -121,7 +121,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t
return results
def __compute_kernel_distances(dataset, kernel_options, trainset=None):
def _compute_kernel_distances(dataset, kernel_options, trainset=None):
graph_kernel = get_graph_kernel_by_name(kernel_options['name'],
node_labels=dataset.node_labels,
edge_labels=dataset.edge_labels,
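
The hunk does not show the body of _compute_kernel_distances; assuming it uses the standard kernel-induced metric d(i, j) = sqrt(k(i, i) + k(j, j) - 2 k(i, j)) — an assumption, not confirmed by the diff — a sketch of that conversion is:

import numpy as np

def kernel_to_distance_matrix(K):
    # d(i, j)^2 = k(i, i) + k(j, j) - 2 * k(i, j); clip tiny negative
    # values caused by floating-point round-off before the square root.
    diag = np.diag(K)
    d2 = diag[:, None] + diag[None, :] - 2 * K
    return np.sqrt(np.clip(d2, 0, None))
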


+ 5
- 5
gklearn/utils/timer.py View File

@@ -23,8 +23,8 @@ class Timer(object):
time_limit_in_sec : float
The time limit in seconds.
"""
self.__time_limit_in_sec = time_limit_in_sec
self.__start_time = time.time()
self._time_limit_in_sec = time_limit_in_sec
self._start_time = time.time()
def expired(self):
@@ -34,7 +34,7 @@ class Timer(object):
------
True if the time limit has expired, False otherwise.
"""
if self.__time_limit_in_sec > 0:
runtime = time.time() - self.__start_time
return runtime >= self.__time_limit_in_sec
if self._time_limit_in_sec > 0:
runtime = time.time() - self._start_time
return runtime >= self._time_limit_in_sec
return False
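
A minimal usage sketch of the renamed Timer (the work inside the loop is hypothetical):

timer = Timer(60)  # give up after 60 seconds
while not timer.expired():
    pass  # do one unit of work per iteration, until the budget runs out
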
