diff --git a/gklearn/kernels/common_walk.py b/gklearn/kernels/common_walk.py
index 6372200..a58fb86 100644
--- a/gklearn/kernels/common_walk.py
+++ b/gklearn/kernels/common_walk.py
@@ -26,18 +26,18 @@ class CommonWalk(GraphKernel):

 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__weight = kwargs.get('weight', 1)
-		self.__compute_method = kwargs.get('compute_method', None)
-		self.__ds_infos = kwargs.get('ds_infos', {})
-		self.__compute_method = self.__compute_method.lower()
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._weight = kwargs.get('weight', 1)
+		self._compute_method = kwargs.get('compute_method', None)
+		self._ds_infos = kwargs.get('ds_infos', {})
+		self._compute_method = self._compute_method.lower()

 	def _compute_gm_series(self):
-		self.__check_graphs(self._graphs)
-		self.__add_dummy_labels(self._graphs)
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(self._graphs)
+		self._add_dummy_labels(self._graphs)
+		if not self._ds_infos['directed']:  # convert
 			self._graphs = [G.to_directed() for G in self._graphs]

 		# compute Gram matrix.
@@ -51,15 +51,15 @@ class CommonWalk(GraphKernel):
 			iterator = itr

 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			for i, j in iterator:
-				kernel = self.__kernel_do_exp(self._graphs[i], self._graphs[j], self.__weight)
+				kernel = self._kernel_do_exp(self._graphs[i], self._graphs[j], self._weight)
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			for i, j in iterator:
-				kernel = self.__kernel_do_geo(self._graphs[i], self._graphs[j], self.__weight)
+				kernel = self._kernel_do_geo(self._graphs[i], self._graphs[j], self._weight)
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel

@@ -67,9 +67,9 @@ class CommonWalk(GraphKernel):

 	def _compute_gm_imap_unordered(self):
-		self.__check_graphs(self._graphs)
-		self.__add_dummy_labels(self._graphs)
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(self._graphs)
+		self._add_dummy_labels(self._graphs)
+		if not self._ds_infos['directed']:  # convert
 			self._graphs = [G.to_directed() for G in self._graphs]

 		# compute Gram matrix.
@@ -80,10 +80,10 @@ class CommonWalk(GraphKernel):
#			G_gn = gn_toshare

 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			do_fun = self._wrapper_kernel_do_exp
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			do_fun = self._wrapper_kernel_do_geo

 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
@@ -93,9 +93,9 @@ class CommonWalk(GraphKernel):

 	def _compute_kernel_list_series(self, g1, g_list):
-		self.__check_graphs(g_list + [g1])
-		self.__add_dummy_labels(g_list + [g1])
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])
+		if not self._ds_infos['directed']:  # convert
 			g1 = g1.to_directed()
 			g_list = [G.to_directed() for G in g_list]

@@ -107,23 +107,23 @@ class CommonWalk(GraphKernel):
 			iterator = range(len(g_list))

 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			for i in iterator:
-				kernel = self.__kernel_do_exp(g1, g_list[i], self.__weight)
+				kernel = self._kernel_do_exp(g1, g_list[i], self._weight)
 				kernel_list[i] = kernel
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			for i in iterator:
-				kernel = self.__kernel_do_geo(g1, g_list[i], self.__weight)
+				kernel = self._kernel_do_geo(g1, g_list[i], self._weight)
 				kernel_list[i] = kernel

 		return kernel_list

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self.__check_graphs(g_list + [g1])
-		self.__add_dummy_labels(g_list + [g1])
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])
+		if not self._ds_infos['directed']:  # convert
 			g1 = g1.to_directed()
 			g_list = [G.to_directed() for G in g_list]

@@ -136,10 +136,10 @@ class CommonWalk(GraphKernel):
#			G_g_list = g_list_toshare

 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
+		if self._compute_method == 'exp':
 			do_fun = self._wrapper_kernel_list_do_exp
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
+		elif self._compute_method == 'geo':
 			do_fun = self._wrapper_kernel_list_do_geo

 		def func_assign(result, var_to_assign):
@@ -154,31 +154,31 @@ class CommonWalk(GraphKernel):

 	def _wrapper_kernel_list_do_exp(self, itr):
-		return itr, self.__kernel_do_exp(G_g1, G_g_list[itr], self.__weight)
+		return itr, self._kernel_do_exp(G_g1, G_g_list[itr], self._weight)

 	def _wrapper_kernel_list_do_geo(self, itr):
-		return itr, self.__kernel_do_geo(G_g1, G_g_list[itr], self.__weight)
+		return itr, self._kernel_do_geo(G_g1, G_g_list[itr], self._weight)

 	def _compute_single_kernel_series(self, g1, g2):
-		self.__check_graphs([g1] + [g2])
-		self.__add_dummy_labels([g1] + [g2])
-		if not self.__ds_infos['directed']:  # convert
+		self._check_graphs([g1] + [g2])
+		self._add_dummy_labels([g1] + [g2])
+		if not self._ds_infos['directed']:  # convert
 			g1 = g1.to_directed()
 			g2 = g2.to_directed()

 		# direct product graph method - exponential
-		if self.__compute_method == 'exp':
-			kernel = self.__kernel_do_exp(g1, g2, self.__weight)
+		if self._compute_method == 'exp':
+			kernel = self._kernel_do_exp(g1, g2, self._weight)
 		# direct product graph method - geometric
-		elif self.__compute_method == 'geo':
-			kernel = self.__kernel_do_geo(g1, g2, self.__weight)
+		elif self._compute_method == 'geo':
+			kernel = self._kernel_do_geo(g1, g2, self._weight)

 		return kernel

-	def __kernel_do_exp(self, g1, g2, beta):
+	def _kernel_do_exp(self, g1, g2, beta):
 		"""Compute common walk graph kernel between 2 graphs using exponential series.

@@ -195,7 +195,7 @@ class CommonWalk(GraphKernel):
 			The common walk Kernel between 2 graphs.
 		"""
 		# get tensor product / direct product
-		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+		gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
 		# return 0 if the direct product graph have no more than 1 node.
 		if nx.number_of_nodes(gp) < 2:
 			return 0
@@ -227,10 +227,10 @@ class CommonWalk(GraphKernel):
 	def _wrapper_kernel_do_exp(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_exp(G_gn[i], G_gn[j], self.__weight)
+		return i, j, self._kernel_do_exp(G_gn[i], G_gn[j], self._weight)

-	def __kernel_do_geo(self, g1, g2, gamma):
+	def _kernel_do_geo(self, g1, g2, gamma):
 		"""Compute common walk graph kernel between 2 graphs using geometric series.

@@ -247,7 +247,7 @@ class CommonWalk(GraphKernel):
 			The common walk Kernel between 2 graphs.
 		"""
 		# get tensor product / direct product
-		gp = direct_product_graph(g1, g2, self.__node_labels, self.__edge_labels)
+		gp = direct_product_graph(g1, g2, self._node_labels, self._edge_labels)
 		# return 0 if the direct product graph have no more than 1 node.
 		if nx.number_of_nodes(gp) < 2:
 			return 0
@@ -262,24 +262,24 @@ class CommonWalk(GraphKernel):
 	def _wrapper_kernel_do_geo(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_geo(G_gn[i], G_gn[j], self.__weight)
+		return i, j, self._kernel_do_geo(G_gn[i], G_gn[j], self._weight)

-	def __check_graphs(self, Gn):
+	def _check_graphs(self, Gn):
 		for g in Gn:
 			if nx.number_of_nodes(g) == 1:
 				raise Exception('Graphs must contain more than 1 nodes to construct adjacency matrices.')

-	def __add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+	def _add_dummy_labels(self, Gn):
+		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__node_labels = [SpecialLabel.DUMMY]
-		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			self._node_labels = [SpecialLabel.DUMMY]
+		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__edge_labels = [SpecialLabel.DUMMY]
+			self._edge_labels = [SpecialLabel.DUMMY]

 def _init_worker_gm(gn_toshare):
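A note on the rename pattern above (repeated in every file below): Python mangles class attributes with two leading underscores into `_ClassName__attr`, which makes them unreachable under their plain name from subclasses. A minimal sketch of the failure mode the `__` to `_` rename avoids (toy classes, not gklearn code):

```python
class Base:
    def __init__(self):
        self.__weight = 1.0          # actually stored as self._Base__weight

class Sub(Base):
    def get_weight(self):
        return self.__weight         # compiled as self._Sub__weight

s = Sub()
print(s._Base__weight)               # 1.0, only reachable via the mangled name
try:
    s.get_weight()
except AttributeError as e:
    print(e)                         # no attribute '_Sub__weight'
```

With single underscores, `self._weight` resolves to the same attribute everywhere in the class hierarchy.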
diff --git a/gklearn/kernels/graph_kernel.py b/gklearn/kernels/graph_kernel.py
index a8dbd32..d263828 100644
--- a/gklearn/kernels/graph_kernel.py
+++ b/gklearn/kernels/graph_kernel.py
@@ -37,7 +37,7 @@ class GraphKernel(object):
 				raise Exception('The graph list given is empty. No computation was performed.')
 			else:
 				self._graphs = [g.copy() for g in graphs[0]]
-				self._gram_matrix = self.__compute_gram_matrix()
+				self._gram_matrix = self._compute_gram_matrix()
 				self._gram_matrix_unnorm = np.copy(self._gram_matrix)
 				if self._normalize:
 					self._gram_matrix = self.normalize_gm(self._gram_matrix)
@@ -45,17 +45,17 @@ class GraphKernel(object):

 		elif len(graphs) == 2:
 			if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
-				kernel = self.__compute_single_kernel(graphs[0].copy(), graphs[1].copy())
+				kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy())
 				return kernel, self._run_time
 			elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
 				g1 = graphs[0].copy()
 				g_list = [g.copy() for g in graphs[1]]
-				kernel_list = self.__compute_kernel_list(g1, g_list)
+				kernel_list = self._compute_kernel_list(g1, g_list)
 				return kernel_list, self._run_time
 			elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
 				g1 = graphs[1].copy()
 				g_list = [g.copy() for g in graphs[0]]
-				kernel_list = self.__compute_kernel_list(g1, g_list)
+				kernel_list = self._compute_kernel_list(g1, g_list)
 				return kernel_list, self._run_time
 			else:
 				raise Exception('Cannot detect graphs.')
@@ -99,7 +99,7 @@ class GraphKernel(object):
 		return dis_mat, dis_max, dis_min, dis_mean

-	def __compute_gram_matrix(self):
+	def _compute_gram_matrix(self):
 		start_time = time.time()

 		if self._parallel == 'imap_unordered':
@@ -125,7 +125,7 @@ class GraphKernel(object):
 		pass

-	def __compute_kernel_list(self, g1, g_list):
+	def _compute_kernel_list(self, g1, g_list):
 		start_time = time.time()

 		if self._parallel == 'imap_unordered':
@@ -151,7 +151,7 @@ class GraphKernel(object):
 		pass

-	def __compute_single_kernel(self, g1, g2):
+	def _compute_single_kernel(self, g1, g2):
 		start_time = time.time()

 		kernel = self._compute_single_kernel_series(g1, g2)
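The renamed methods in `GraphKernel` are the hooks of a template-method pattern: the public entry point dispatches to `_compute_gram_matrix()`, which in turn calls the `_compute_gm_*` implementations each concrete kernel provides. A hypothetical miniature of that dispatch after the rename (names shortened, not the real API):

```python
class GraphKernelMini:
    def compute(self, graphs):
        # the single-underscore hook is looked up on the concrete subclass
        return self._compute_gram_matrix(graphs)

    def _compute_gram_matrix(self, graphs):
        raise NotImplementedError('provided by concrete kernels')

class CommonWalkMini(GraphKernelMini):
    def _compute_gram_matrix(self, graphs):
        # trivial stand-in: identity Gram matrix
        n = len(graphs)
        return [[1.0 if i == j else 0.0 for j in range(n)] for i in range(n)]

print(CommonWalkMini().compute(['g1', 'g2']))   # [[1.0, 0.0], [0.0, 1.0]]
```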
diff --git a/gklearn/kernels/marginalized.py b/gklearn/kernels/marginalized.py
index 499d51b..75355b1 100644
--- a/gklearn/kernels/marginalized.py
+++ b/gklearn/kernels/marginalized.py
@@ -33,25 +33,25 @@ class Marginalized(GraphKernel):

 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__p_quit = kwargs.get('p_quit', 0.5)
-		self.__n_iteration = kwargs.get('n_iteration', 10)
-		self.__remove_totters = kwargs.get('remove_totters', False)
-		self.__ds_infos = kwargs.get('ds_infos', {})
-		self.__n_iteration = int(self.__n_iteration)
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._p_quit = kwargs.get('p_quit', 0.5)
+		self._n_iteration = kwargs.get('n_iteration', 10)
+		self._remove_totters = kwargs.get('remove_totters', False)
+		self._ds_infos = kwargs.get('ds_infos', {})
+		self._n_iteration = int(self._n_iteration)

 	def _compute_gm_series(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

-		if self.__remove_totters:
+		if self._remove_totters:
 			if self._verbose >= 2:
 				iterator = tqdm(self._graphs, desc='removing tottering', file=sys.stdout)
 			else:
 				iterator = self._graphs
 			# @todo: this may not work.
-			self._graphs = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+			self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -63,7 +63,7 @@ class Marginalized(GraphKernel):
 		else:
 			iterator = itr
 		for i, j in iterator:
-			kernel = self.__kernel_do(self._graphs[i], self._graphs[j])
+			kernel = self._kernel_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
 			gram_matrix[j][i] = kernel # @todo: no directed graph considered?

@@ -71,9 +71,9 @@ class Marginalized(GraphKernel):

 	def _compute_gm_imap_unordered(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

-		if self.__remove_totters:
+		if self._remove_totters:
 			pool = Pool(self._n_jobs)
 			itr = range(0, len(self._graphs))
 			if len(self._graphs) < 100 * self._n_jobs:
@@ -105,16 +105,16 @@ class Marginalized(GraphKernel):

 	def _compute_kernel_list_series(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

-		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
+		if self._remove_totters:
+			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
 			if self._verbose >= 2:
 				iterator = tqdm(g_list, desc='removing tottering', file=sys.stdout)
 			else:
 				iterator = g_list
 			# @todo: this may not work.
-			g_list = [untotterTransformation(G, self.__node_labels, self.__edge_labels) for G in iterator]
+			g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]

 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -123,17 +123,17 @@ class Marginalized(GraphKernel):
 		else:
 			iterator = range(len(g_list))
 		for i in iterator:
-			kernel = self.__kernel_do(g1, g_list[i])
+			kernel = self._kernel_do(g1, g_list[i])
 			kernel_list[i] = kernel

 		return kernel_list

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

-		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
+		if self._remove_totters:
+			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
 			pool = Pool(self._n_jobs)
 			itr = range(0, len(g_list))
 			if len(g_list) < 100 * self._n_jobs:
@@ -171,19 +171,19 @@ class Marginalized(GraphKernel):

 	def _wrapper_kernel_list_do(self, itr):
-		return itr, self.__kernel_do(G_g1, G_g_list[itr])
+		return itr, self._kernel_do(G_g1, G_g_list[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		self.__add_dummy_labels([g1] + [g2])
-		if self.__remove_totters:
-			g1 = untotterTransformation(g1, self.__node_labels, self.__edge_labels) # @todo: this may not work.
-			g2 = untotterTransformation(g2, self.__node_labels, self.__edge_labels)
-		kernel = self.__kernel_do(g1, g2)
+		self._add_dummy_labels([g1] + [g2])
+		if self._remove_totters:
+			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
+			g2 = untotterTransformation(g2, self._node_labels, self._edge_labels)
+		kernel = self._kernel_do(g1, g2)
 		return kernel

-	def __kernel_do(self, g1, g2):
+	def _kernel_do(self, g1, g2):
 		"""Compute marginalized graph kernel between 2 graphs.

 		Parameters
@@ -205,7 +205,7 @@ class Marginalized(GraphKernel):
 		p_init_G1 = 1 / num_nodes_G1
 		p_init_G2 = 1 / num_nodes_G2

-		q = self.__p_quit * self.__p_quit
+		q = self._p_quit * self._p_quit
 		r1 = q

#		# initial R_inf
@@ -260,36 +260,36 @@ class Marginalized(GraphKernel):
 					if len(g2[node2]) > 0:
 						R_inf[(node1, node2)] = r1
 					else:
-						R_inf[(node1, node2)] = self.__p_quit
+						R_inf[(node1, node2)] = self._p_quit
 				else:
 					if len(g2[node2]) > 0:
-						R_inf[(node1, node2)] = self.__p_quit
+						R_inf[(node1, node2)] = self._p_quit
 					else:
 						R_inf[(node1, node2)] = 1

 		# compute all transition probability first.
 		t_dict = {}
-		if self.__n_iteration > 1:
+		if self._n_iteration > 1:
 			for node1 in g1.nodes():
 				neighbor_n1 = g1[node1]
 				# the transition probability distribution in the random walks
 				# generating step (uniform distribution over the vertices adjacent
 				# to the current vertex)
 				if len(neighbor_n1) > 0:
-					p_trans_n1 = (1 - self.__p_quit) / len(neighbor_n1)
+					p_trans_n1 = (1 - self._p_quit) / len(neighbor_n1)
 					for node2 in g2.nodes():
 						neighbor_n2 = g2[node2]
 						if len(neighbor_n2) > 0:
-							p_trans_n2 = (1 - self.__p_quit) / len(neighbor_n2)
+							p_trans_n2 = (1 - self._p_quit) / len(neighbor_n2)
 							for neighbor1 in neighbor_n1:
 								for neighbor2 in neighbor_n2:
 									t_dict[(node1, node2, neighbor1, neighbor2)] = \
 										p_trans_n1 * p_trans_n2 * \
-										deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
-										deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
+										deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self._node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self._node_labels)) * \
+										deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self._edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self._edge_labels))

 		# Compute R_inf with a simple interative method
-		for i in range(2, self.__n_iteration + 1):
+		for i in range(2, self._n_iteration + 1):
 			R_inf_old = R_inf.copy()

 			# Compute R_inf for each pair of nodes
@@ -311,7 +311,7 @@ class Marginalized(GraphKernel):

 		# add elements of R_inf up and compute kernel.
 		for (n1, n2), value in R_inf.items():
-			s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
+			s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self._node_labels), tuple(g2.nodes[n2][nl] for nl in self._node_labels))
 			kernel += s * value  # ref [1] equation (6)

 		return kernel
@@ -320,19 +320,19 @@ class Marginalized(GraphKernel):
 	def _wrapper_kernel_do(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do(G_gn[i], G_gn[j])
+		return i, j, self._kernel_do(G_gn[i], G_gn[j])

 	def _wrapper_untotter(self, i):
-		return i, untotterTransformation(self._graphs[i], self.__node_labels, self.__edge_labels) # @todo: this may not work.
+		return i, untotterTransformation(self._graphs[i], self._node_labels, self._edge_labels) # @todo: this may not work.

-	def __add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+	def _add_dummy_labels(self, Gn):
+		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__node_labels = [SpecialLabel.DUMMY]
-		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			self._node_labels = [SpecialLabel.DUMMY]
+		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
+			self._edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
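Two ingredients of the marginalized kernel hunks above are easy to illustrate in isolation: `deltakernel` compares whole label tuples (a Dirac kernel, 1 on equality and 0 otherwise), and `(1 - p_quit) / len(neighbor)` is the uniform transition probability of the random walks. A hedged, self-contained sketch; this `deltakernel` is a stand-in with the same contract, not the library function:

```python
def deltakernel(a, b):
    # Dirac kernel on (multi-dimensional) label tuples: 1 if equal, else 0.
    return 1 if a == b else 0

p_quit = 0.5
q = p_quit * p_quit                 # both walks stop together, cf. r1 = q above
degree = 3                          # neighbors of the current vertex
p_trans = (1 - p_quit) / degree     # uniform transition probability

print(deltakernel(('C', 'sp2'), ('C', 'sp2')))   # 1
print(deltakernel(('C', 'sp2'), ('N', 'sp2')))   # 0
print(q, p_trans)                                # 0.25 0.1666...
```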
diff --git a/gklearn/kernels/path_up_to_h.py b/gklearn/kernels/path_up_to_h.py
index d8cc387..e9869ea 100644
--- a/gklearn/kernels/path_up_to_h.py
+++ b/gklearn/kernels/path_up_to_h.py
@@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__depth = int(kwargs.get('depth', 10))
-		self.__k_func = kwargs.get('k_func', 'MinMax')
-		self.__compute_method = kwargs.get('compute_method', 'trie')
-		self.__ds_infos = kwargs.get('ds_infos', {})
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._depth = int(kwargs.get('depth', 10))
+		self._k_func = kwargs.get('k_func', 'MinMax')
+		self._compute_method = kwargs.get('compute_method', 'trie')
+		self._ds_infos = kwargs.get('ds_infos', {})

 	def _compute_gm_series(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

 		from itertools import combinations_with_replacement
 		itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
@@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-		if self.__compute_method == 'trie':
-			all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
+		if self._compute_method == 'trie':
+			all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps]
 			for i, j in iterator_kernel:
-				kernel = self.__kernel_do_trie(all_paths[i], all_paths[j])
+				kernel = self._kernel_do_trie(all_paths[i], all_paths[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 		else:
-			all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
+			all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps]
 			for i, j in iterator_kernel:
-				kernel = self.__kernel_do_naive(all_paths[i], all_paths[j])
+				kernel = self._kernel_do_naive(all_paths[i], all_paths[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel

@@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 	def _compute_gm_imap_unordered(self):
-		self.__add_dummy_labels(self._graphs)
+		self._add_dummy_labels(self._graphs)

 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
@@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		else:
 			chunksize = 100
 		all_paths = [[] for _ in range(len(self._graphs))]
-		if self.__compute_method == 'trie' and self.__k_func is not None:
+		if self._compute_method == 'trie' and self._k_func is not None:
 			get_ps_fun = self._wrapper_find_all_path_as_trie
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
+		elif self._compute_method != 'trie' and self._k_func is not None:
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
 		else:
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
@@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

-		if self.__compute_method == 'trie' and self.__k_func is not None:
+		if self._compute_method == 'trie' and self._k_func is not None:
 			def init_worker(trie_toshare):
 				global G_trie
 				G_trie = trie_toshare
 			do_fun = self._wrapper_kernel_do_trie
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
+		elif self._compute_method != 'trie' and self._k_func is not None:
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
@@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			def init_worker(plist_toshare):
 				global G_plist
 				G_plist = plist_toshare
-			do_fun = self.__wrapper_kernel_do_kernelless # @todo: what is this?
+			do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this?
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
 			  glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)

@@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 	def _compute_kernel_list_series(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

 		if self._verbose >= 2:
 			iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
@@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 		kernel_list = [None] * len(g_list)

-		if self.__compute_method == 'trie':
-			paths_g1 = self.__find_all_path_as_trie(g1)
-			paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps]
+		if self._compute_method == 'trie':
+			paths_g1 = self._find_all_path_as_trie(g1)
+			paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps]
 			for i in iterator_kernel:
-				kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i])
+				kernel = self._kernel_do_trie(paths_g1, paths_g_list[i])
 				kernel_list[i] = kernel
 		else:
-			paths_g1 = self.__find_all_paths_until_length(g1)
-			paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps]
+			paths_g1 = self._find_all_paths_until_length(g1)
+			paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps]
 			for i in iterator_kernel:
-				kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i])
+				kernel = self._kernel_do_naive(paths_g1, paths_g_list[i])
 				kernel_list[i] = kernel

 		return kernel_list

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self.__add_dummy_labels(g_list + [g1])
+		self._add_dummy_labels(g_list + [g1])

 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
@@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		else:
 			chunksize = 100
 		paths_g_list = [[] for _ in range(len(g_list))]
-		if self.__compute_method == 'trie' and self.__k_func is not None:
-			paths_g1 = self.__find_all_path_as_trie(g1)
+		if self._compute_method == 'trie' and self._k_func is not None:
+			paths_g1 = self._find_all_path_as_trie(g1)
 			get_ps_fun = self._wrapper_find_all_path_as_trie
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
-			paths_g1 = self.__find_all_paths_until_length(g1)
+		elif self._compute_method != 'trie' and self._k_func is not None:
+			paths_g1 = self._find_all_paths_until_length(g1)
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True)
 		else:
-			paths_g1 = self.__find_all_paths_until_length(g1)
+			paths_g1 = self._find_all_paths_until_length(g1)
 			get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		if self._verbose >= 2:
 			iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize),
@@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 	def _wrapper_kernel_list_do(self, itr):
-		if self.__compute_method == 'trie' and self.__k_func is not None:
-			return itr, self.__kernel_do_trie(G_p1, G_plist[itr])
-		elif self.__compute_method != 'trie' and self.__k_func is not None:
-			return itr, self.__kernel_do_naive(G_p1, G_plist[itr])
+		if self._compute_method == 'trie' and self._k_func is not None:
+			return itr, self._kernel_do_trie(G_p1, G_plist[itr])
+		elif self._compute_method != 'trie' and self._k_func is not None:
+			return itr, self._kernel_do_naive(G_p1, G_plist[itr])
 		else:
-			return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr])
+			return itr, self._kernel_do_kernelless(G_p1, G_plist[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		self.__add_dummy_labels([g1] + [g2])
-		if self.__compute_method == 'trie':
-			paths_g1 = self.__find_all_path_as_trie(g1)
-			paths_g2 = self.__find_all_path_as_trie(g2)
-			kernel = self.__kernel_do_trie(paths_g1, paths_g2)
+		self._add_dummy_labels([g1] + [g2])
+		if self._compute_method == 'trie':
+			paths_g1 = self._find_all_path_as_trie(g1)
+			paths_g2 = self._find_all_path_as_trie(g2)
+			kernel = self._kernel_do_trie(paths_g1, paths_g2)
 		else:
-			paths_g1 = self.__find_all_paths_until_length(g1)
-			paths_g2 = self.__find_all_paths_until_length(g2)
-			kernel = self.__kernel_do_naive(paths_g1, paths_g2)
+			paths_g1 = self._find_all_paths_until_length(g1)
+			paths_g2 = self._find_all_paths_until_length(g2)
+			kernel = self._kernel_do_naive(paths_g1, paths_g2)
 		return kernel

-	def __kernel_do_trie(self, trie1, trie2):
+	def _kernel_do_trie(self, trie1, trie2):
 		"""Compute path graph kernels up to depth d between 2 graphs using trie.

 		Parameters
@@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		kernel : float
 			Path kernel up to h between 2 graphs.
 		"""
-		if self.__k_func == 'tanimoto':
+		if self._k_func == 'tanimoto':
 			# traverse all paths in graph1 and search them in graph2. Deep-first
 			# search is applied.
 			def traverseTrie1t(root, trie2, setlist, pcurrent=[]):
@@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
#			print(setlist)
 			kernel = setlist[0] / setlist[1]

-		elif self.__k_func == 'MinMax': # MinMax kernel
+		elif self._k_func == 'MinMax': # MinMax kernel
 			# traverse all paths in graph1 and search them in graph2. Deep-first
 			# search is applied.
 			def traverseTrie1m(root, trie2, sumlist, pcurrent=[]):
@@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_do_trie(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j])
+		return i, j, self._kernel_do_trie(G_trie[i], G_trie[j])

-	def __kernel_do_naive(self, paths1, paths2):
+	def _kernel_do_naive(self, paths1, paths2):
 		"""Compute path graph kernels up to depth d between 2 graphs naively.

 		Parameters
@@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		"""
 		all_paths = list(set(paths1 + paths2))

-		if self.__k_func == 'tanimoto':
+		if self._k_func == 'tanimoto':
 			length_union = len(set(paths1 + paths2))
 			kernel = (len(set(paths1)) + len(set(paths2)) -
 					  length_union) / length_union
@@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
#			kernel_uv = np.dot(vector1, vector2)
#			kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv)

-		elif self.__k_func == 'MinMax': # MinMax kernel
+		elif self._k_func == 'MinMax': # MinMax kernel
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			kernel = np.sum(np.minimum(vector1, vector2)) / \
 				np.sum(np.maximum(vector1, vector2))

-		elif self.__k_func is None: # no sub-kernel used; compare paths directly.
+		elif self._k_func is None: # no sub-kernel used; compare paths directly.
 			path_count1 = Counter(paths1)
 			path_count2 = Counter(paths2)
 			vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0)
@@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_kernel_do_naive(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j])
+		return i, j, self._kernel_do_naive(G_plist[i], G_plist[j])

-	def __find_all_path_as_trie(self, G):
+	def _find_all_path_as_trie(self, G):
#		all_path = find_all_paths_until_length(G, length, ds_attrs,
#											   node_label=node_label,
#											   edge_label=edge_label)
@@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# them. Deep-first search is applied. Notice the reverse of each path is
 		# also stored to the trie.
 		def traverseGraph(root, ptrie, G, pcurrent=[]):
-			if len(pcurrent) < self.__depth + 1:
+			if len(pcurrent) < self._depth + 1:
 				for neighbor in G[root]:
 					if neighbor not in pcurrent:
 						pcurrent.append(neighbor)
-						plstr = self.__paths2labelseqs([pcurrent], G)
+						plstr = self._paths2labelseqs([pcurrent], G)
 						ptrie.insertWord(plstr[0])
 						traverseGraph(neighbor, ptrie, G, pcurrent)
 				del pcurrent[-1]
@@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None

 		ptrie = Trie()
 		path_l = [[n] for n in G.nodes]  # paths of length l
-		path_l_str = self.__paths2labelseqs(path_l, G)
+		path_l_str = self._paths2labelseqs(path_l, G)
 		for p in path_l_str:
 			ptrie.insertWord(p)
 		for n in G.nodes:
@@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _wrapper_find_all_path_as_trie(self, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, self.__find_all_path_as_trie(g)
+		return i, self._find_all_path_as_trie(g)

 	# @todo: (can be removed maybe) this method find paths repetively, it could be faster.
-	def __find_all_paths_until_length(self, G, tolabelseqs=True):
+	def _find_all_paths_until_length(self, G, tolabelseqs=True):
 		"""Find all paths no longer than a certain maximum length in a graph. A
 		recursive depth first search is applied.

@@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		"""
#		path_l = [tuple([n]) for n in G.nodes]  # paths of length l
#		all_paths = path_l[:]
-#		for l in range(1, self.__depth + 1):
+#		for l in range(1, self._depth + 1):
#			path_l_new = []
#			for path in path_l:
#				for neighbor in G[path[-1]]:
@@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		path_l = [[n] for n in G.nodes]  # paths of length l
 		all_paths = [p.copy() for p in path_l]
-		for l in range(1, self.__depth + 1):
+		for l in range(1, self._depth + 1):
 			path_lplus1 = []
 			for path in path_l:
 				for neighbor in G[path[-1]]:
@@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			all_paths += path_lplus1
 			path_l = [p.copy() for p in path_lplus1]

-#		for i in range(0, self.__depth + 1):
+#		for i in range(0, self._depth + 1):
#			new_paths = find_all_paths(G, i)
#			if new_paths == []:
#				break
@@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		# consider labels
#		print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label))
#		print()
-		return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)
+		return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths)

 	def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs)
+		return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs)

-	def __paths2labelseqs(self, plist, G):
-		if len(self.__node_labels) > 0:
-			if len(self.__edge_labels) > 0:
+	def _paths2labelseqs(self, plist, G):
+		if len(self._node_labels) > 0:
+			if len(self._edge_labels) > 0:
 				path_strs = []
 				for path in plist:
 					pths_tmp = []
 					for idx, node in enumerate(path[:-1]):
-						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
-						pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
-					pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels))
+						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
+						pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
+					pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels))
 					path_strs.append(tuple(pths_tmp))
 			else:
 				path_strs = []
 				for path in plist:
 					pths_tmp = []
 					for node in path:
-						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels))
+						pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
 					path_strs.append(tuple(pths_tmp))
 			return path_strs
 		else:
-			if len(self.__edge_labels) > 0:
+			if len(self._edge_labels) > 0:
 				path_strs = []
 				for path in plist:
 					if len(path) == 1:
@@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 					else:
 						pths_tmp = []
 						for idx, node in enumerate(path[:-1]):
-							pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels))
+							pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels))
 						path_strs.append(tuple(pths_tmp))
 				return path_strs
 			else:
@@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
#			return [tuple([len(path)]) for path in all_paths]

-	def __add_dummy_labels(self, Gn):
-		if self.__k_func is not None:
-			if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+	def _add_dummy_labels(self, Gn):
+		if self._k_func is not None:
+			if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 				for i in range(len(Gn)):
 					nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-				self.__node_labels = [SpecialLabel.DUMMY]
-			if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+				self._node_labels = [SpecialLabel.DUMMY]
+			if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 				for i in range(len(Gn)):
 					nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-				self.__edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
+				self._edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
diff --git a/gklearn/kernels/random_walk_meta.py b/gklearn/kernels/random_walk_meta.py
index f67f33e..c30c8ef 100644
--- a/gklearn/kernels/random_walk_meta.py
+++ b/gklearn/kernels/random_walk_meta.py
@@ -76,11 +76,11 @@ class RandomWalkMeta(GraphKernel):

 	def _add_dummy_labels(self, Gn):
-		if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY):
+		if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__node_labels = [SpecialLabel.DUMMY]
-		if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY):
+			self._node_labels = [SpecialLabel.DUMMY]
+		if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
 			for i in range(len(Gn)):
 				nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
-			self.__edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
+			self._edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
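For `PathUpToH` above, `_kernel_do_naive` reduces each graph to a multiset of path label sequences and compares the multisets with one of two sub-kernels. A standalone sketch of both, under the assumption that paths are already encoded as hashable sequences (plain strings here):

```python
from collections import Counter

import numpy as np

def minmax_kernel(paths1, paths2):
    # ratio of summed minimum counts to summed maximum counts over all paths
    c1, c2 = Counter(paths1), Counter(paths2)
    keys = set(paths1 + paths2)
    v1 = np.array([c1.get(k, 0) for k in keys])
    v2 = np.array([c2.get(k, 0) for k in keys])
    return np.sum(np.minimum(v1, v2)) / np.sum(np.maximum(v1, v2))

def tanimoto_kernel(paths1, paths2):
    # |intersection| / |union| of the path *sets* (counts ignored)
    s1, s2 = set(paths1), set(paths2)
    return len(s1 & s2) / len(s1 | s2)

p1 = ['A', 'AB', 'ABC', 'AB']
p2 = ['A', 'AB', 'BC']
print(minmax_kernel(p1, p2))    # (1+1+0+0) / (1+2+1+1) = 0.4
print(tanimoto_kernel(p1, p2))  # |{A,AB}| / |{A,AB,ABC,BC}| = 0.5
```

MinMax is sensitive to how often a path occurs, while Tanimoto only asks whether it occurs, which matches the `Counter`-based and `set`-based branches in the hunk above.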
diff --git a/gklearn/kernels/shortest_path.py b/gklearn/kernels/shortest_path.py
index b068e6e..794095e 100644
--- a/gklearn/kernels/shortest_path.py
+++ b/gklearn/kernels/shortest_path.py
@@ -26,11 +26,11 @@ class ShortestPath(GraphKernel):

 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__node_attrs = kwargs.get('node_attrs', [])
-		self.__edge_weight = kwargs.get('edge_weight', None)
-		self.__node_kernels = kwargs.get('node_kernels', None)
-		self.__ds_infos = kwargs.get('ds_infos', {})
+		self._node_labels = kwargs.get('node_labels', [])
+		self._node_attrs = kwargs.get('node_attrs', [])
+		self._edge_weight = kwargs.get('edge_weight', None)
+		self._node_kernels = kwargs.get('node_kernels', None)
+		self._ds_infos = kwargs.get('ds_infos', {})

 	def _compute_gm_series(self):
@@ -39,7 +39,7 @@ class ShortestPath(GraphKernel):
 			iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = self._graphs
-		self._graphs = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
+		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -51,7 +51,7 @@ class ShortestPath(GraphKernel):
 		else:
 			iterator = itr
 		for i, j in iterator:
-			kernel = self.__sp_do(self._graphs[i], self._graphs[j])
+			kernel = self._sp_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
 			gram_matrix[j][i] = kernel

@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):

 	def _compute_kernel_list_series(self, g1, g_list):
 		# get shortest path graphs of g1 and each graph in g_list.
-		g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
+		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
 		if self._verbose >= 2:
 			iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = g_list
-		g_list = [getSPGraph(g, edge_weight=self.__edge_weight) for g in iterator]
+		g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]

 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -106,7 +106,7 @@ class ShortestPath(GraphKernel):
 		else:
 			iterator = range(len(g_list))
 		for i in iterator:
-			kernel = self.__sp_do(g1, g_list[i])
+			kernel = self._sp_do(g1, g_list[i])
 			kernel_list[i] = kernel

 		return kernel_list
@@ -114,7 +114,7 @@ class ShortestPath(GraphKernel):

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
 		# get shortest path graphs of g1 and each graph in g_list.
-		g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
+		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
 		pool = Pool(self._n_jobs)
 		get_sp_graphs_fun = self._wrapper_get_sp_graphs
 		itr = zip(g_list, range(0, len(g_list)))
@@ -151,55 +151,55 @@ class ShortestPath(GraphKernel):

 	def _wrapper_kernel_list_do(self, itr):
-		return itr, self.__sp_do(G_g1, G_gl[itr])
+		return itr, self._sp_do(G_g1, G_gl[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		g1 = getSPGraph(g1, edge_weight=self.__edge_weight)
-		g2 = getSPGraph(g2, edge_weight=self.__edge_weight)
-		kernel = self.__sp_do(g1, g2)
+		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
+		g2 = getSPGraph(g2, edge_weight=self._edge_weight)
+		kernel = self._sp_do(g1, g2)
 		return kernel

 	def _wrapper_get_sp_graphs(self, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, getSPGraph(g, edge_weight=self.__edge_weight)
+		return i, getSPGraph(g, edge_weight=self._edge_weight)

-	def __sp_do(self, g1, g2):
+	def _sp_do(self, g1, g2):

 		kernel = 0

 		# compute shortest path matrices first, method borrowed from FCSP.
 		vk_dict = {}  # shortest path matrices dict
-		if len(self.__node_labels) > 0:
+		if len(self._node_labels) > 0:
 			# node symb and non-synb labeled
-			if len(self.__node_attrs) > 0:
-				kn = self.__node_kernels['mix']
+			if len(self._node_attrs) > 0:
+				kn = self._node_kernels['mix']
 				for n1, n2 in product(
 						g1.nodes(data=True), g2.nodes(data=True)):
-					n1_labels = [n1[1][nl] for nl in self.__node_labels]
-					n2_labels = [n2[1][nl] for nl in self.__node_labels]
-					n1_attrs = [n1[1][na] for na in self.__node_attrs]
-					n2_attrs = [n2[1][na] for na in self.__node_attrs]
+					n1_labels = [n1[1][nl] for nl in self._node_labels]
+					n2_labels = [n2[1][nl] for nl in self._node_labels]
+					n1_attrs = [n1[1][na] for na in self._node_attrs]
+					n2_attrs = [n2[1][na] for na in self._node_attrs]
 					vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
 			# node symb labeled
 			else:
-				kn = self.__node_kernels['symb']
+				kn = self._node_kernels['symb']
 				for n1 in g1.nodes(data=True):
 					for n2 in g2.nodes(data=True):
-						n1_labels = [n1[1][nl] for nl in self.__node_labels]
-						n2_labels = [n2[1][nl] for nl in self.__node_labels]
+						n1_labels = [n1[1][nl] for nl in self._node_labels]
+						n2_labels = [n2[1][nl] for nl in self._node_labels]
 						vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
 		else:
 			# node non-synb labeled
-			if len(self.__node_attrs) > 0:
-				kn = self.__node_kernels['nsymb']
+			if len(self._node_attrs) > 0:
+				kn = self._node_kernels['nsymb']
 				for n1 in g1.nodes(data=True):
 					for n2 in g2.nodes(data=True):
-						n1_attrs = [n1[1][na] for na in self.__node_attrs]
-						n2_attrs = [n2[1][na] for na in self.__node_attrs]
+						n1_attrs = [n1[1][na] for na in self._node_attrs]
+						n2_attrs = [n2[1][na] for na in self._node_attrs]
 						vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
 			# node unlabeled
 			else:
@@ -210,7 +210,7 @@ class ShortestPath(GraphKernel):
 				return kernel

 		# compute graph kernels
-		if self.__ds_infos['directed']:
+		if self._ds_infos['directed']:
 			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
 				if e1[2]['cost'] == e2[2]['cost']:
 					nk11, nk22 = vk_dict[(e1[0], e2[0])], vk_dict[(e1[1], e2[1])]
@@ -261,4 +261,4 @@ class ShortestPath(GraphKernel):
 	def _wrapper_sp_do(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__sp_do(G_gs[i], G_gs[j])
\ No newline at end of file
+		return i, j, self._sp_do(G_gs[i], G_gs[j])
\ No newline at end of file
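The `vk_dict` built at the top of `_sp_do` is the FCSP trick: every pairwise vertex kernel is computed exactly once, and each later edge comparison becomes two dictionary lookups. A rough sketch, with a Gaussian kernel standing in for the `'nsymb'` entry of `node_kernels` (illustrative only, not the library's kernel functions):

```python
from itertools import product

import networkx as nx
import numpy as np

def gaussian_kernel(x, y, gamma=0.5):
    # stand-in for a non-symbolic node kernel on attribute vectors
    return float(np.exp(-gamma * np.sum((np.array(x) - np.array(y)) ** 2)))

def vertex_kernel_table(g1, g2, node_attrs):
    # precompute k(u, v) for every node pair, keyed like vk_dict above
    vk = {}
    for (u, du), (v, dv) in product(g1.nodes(data=True), g2.nodes(data=True)):
        vk[(u, v)] = gaussian_kernel([du[a] for a in node_attrs],
                                     [dv[a] for a in node_attrs])
    return vk  # later: nk11 = vk[(e1[0], e2[0])], nk22 = vk[(e1[1], e2[1])]

g1 = nx.Graph(); g1.add_node(0, x=0.0); g1.add_node(1, x=1.0); g1.add_edge(0, 1)
g2 = nx.Graph(); g2.add_node('a', x=0.5); g2.add_node('b', x=1.5); g2.add_edge('a', 'b')
print(vertex_kernel_table(g1, g2, ['x'])[(0, 'a')])  # exp(-0.5 * 0.25) ~= 0.8825
```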
diff --git a/gklearn/kernels/structural_sp.py b/gklearn/kernels/structural_sp.py
index 254f2cc..19322a7 100644
--- a/gklearn/kernels/structural_sp.py
+++ b/gklearn/kernels/structural_sp.py
@@ -26,15 +26,15 @@ class StructuralSP(GraphKernel):

 	def __init__(self, **kwargs):
 		GraphKernel.__init__(self)
-		self.__node_labels = kwargs.get('node_labels', [])
-		self.__edge_labels = kwargs.get('edge_labels', [])
-		self.__node_attrs = kwargs.get('node_attrs', [])
-		self.__edge_attrs = kwargs.get('edge_attrs', [])
-		self.__edge_weight = kwargs.get('edge_weight', None)
-		self.__node_kernels = kwargs.get('node_kernels', None)
-		self.__edge_kernels = kwargs.get('edge_kernels', None)
-		self.__compute_method = kwargs.get('compute_method', 'naive')
-		self.__ds_infos = kwargs.get('ds_infos', {})
+		self._node_labels = kwargs.get('node_labels', [])
+		self._edge_labels = kwargs.get('edge_labels', [])
+		self._node_attrs = kwargs.get('node_attrs', [])
+		self._edge_attrs = kwargs.get('edge_attrs', [])
+		self._edge_weight = kwargs.get('edge_weight', None)
+		self._node_kernels = kwargs.get('node_kernels', None)
+		self._edge_kernels = kwargs.get('edge_kernels', None)
+		self._compute_method = kwargs.get('compute_method', 'naive')
+		self._ds_infos = kwargs.get('ds_infos', {})

 	def _compute_gm_series(self):
@@ -44,12 +44,12 @@ class StructuralSP(GraphKernel):
 			iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = self._graphs
-		if self.__compute_method == 'trie':
+		if self._compute_method == 'trie':
 			for g in iterator:
-				splist.append(self.__get_sps_as_trie(g))
+				splist.append(self._get_sps_as_trie(g))
 		else:
 			for g in iterator:
-				splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
+				splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))

 		# compute Gram matrix.
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
@@ -60,14 +60,14 @@ class StructuralSP(GraphKernel):
 			iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
 		else:
 			iterator = itr
-		if self.__compute_method == 'trie':
+		if self._compute_method == 'trie':
 			for i, j in iterator:
-				kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
+				kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
 				gram_matrix[i][j] = kernel
 				gram_matrix[j][i] = kernel
 		else:
 			for i, j in iterator:
-				kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
+				kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j])
#				if(kernel > 1):
#					print("error here ")
 				gram_matrix[i][j] = kernel
@@ -86,7 +86,7 @@ class StructuralSP(GraphKernel):
 		else:
 			chunksize = 100
 		# get shortest path graphs of self._graphs
-		if self.__compute_method == 'trie':
+		if self._compute_method == 'trie':
 			get_sps_fun = self._wrapper_get_sps_trie
 		else:
 			get_sps_fun = self._wrapper_get_sps_naive
@@ -107,8 +107,8 @@ class StructuralSP(GraphKernel):
 			global G_spl, G_gs
 			G_spl = spl_toshare
 			G_gs = gs_toshare
-		if self.__compute_method == 'trie':
-			do_fun = self.__wrapper_ssp_do_trie
+		if self._compute_method == 'trie':
+			do_fun = self._wrapper_ssp_do_trie
 		else:
 			do_fun = self._wrapper_ssp_do_naive
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
@@ -119,18 +119,18 @@ class StructuralSP(GraphKernel):

 	def _compute_kernel_list_series(self, g1, g_list):
 		# get shortest paths of g1 and each graph in g_list.
-		sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
+		sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
 		splist = []
 		if self._verbose >= 2:
 			iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout)
 		else:
 			iterator = g_list
-		if self.__compute_method == 'trie':
+		if self._compute_method == 'trie':
 			for g in iterator:
-				splist.append(self.__get_sps_as_trie(g))
+				splist.append(self._get_sps_as_trie(g))
 		else:
 			for g in iterator:
-				splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']))
+				splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))

 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
@@ -138,13 +138,13 @@ class StructuralSP(GraphKernel):
 			iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
 		else:
 			iterator = range(len(g_list))
-		if self.__compute_method == 'trie':
+		if self._compute_method == 'trie':
 			for i in iterator:
-				kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i])
+				kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
 				kernel_list[i] = kernel
 		else:
 			for i in iterator:
-				kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i])
+				kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
 				kernel_list[i] = kernel

 		return kernel_list
@@ -152,7 +152,7 @@ class StructuralSP(GraphKernel):

 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
 		# get shortest paths of g1 and each graph in g_list.
-		sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
+		sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
 		splist = [None] * len(g_list)
 		pool = Pool(self._n_jobs)
 		itr = zip(g_list, range(0, len(g_list)))
@@ -161,7 +161,7 @@ class StructuralSP(GraphKernel):
 		else:
 			chunksize = 100
 		# get shortest path graphs of g_list
-		if self.__compute_method == 'trie':
+		if self._compute_method == 'trie':
 			get_sps_fun = self._wrapper_get_sps_trie
 		else:
 			get_sps_fun = self._wrapper_get_sps_naive
@@ -184,8 +184,8 @@ class StructuralSP(GraphKernel):
 			G_spl = spl_toshare
 			G_g1 = g1_toshare
 			G_gl = gl_toshare
-		if self.__compute_method == 'trie':
-			do_fun = self.__wrapper_ssp_do_trie
+		if self._compute_method == 'trie':
+			do_fun = self._wrapper_ssp_do_trie
 		else:
 			do_fun = self._wrapper_kernel_list_do
 		def func_assign(result, var_to_assign):
@@ -199,36 +199,36 @@ class StructuralSP(GraphKernel):

 	def _wrapper_kernel_list_do(self, itr):
-		return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
+		return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])

 	def _compute_single_kernel_series(self, g1, g2):
-		sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed'])
-		sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed'])
-		if self.__compute_method == 'trie':
-			kernel = self.__ssp_do_trie(g1, g2, sp1, sp2)
+		sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
+		sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
+		if self._compute_method == 'trie':
+			kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
 		else:
-			kernel = self.__ssp_do_naive(g1, g2, sp1, sp2)
+			kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
 		return kernel

 	def _wrapper_get_sps_naive(self, itr_item):
 		g = itr_item[0]
 		i = itr_item[1]
-		return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])
+		return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])

-	def __ssp_do_naive(self, g1, g2, spl1, spl2):
+	def _ssp_do_naive(self, g1, g2, spl1, spl2):

 		kernel = 0

 		# First, compute shortest path matrices, method borrowed from FCSP.
-		vk_dict = self.__get_all_node_kernels(g1, g2)
+		vk_dict = self._get_all_node_kernels(g1, g2)
 		# Then, compute kernels between all pairs of edges, which is an idea of
 		# extension of FCSP. It suits sparse graphs, which is the most case we
 		# went though. For dense graphs, this would be slow.
-		ek_dict = self.__get_all_edge_kernels(g1, g2)
+		ek_dict = self._get_all_edge_kernels(g1, g2)

 		# compute graph kernels
 		if vk_dict:
@@ -314,27 +314,27 @@ class StructuralSP(GraphKernel):
 	def _wrapper_ssp_do_naive(self, itr):
 		i = itr[0]
 		j = itr[1]
-		return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
+		return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])

-	def __get_all_node_kernels(self, g1, g2):
+	def _get_all_node_kernels(self, g1, g2):
 		return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)

-	def __get_all_edge_kernels(self, g1, g2):
+	def _get_all_edge_kernels(self, g1, g2):
 		# compute kernels between all pairs of edges, which is an idea of
 		# extension of FCSP. It suits sparse graphs, which is the most case we
 		# went though. For dense graphs, this would be slow.
 		ek_dict = {}  # dict of edge kernels
-		if len(self.__edge_labels) > 0:
+		if len(self._edge_labels) > 0:
 			# edge symb and non-synb labeled
-			if len(self.__edge_attrs) > 0:
-				ke = self.__edge_kernels['mix']
+			if len(self._edge_attrs) > 0:
+				ke = self._edge_kernels['mix']
 				for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
-					e1_labels = [e1[2][el] for el in self.__edge_labels]
-					e2_labels = [e2[2][el] for el in self.__edge_labels]
-					e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-					e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+					e1_labels = [e1[2][el] for el in self._edge_labels]
+					e2_labels = [e2[2][el] for el in self._edge_labels]
+					e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+					e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
 					ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
 					ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
 					ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -342,11 +342,11 @@ class StructuralSP(GraphKernel):
 					ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
 			# edge symb labeled
 			else:
-				ke = self.__edge_kernels['symb']
+				ke = self._edge_kernels['symb']
 				for e1 in g1.edges(data=True):
 					for e2 in g2.edges(data=True):
-						e1_labels = [e1[2][el] for el in self.__edge_labels]
-						e2_labels = [e2[2][el] for el in self.__edge_labels]
+						e1_labels = [e1[2][el] for el in self._edge_labels]
+						e2_labels = [e2[2][el] for el in self._edge_labels]
 						ek_temp = ke(e1_labels, e2_labels)
 						ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
 						ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
@@ -354,12 +354,12 @@ class StructuralSP(GraphKernel):
 						ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp
 		else:
 			# edge non-synb labeled
-			if len(self.__edge_attrs) > 0:
-				ke = self.__edge_kernels['nsymb']
+			if len(self._edge_attrs) > 0:
+				ke = self._edge_kernels['nsymb']
 				for e1 in g1.edges(data=True):
 					for e2 in g2.edges(data=True):
-						e1_attrs = [e1[2][ea] for ea in self.__edge_attrs]
-						e2_attrs = [e2[2][ea] for ea in self.__edge_attrs]
+						e1_attrs = [e1[2][ea] for ea in self._edge_attrs]
+						e2_attrs = [e2[2][ea] for ea in self._edge_attrs]
 						ek_temp = ke(e1_attrs, e2_attrs)
 						ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp
 						ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp
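In `_get_all_edge_kernels`, each kernel value is stored under all four orientations of the edge pair, since shortest paths may traverse an undirected edge in either direction; the four `ek_dict` assignments per pair in the hunks above do exactly that. A hypothetical mini-version of the caching step:

```python
def cache_edge_kernel(ek_dict, e1, e2, value):
    # store the kernel value under every orientation of the two edges,
    # mirroring the four assignments in _get_all_edge_kernels
    (u1, v1), (u2, v2) = e1, e2
    for a in ((u1, v1), (v1, u1)):
        for b in ((u2, v2), (v2, u2)):
            ek_dict[(a, b)] = value

ek = {}
cache_edge_kernel(ek, (0, 1), (5, 6), 0.75)
print(ek[((1, 0), (6, 5))])  # 0.75, any orientation hits the cache
```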
kwargs.get('ds_infos', {}) - if self.__sub_kernel is None: + self._node_labels = kwargs.get('node_labels', []) + self._edge_labels = kwargs.get('edge_labels', []) + self._sub_kernel = kwargs.get('sub_kernel', None) + self._ds_infos = kwargs.get('ds_infos', {}) + if self._sub_kernel is None: raise Exception('Sub kernel not set.') def _compute_gm_series(self): - self.__add_dummy_labels(self._graphs) + self._add_dummy_labels(self._graphs) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. @@ -47,7 +47,7 @@ class Treelet(GraphKernel): else: iterator = self._graphs for g in iterator: - canonkeys.append(self.__get_canonkeys(g)) + canonkeys.append(self._get_canonkeys(g)) # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) @@ -59,7 +59,7 @@ class Treelet(GraphKernel): else: iterator = itr for i, j in iterator: - kernel = self.__kernel_do(canonkeys[i], canonkeys[j]) + kernel = self._kernel_do(canonkeys[i], canonkeys[j]) gram_matrix[i][j] = kernel gram_matrix[j][i] = kernel # @todo: no directed graph considered? @@ -67,7 +67,7 @@ class Treelet(GraphKernel): def _compute_gm_imap_unordered(self): - self.__add_dummy_labels(self._graphs) + self._add_dummy_labels(self._graphs) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. @@ -103,18 +103,18 @@ class Treelet(GraphKernel): def _compute_kernel_list_series(self, g1, g_list): - self.__add_dummy_labels(g_list + [g1]) + self._add_dummy_labels(g_list + [g1]) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. - canonkeys_1 = self.__get_canonkeys(g1) + canonkeys_1 = self._get_canonkeys(g1) canonkeys_list = [] if self._verbose >= 2: iterator = tqdm(g_list, desc='getting canonkeys', file=sys.stdout) else: iterator = g_list for g in iterator: - canonkeys_list.append(self.__get_canonkeys(g)) + canonkeys_list.append(self._get_canonkeys(g)) # compute kernel list. kernel_list = [None] * len(g_list) @@ -123,18 +123,18 @@ class Treelet(GraphKernel): else: iterator = range(len(g_list)) for i in iterator: - kernel = self.__kernel_do(canonkeys_1, canonkeys_list[i]) + kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) kernel_list[i] = kernel return kernel_list def _compute_kernel_list_imap_unordered(self, g1, g_list): - self.__add_dummy_labels(g_list + [g1]) + self._add_dummy_labels(g_list + [g1]) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. 
- canonkeys_1 = self.__get_canonkeys(g1) + canonkeys_1 = self._get_canonkeys(g1) canonkeys_list = [[] for _ in range(len(g_list))] pool = Pool(self._n_jobs) itr = zip(g_list, range(0, len(g_list))) @@ -173,18 +173,18 @@ class Treelet(GraphKernel): def _wrapper_kernel_list_do(self, itr): - return itr, self.__kernel_do(G_ck_1, G_ck_list[itr]) + return itr, self._kernel_do(G_ck_1, G_ck_list[itr]) def _compute_single_kernel_series(self, g1, g2): - self.__add_dummy_labels([g1] + [g2]) - canonkeys_1 = self.__get_canonkeys(g1) - canonkeys_2 = self.__get_canonkeys(g2) - kernel = self.__kernel_do(canonkeys_1, canonkeys_2) + self._add_dummy_labels([g1] + [g2]) + canonkeys_1 = self._get_canonkeys(g1) + canonkeys_2 = self._get_canonkeys(g2) + kernel = self._kernel_do(canonkeys_1, canonkeys_2) return kernel - def __kernel_do(self, canonkey1, canonkey2): + def _kernel_do(self, canonkey1, canonkey2): """Compute treelet graph kernel between 2 graphs. Parameters @@ -200,17 +200,17 @@ class Treelet(GraphKernel): keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) - kernel = self.__sub_kernel(vector1, vector2) + kernel = self._sub_kernel(vector1, vector2) return kernel def _wrapper_kernel_do(self, itr): i = itr[0] j = itr[1] - return i, j, self.__kernel_do(G_canonkeys[i], G_canonkeys[j]) + return i, j, self._kernel_do(G_canonkeys[i], G_canonkeys[j]) - def __get_canonkeys(self, G): + def _get_canonkeys(self, G): """Generate canonical keys of all treelets in a graph. Parameters @@ -236,7 +236,7 @@ class Treelet(GraphKernel): patterns['0'] = list(G.nodes()) canonkey['0'] = nx.number_of_nodes(G) for i in range(1, 6): # for i in range(1, 6): - patterns[str(i)] = find_all_paths(G, i, self.__ds_infos['directed']) + patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed']) canonkey[str(i)] = len(patterns[str(i)]) # n-star patterns @@ -330,11 +330,11 @@ class Treelet(GraphKernel): ### pattern obtained in the structural analysis section above, which is a ### string corresponding to a unique treelet. A dictionary is built to keep ### track of the amount of every treelet. - if len(self.__node_labels) > 0 or len(self.__edge_labels) > 0: + if len(self._node_labels) > 0 or len(self._edge_labels) > 0: canonkey_l = {} # canonical key, a dictionary which keeps track of the amount of every treelet.
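
The _kernel_do method renamed above reduces each graph to a histogram of canonical treelet keys and applies the user-supplied sub-kernel to the two count vectors restricted to the keys the graphs share. A self-contained sketch of that computation; the Gaussian sub-kernel and the toy histograms below are purely illustrative, since sub_kernel is whatever the caller passed to the constructor:

import numpy as np

def treelet_kernel(canonkey1, canonkey2, sub_kernel):
    # keep only the canonical keys that occur in both graphs
    keys = set(canonkey1) & set(canonkey2)
    vector1 = np.array([canonkey1[k] for k in keys])
    vector2 = np.array([canonkey2[k] for k in keys])
    return sub_kernel(vector1, vector2)

# illustrative choice of sub-kernel: Gaussian on the count vectors
def gaussian(x, y, gamma=1.0):
    return np.exp(-gamma * np.sum((x - y) ** 2))

# toy canonical-key histograms (treelet pattern -> number of occurrences)
ck1 = {'0': 5, '1': 4, '2': 2}
ck2 = {'0': 6, '1': 5, '3': 1}
print(treelet_kernel(ck1, ck2, gaussian))  # computed over shared keys '0', '1'

Since keys is already the intersection, the (key in canonkey1.keys()) guards in the original are always true; the sketch drops them.
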
# linear patterns - canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.__node_labels)) + canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels)) for key in canonkey_t: canonkey_l[('0', key)] = canonkey_t[key] @@ -343,9 +343,9 @@ class Treelet(GraphKernel): for pattern in patterns[str(i)]: canonlist = [] for idx, node in enumerate(pattern[:-1]): - canonlist.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) - canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.__edge_labels)) - canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.__node_labels)) + canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) + canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels)) + canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels)) canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] treelet.append(tuple([str(i)] + canonkey_t)) canonkey_l.update(Counter(treelet)) @@ -356,13 +356,13 @@ class Treelet(GraphKernel): for pattern in patterns[str(i) + 'star']: canonlist = [] for leaf in pattern[1:]: - nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) - elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) + nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) + elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) canonlist.append(tuple((nlabels, elabels))) canonlist.sort() canonlist = list(chain.from_iterable(canonlist)) canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + - [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist) treelet.append(canonkey_t) canonkey_l.update(Counter(treelet)) @@ -372,17 +372,17 @@ class Treelet(GraphKernel): for pattern in patterns['7']: canonlist = [] for leaf in pattern[1:3]: - nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) - elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) + nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) + elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) canonlist.append(tuple((nlabels, elabels))) canonlist.sort() canonlist = list(chain.from_iterable(canonlist)) canonkey_t = tuple(['7'] - + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist - + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] - + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)]) + + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist + + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] + + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]) treelet.append(canonkey_t) canonkey_l.update(Counter(treelet)) @@ -391,38 +391,38 @@ class Treelet(GraphKernel): for pattern in patterns['11']: canonlist = [] for leaf in pattern[1:4]: - nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) - elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) + nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) + elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) canonlist.append(tuple((nlabels, elabels))) canonlist.sort() canonlist = 
list(chain.from_iterable(canonlist)) canonkey_t = tuple(['b'] - + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist - + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[4]][pattern[0]][el] for el in self.__edge_labels)] - + [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)]) + + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist + + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)] + + [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]) treelet.append(canonkey_t) canonkey_l.update(Counter(treelet)) # pattern 10 treelet = [] for pattern in patterns['10']: - canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels), - tuple(G[pattern[5]][pattern[4]][el] for el in self.__edge_labels)] + canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), + tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)] canonlist = [] for leaf in pattern[1:3]: - nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) - elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) + nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) + elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) canonlist.append(tuple((nlabels, elabels))) canonlist.sort() canonkey0 = list(chain.from_iterable(canonlist)) canonkey_t = tuple(['a'] - + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] - + [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[4]][pattern[3]][el] for el in self.__edge_labels)] - + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] + + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + + [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)] + + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] + canonkey4 + canonkey0) treelet.append(canonkey_t) canonkey_l.update(Counter(treelet)) @@ -432,15 +432,15 @@ class Treelet(GraphKernel): for pattern in patterns['12']: canonlist0 = [] for leaf in pattern[1:3]: - nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) - elabels = tuple(G[leaf][pattern[0]][el] for el in self.__edge_labels) + nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) + elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels) canonlist0.append(tuple((nlabels, elabels))) canonlist0.sort() canonlist0 = list(chain.from_iterable(canonlist0)) canonlist3 = [] for leaf in pattern[4:6]: - nlabels = tuple(G.nodes[leaf][nl] for nl in self.__node_labels) - elabels = tuple(G[leaf][pattern[3]][el] for el in self.__edge_labels) + nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels) + elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels) canonlist3.append(tuple((nlabels, elabels))) canonlist3.sort() canonlist3 = list(chain.from_iterable(canonlist3)) @@ -448,14 +448,14 @@ class Treelet(GraphKernel): # 2 possible keys can be generated from 2 nodes with extended label 3, # select the one with lower lexicographic order.
canonkey_t1 = tuple(['c'] - + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + canonlist0 - + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] + + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0 + + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] + canonlist3) canonkey_t2 = tuple(['c'] - + [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels)] + canonlist3 - + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] - + [tuple(G[pattern[0]][pattern[3]][el] for el in self.__edge_labels)] + + [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3 + + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + + [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)] + canonlist0) treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) canonkey_l.update(Counter(treelet)) @@ -463,24 +463,24 @@ class Treelet(GraphKernel): # pattern 9 treelet = [] for pattern in patterns['9']: - canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.__node_labels), - tuple(G[pattern[4]][pattern[2]][el] for el in self.__edge_labels)] - canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.__node_labels), - tuple(G[pattern[5]][pattern[3]][el] for el in self.__edge_labels)] - prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.__node_labels), - tuple(G[pattern[2]][pattern[0]][el] for el in self.__edge_labels)] - prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.__node_labels), - tuple(G[pattern[3]][pattern[0]][el] for el in self.__edge_labels)] + canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels), + tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)] + canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels), + tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)] + prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels), + tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)] + prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels), + tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)] if prekey2 + canonkey2 < prekey3 + canonkey3: - canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \ - + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \ + canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ + + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ + prekey2 + prekey3 + canonkey2 + canonkey3 else: - canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.__node_labels)] \ - + [tuple(G[pattern[1]][pattern[0]][el] for el in self.__edge_labels)] \ + canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \ + + [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \ + prekey3 + prekey2 + canonkey3 + canonkey2 treelet.append(tuple(['9'] - + [tuple(G.nodes[pattern[0]][nl] for nl in self.__node_labels)] + + [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonkey_t)) canonkey_l.update(Counter(treelet)) @@ -492,15 +492,15 @@ class Treelet(GraphKernel): def _wrapper_get_canonkeys(self, itr_item): g = itr_item[0] i = itr_item[1] - return i, self.__get_canonkeys(g) + return i, self._get_canonkeys(g) - def __add_dummy_labels(self, Gn): - if len(self.__node_labels) == 0 or (len(self.__node_labels) 
== 1 and self.__node_labels[0] == SpecialLabel.DUMMY): + def _add_dummy_labels(self, Gn): + if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): for i in range(len(Gn)): nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) - self.__node_labels = [SpecialLabel.DUMMY] - if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): + self._node_labels = [SpecialLabel.DUMMY] + if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): for i in range(len(Gn)): nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) - self.__edge_labels = [SpecialLabel.DUMMY] \ No newline at end of file + self._edge_labels = [SpecialLabel.DUMMY] \ No newline at end of file diff --git a/gklearn/kernels/weisfeiler_lehman.py b/gklearn/kernels/weisfeiler_lehman.py index 8ab7634..124e1f4 100644 --- a/gklearn/kernels/weisfeiler_lehman.py +++ b/gklearn/kernels/weisfeiler_lehman.py @@ -25,11 +25,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def __init__(self, **kwargs): GraphKernel.__init__(self) - self.__node_labels = kwargs.get('node_labels', []) - self.__edge_labels = kwargs.get('edge_labels', []) - self.__height = int(kwargs.get('height', 0)) - self.__base_kernel = kwargs.get('base_kernel', 'subtree') - self.__ds_infos = kwargs.get('ds_infos', {}) + self._node_labels = kwargs.get('node_labels', []) + self._edge_labels = kwargs.get('edge_labels', []) + self._height = int(kwargs.get('height', 0)) + self._base_kernel = kwargs.get('base_kernel', 'subtree') + self._ds_infos = kwargs.get('ds_infos', {}) def _compute_gm_series(self): @@ -37,23 +37,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge import warnings warnings.warn('A part of the computation is parallelized.') - self.__add_dummy_node_labels(self._graphs) + self._add_dummy_node_labels(self._graphs) # for WL subtree kernel - if self.__base_kernel == 'subtree': - gram_matrix = self.__subtree_kernel_do(self._graphs) + if self._base_kernel == 'subtree': + gram_matrix = self._subtree_kernel_do(self._graphs) # for WL shortest path kernel - elif self.__base_kernel == 'sp': - gram_matrix = self.__sp_kernel_do(self._graphs) + elif self._base_kernel == 'sp': + gram_matrix = self._sp_kernel_do(self._graphs) # for WL edge kernel - elif self.__base_kernel == 'edge': - gram_matrix = self.__edge_kernel_do(self._graphs) + elif self._base_kernel == 'edge': + gram_matrix = self._edge_kernel_do(self._graphs) # for user defined base kernel else: - gram_matrix = self.__user_kernel_do(self._graphs) + gram_matrix = self._user_kernel_do(self._graphs) return gram_matrix @@ -70,23 +70,23 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge import warnings warnings.warn('A part of the computation is parallelized.') - self.__add_dummy_node_labels(g_list + [g1]) + self._add_dummy_node_labels(g_list + [g1]) # for WL subtree kernel - if self.__base_kernel == 'subtree': - gram_matrix = self.__subtree_kernel_do(g_list + [g1]) + if self._base_kernel == 'subtree': + gram_matrix = self._subtree_kernel_do(g_list + [g1]) # for WL shortest path kernel - elif self.__base_kernel == 'sp': - gram_matrix = self.__sp_kernel_do(g_list + [g1]) + elif self._base_kernel == 'sp': + gram_matrix = self._sp_kernel_do(g_list + [g1]) # for WL edge kernel - elif self.__base_kernel == 'edge': - gram_matrix = self.__edge_kernel_do(g_list + [g1]) + 
elif self._base_kernel == 'edge': + gram_matrix = self._edge_kernel_do(g_list + [g1]) # for user defined base kernel else: - gram_matrix = self.__user_kernel_do(g_list + [g1]) + gram_matrix = self._user_kernel_do(g_list + [g1]) return list(gram_matrix[-1][0:-1]) @@ -103,28 +103,28 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better. - self.__add_dummy_node_labels([g1] + [g2]) + self._add_dummy_node_labels([g1] + [g2]) # for WL subtree kernel - if self.__base_kernel == 'subtree': - gram_matrix = self.__subtree_kernel_do([g1] + [g2]) + if self._base_kernel == 'subtree': + gram_matrix = self._subtree_kernel_do([g1] + [g2]) # for WL shortest path kernel - elif self.__base_kernel == 'sp': - gram_matrix = self.__sp_kernel_do([g1] + [g2]) + elif self._base_kernel == 'sp': + gram_matrix = self._sp_kernel_do([g1] + [g2]) # for WL edge kernel - elif self.__base_kernel == 'edge': - gram_matrix = self.__edge_kernel_do([g1] + [g2]) + elif self._base_kernel == 'edge': + gram_matrix = self._edge_kernel_do([g1] + [g2]) # for user defined base kernel else: - gram_matrix = self.__user_kernel_do([g1] + [g2]) + gram_matrix = self._user_kernel_do([g1] + [g2]) return gram_matrix[0][1] - def __subtree_kernel_do(self, Gn): + def _subtree_kernel_do(self, Gn): """Compute Weisfeiler-Lehman kernels between graphs. Parameters @@ -146,17 +146,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge for G in Gn: # set all labels into a tuple. for nd, attrs in G.nodes(data=True): # @todo: there may be a better way. - G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self.__node_labels) + G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels) # get the set of original labels labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values()) # number of occurrences of each label in G all_num_of_each_label.append(dict(Counter(labels_ori))) # Compute subtree kernel with the 0th iteration and add it to the final kernel. - self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) + self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) # iterate each height - for h in range(1, self.__height + 1): + for h in range(1, self._height + 1): all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration num_of_labels_occured = 0 # number of sets of letters that have occurred as node labels at least once in all graphs # all_labels_ori = set() # all unique original labels in all graphs in this iteration @@ -199,12 +199,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge all_num_of_each_label.append(dict(Counter(labels_comp))) # Compute subtree kernel with h iterations and add it to the final kernel - self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) + self._compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) return gram_matrix - def __compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn): + def _compute_gram_matrix(self, gram_matrix, all_num_of_each_label, Gn): """Compute Gram matrix using the base kernel.
""" if self._parallel == 'imap_unordered': @@ -218,12 +218,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge elif self._parallel is None: for i in range(len(gram_matrix)): for j in range(i, len(gram_matrix)): - gram_matrix[i][j] = self.__compute_subtree_kernel(all_num_of_each_label[i], + gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i], all_num_of_each_label[j], gram_matrix[i][j]) gram_matrix[j][i] = gram_matrix[i][j] - def __compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel): + def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel): """Compute the subtree kernel. """ labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys())) @@ -240,7 +240,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def _wrapper_compute_subtree_kernel(self, gram_matrix, itr): i = itr[0] j = itr[1] - return i, j, self.__compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j]) + return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j]) def _wl_spkernel_do(Gn, node_label, edge_label, height): @@ -469,11 +469,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge return gram_matrix - def __add_dummy_node_labels(self, Gn): - if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): + def _add_dummy_node_labels(self, Gn): + if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): for i in range(len(Gn)): nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) - self.__node_labels = [SpecialLabel.DUMMY] + self._node_labels = [SpecialLabel.DUMMY] class WLSubtree(WeisfeilerLehman): diff --git a/gklearn/preimage/generate_random_preimages_by_class.py b/gklearn/preimage/generate_random_preimages_by_class.py index 656579f..66f6c57 100644 --- a/gklearn/preimage/generate_random_preimages_by_class.py +++ b/gklearn/preimage/generate_random_preimages_by_class.py @@ -31,7 +31,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav if save_results: # create result files. print('creating output files...') - fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], dir_save) + fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], dir_save) dis_k_dataset_list = [] @@ -166,7 +166,7 @@ def generate_random_preimages_by_class(ds_name, rpg_options, kernel_options, sav print('\ncomplete.\n') -def __init_output_file_preimage(ds_name, gkernel, dir_output): +def _init_output_file_preimage(ds_name, gkernel, dir_output): if not os.path.exists(dir_output): os.makedirs(dir_output) fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' diff --git a/gklearn/preimage/kernel_knn_cv.py b/gklearn/preimage/kernel_knn_cv.py index 073fa31..1ae25d1 100644 --- a/gklearn/preimage/kernel_knn_cv.py +++ b/gklearn/preimage/kernel_knn_cv.py @@ -33,35 +33,35 @@ def kernel_knn_cv(ds_name, train_examples, knn_options, mpg_options, kernel_opti if save_results: # create result files. 
print('creating output files...') - fn_output_detail, fn_output_summary = __init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) + fn_output_detail, fn_output_summary = _init_output_file_knn(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) else: fn_output_detail, fn_output_summary = None, None # 2. compute/load Gram matrix a priori. print('2. computing/loading Gram matrix...') - gram_matrix_unnorm, time_precompute_gm = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all) + gram_matrix_unnorm, time_precompute_gm = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all) # 3. perform k-nn CV. print('3. performing k-nn CV...') if train_examples == 'k-graphs' or train_examples == 'expert' or train_examples == 'random': - __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) + _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) elif train_examples == 'best-dataset': - __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) + _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) elif train_examples == 'trainset': - __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) + _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary) print('\ncomplete.\n') -def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): +def _kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kernel_options, mge_options, ged_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): Gn = dataset_all.graphs y_all = dataset_all.targets n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] # get shuffles. 
- train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size) + train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size) accuracies = [[], [], []] for trial in range(len(train_indices)): @@ -89,11 +89,11 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne mge_options['update_order'] = True mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() mpg_options['runtime_precompute_gm'] = 0 - set_median, gen_median_uo = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) + set_median, gen_median_uo = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) mge_options['update_order'] = False mpg_options['gram_matrix_unnorm'] = gm_unnorm_trial[i_start:i_end,i_start:i_end].copy() mpg_options['runtime_precompute_gm'] = 0 - _, gen_median = __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) + _, gen_median = _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options) medians[0].append(set_median) medians[1].append(gen_median) medians[2].append(gen_median_uo) @@ -104,10 +104,10 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne # compute dis_mat between medians. dataset = dataset_all.copy() dataset.load_graphs([g.copy() for g in G_app], targets=None) - gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy()) + gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy()) # compute the entire Gram matrix. - graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy()) + graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy()) kernels_to_medians = [] for g in G_app: kernels_to_median, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) @@ -161,13 +161,13 @@ def __kernel_knn_cv_median(dataset_all, ds_name, knn_options, mpg_options, kerne f_summary.close() -def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): +def _kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): Gn = dataset_all.graphs y_all = dataset_all.targets n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] # get shuffles. - train_indices, test_indices, train_nums, y_app = __get_shuffles(y_all, n_splits, test_size) + train_indices, test_indices, train_nums, y_app = _get_shuffles(y_all, n_splits, test_size) accuracies = [] for trial in range(len(train_indices)): @@ -204,10 +204,10 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g # compute dis_mat between medians. dataset = dataset_all.copy() dataset.load_graphs([g.copy() for g in best_graphs], targets=None) - gm_app_unnorm, _ = __compute_gram_matrix_unnorm(dataset, kernel_options.copy()) + gm_app_unnorm, _ = _compute_gram_matrix_unnorm(dataset, kernel_options.copy()) # compute the entire Gram matrix. 
- graph_kernel = __get_graph_kernel(dataset.copy(), kernel_options.copy()) + graph_kernel = _get_graph_kernel(dataset.copy(), kernel_options.copy()) kernels_to_best_graphs = [] for g in best_graphs: kernels_to_best_graph, _ = graph_kernel.compute(g, G_test, **kernel_options.copy()) @@ -259,7 +259,7 @@ def __kernel_knn_cv_best_ds(dataset_all, ds_name, knn_options, kernel_options, g f_summary.close() -def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): +def _kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, gram_matrix_unnorm, time_precompute_gm, train_examples, save_results, dir_save, fn_output_detail, fn_output_summary): y_all = dataset_all.targets n_neighbors, n_splits, test_size = knn_options['n_neighbors'], knn_options['n_splits'], knn_options['test_size'] @@ -268,7 +268,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, dis_mat, _, _, _ = compute_distance_matrix(gram_matrix) # get shuffles. - train_indices, test_indices, _, _ = __get_shuffles(y_all, n_splits, test_size) + train_indices, test_indices, _, _ = _get_shuffles(y_all, n_splits, test_size) accuracies = [] for trial in range(len(train_indices)): @@ -317,7 +317,7 @@ def __kernel_knn_cv_trainset(dataset_all, ds_name, knn_options, kernel_options, f_summary.close() -def __get_shuffles(y_all, n_splits, test_size): +def _get_shuffles(y_all, n_splits, test_size): rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) train_indices = [[] for _ in range(n_splits)] test_indices = [[] for _ in range(n_splits)] @@ -335,7 +335,7 @@ def __get_shuffles(y_all, n_splits, test_size): return train_indices, test_indices, train_nums, keys -def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options): +def _generate_median_preimages(dataset, mpg_options, kernel_options, ged_options, mge_options): mpg = MedianPreimageGenerator() mpg.dataset = dataset.copy() mpg.set_options(**mpg_options.copy()) @@ -346,7 +346,7 @@ def __generate_median_preimages(dataset, mpg_options, kernel_options, ged_option return mpg.set_median, mpg.gen_median -def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): +def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): if load_gm == 'auto': gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) @@ -355,10 +355,10 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): gram_matrix_unnorm = gmfile['gram_matrix_unnorm'] time_precompute_gm = float(gmfile['run_time']) else: - gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options) + gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) elif not load_gm: - gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset_all, kernel_options) + gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset_all, kernel_options) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' 
+ kernel_options['name'] + '.gm', gram_matrix_unnorm=gram_matrix_unnorm, run_time=time_precompute_gm) else: gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz' @@ -369,7 +369,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, dataset_all): return gram_matrix_unnorm, time_precompute_gm -def __get_graph_kernel(dataset, kernel_options): +def _get_graph_kernel(dataset, kernel_options): from gklearn.utils.utils import get_graph_kernel_by_name graph_kernel = get_graph_kernel_by_name(kernel_options['name'], node_labels=dataset.node_labels, @@ -381,7 +381,7 @@ def __get_graph_kernel(dataset, kernel_options): return graph_kernel -def __compute_gram_matrix_unnorm(dataset, kernel_options): +def _compute_gram_matrix_unnorm(dataset, kernel_options): from gklearn.utils.utils import get_graph_kernel_by_name graph_kernel = get_graph_kernel_by_name(kernel_options['name'], node_labels=dataset.node_labels, @@ -397,7 +397,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options): return gram_matrix_unnorm, run_time -def __init_output_file_knn(ds_name, gkernel, fit_method, dir_output): +def _init_output_file_knn(ds_name, gkernel, fit_method, dir_output): if not os.path.exists(dir_output): os.makedirs(dir_output) fn_output_detail = 'results_detail_knn.' + ds_name + '.' + gkernel + '.csv' diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py index 657ee4e..dae2b0a 100644 --- a/gklearn/preimage/median_preimage_generator.py +++ b/gklearn/preimage/median_preimage_generator.py @@ -26,63 +26,63 @@ class MedianPreimageGenerator(PreimageGenerator): def __init__(self, dataset=None): PreimageGenerator.__init__(self, dataset=dataset) # arguments to set. - self.__mge = None - self.__ged_options = {} - self.__mge_options = {} - self.__fit_method = 'k-graphs' - self.__init_ecc = None - self.__parallel = True - self.__n_jobs = multiprocessing.cpu_count() - self.__ds_name = None - self.__time_limit_in_sec = 0 - self.__max_itrs = 100 - self.__max_itrs_without_update = 3 - self.__epsilon_residual = 0.01 - self.__epsilon_ec = 0.1 - self.__allow_zeros = False - self.__triangle_rule = True + self._mge = None + self._ged_options = {} + self._mge_options = {} + self._fit_method = 'k-graphs' + self._init_ecc = None + self._parallel = True + self._n_jobs = multiprocessing.cpu_count() + self._ds_name = None + self._time_limit_in_sec = 0 + self._max_itrs = 100 + self._max_itrs_without_update = 3 + self._epsilon_residual = 0.01 + self._epsilon_ec = 0.1 + self._allow_zeros = False + self._triangle_rule = True # values to compute. - self.__runtime_optimize_ec = None - self.__runtime_generate_preimage = None - self.__runtime_total = None - self.__set_median = None - self.__gen_median = None - self.__best_from_dataset = None - self.__sod_set_median = None - self.__sod_gen_median = None - self.__k_dis_set_median = None - self.__k_dis_gen_median = None - self.__k_dis_dataset = None - self.__itrs = 0 - self.__converged = False - self.__num_updates_ecc = 0 + self._runtime_optimize_ec = None + self._runtime_generate_preimage = None + self._runtime_total = None + self._set_median = None + self._gen_median = None + self._best_from_dataset = None + self._sod_set_median = None + self._sod_gen_median = None + self._k_dis_set_median = None + self._k_dis_gen_median = None + self._k_dis_dataset = None + self._itrs = 0 + self._converged = False + self._num_updates_ecc = 0 # values that can be set or to be computed. 
- self.__edit_cost_constants = [] - self.__gram_matrix_unnorm = None - self.__runtime_precompute_gm = None + self._edit_cost_constants = [] + self._gram_matrix_unnorm = None + self._runtime_precompute_gm = None def set_options(self, **kwargs): self._kernel_options = kwargs.get('kernel_options', {}) self._graph_kernel = kwargs.get('graph_kernel', None) self._verbose = kwargs.get('verbose', 2) - self.__ged_options = kwargs.get('ged_options', {}) - self.__mge_options = kwargs.get('mge_options', {}) - self.__fit_method = kwargs.get('fit_method', 'k-graphs') - self.__init_ecc = kwargs.get('init_ecc', None) - self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) - self.__parallel = kwargs.get('parallel', True) - self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) - self.__ds_name = kwargs.get('ds_name', None) - self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) - self.__max_itrs = kwargs.get('max_itrs', 100) - self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) - self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01) - self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) - self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) - self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) - self.__allow_zeros = kwargs.get('allow_zeros', False) - self.__triangle_rule = kwargs.get('triangle_rule', True) + self._ged_options = kwargs.get('ged_options', {}) + self._mge_options = kwargs.get('mge_options', {}) + self._fit_method = kwargs.get('fit_method', 'k-graphs') + self._init_ecc = kwargs.get('init_ecc', None) + self._edit_cost_constants = kwargs.get('edit_cost_constants', []) + self._parallel = kwargs.get('parallel', True) + self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) + self._ds_name = kwargs.get('ds_name', None) + self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) + self._max_itrs = kwargs.get('max_itrs', 100) + self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) + self._epsilon_residual = kwargs.get('epsilon_residual', 0.01) + self._epsilon_ec = kwargs.get('epsilon_ec', 0.1) + self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) + self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) + self._allow_zeros = kwargs.get('allow_zeros', False) + self._triangle_rule = kwargs.get('triangle_rule', True) def run(self): @@ -98,48 +98,48 @@ class MedianPreimageGenerator(PreimageGenerator): start = time.time() # 1. precompute gram matrix. 
- if self.__gram_matrix_unnorm is None: + if self._gram_matrix_unnorm is None: gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) - self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm + self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm end_precompute_gm = time.time() - self.__runtime_precompute_gm = end_precompute_gm - start + self._runtime_precompute_gm = end_precompute_gm - start else: - if self.__runtime_precompute_gm is None: + if self._runtime_precompute_gm is None: raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') - self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm + self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm if self._kernel_options['normalize']: - self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) + self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) else: - self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) + self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) end_precompute_gm = time.time() - start -= self.__runtime_precompute_gm + start -= self._runtime_precompute_gm - if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset': + if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset': start = time.time() - self.__runtime_precompute_gm = 0 + self._runtime_precompute_gm = 0 end_precompute_gm = start # 2. optimize edit cost constants. - self.__optimize_edit_cost_constants() + self._optimize_edit_cost_constants() end_optimize_ec = time.time() - self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm + self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm # 3. compute set median and gen median using optimized edit costs. if self._verbose >= 2: print('\nstart computing set median and gen median using optimized edit costs...\n') - self.__gmg_bcu() + self._gmg_bcu() end_generate_preimage = time.time() - self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec - self.__runtime_total = end_generate_preimage - start + self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec + self._runtime_total = end_generate_preimage - start if self._verbose >= 2: print('medians computed.') - print('SOD of the set median: ', self.__sod_set_median) - print('SOD of the generalized median: ', self.__sod_gen_median) + print('SOD of the set median: ', self._sod_set_median) + print('SOD of the generalized median: ', self._sod_gen_median) # 4. compute kernel distances to the true median. if self._verbose >= 2: print('\nstart computing distances to true median....\n') - self.__compute_distances_to_true_median() + self._compute_distances_to_true_median() # 5. print out results. 
if self._verbose: @@ -147,109 +147,110 @@ class MedianPreimageGenerator(PreimageGenerator): print('================================================================================') print('Finished generation of preimages.') print('--------------------------------------------------------------------------------') - print('The optimized edit cost constants:', self.__edit_cost_constants) - print('SOD of the set median:', self.__sod_set_median) - print('SOD of the generalized median:', self.__sod_gen_median) - print('Distance in kernel space for set median:', self.__k_dis_set_median) - print('Distance in kernel space for generalized median:', self.__k_dis_gen_median) - print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) - print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) - print('Time to optimize edit costs:', self.__runtime_optimize_ec) - print('Time to generate pre-images:', self.__runtime_generate_preimage) - print('Total time:', self.__runtime_total) - print('Total number of iterations for optimizing:', self.__itrs) - print('Total number of updating edit costs:', self.__num_updates_ecc) - print('Is optimization of edit costs converged:', self.__converged) + print('The optimized edit cost constants:', self._edit_cost_constants) + print('SOD of the set median:', self._sod_set_median) + print('SOD of the generalized median:', self._sod_gen_median) + print('Distance in kernel space for set median:', self._k_dis_set_median) + print('Distance in kernel space for generalized median:', self._k_dis_gen_median) + print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset) + print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) + print('Time to optimize edit costs:', self._runtime_optimize_ec) + print('Time to generate pre-images:', self._runtime_generate_preimage) + print('Total time:', self._runtime_total) + print('Total number of iterations for optimizing:', self._itrs) + print('Total number of updating edit costs:', self._num_updates_ecc) + print('Is optimization of edit costs converged:', self._converged) print('================================================================================') print() def get_results(self): results = {} - results['edit_cost_constants'] = self.__edit_cost_constants - results['runtime_precompute_gm'] = self.__runtime_precompute_gm - results['runtime_optimize_ec'] = self.__runtime_optimize_ec - results['runtime_generate_preimage'] = self.__runtime_generate_preimage - results['runtime_total'] = self.__runtime_total - results['sod_set_median'] = self.__sod_set_median - results['sod_gen_median'] = self.__sod_gen_median - results['k_dis_set_median'] = self.__k_dis_set_median - results['k_dis_gen_median'] = self.__k_dis_gen_median - results['k_dis_dataset'] = self.__k_dis_dataset - results['itrs'] = self.__itrs - results['converged'] = self.__converged - results['num_updates_ecc'] = self.__num_updates_ecc + results['edit_cost_constants'] = self._edit_cost_constants + results['runtime_precompute_gm'] = self._runtime_precompute_gm + results['runtime_optimize_ec'] = self._runtime_optimize_ec + results['runtime_generate_preimage'] = self._runtime_generate_preimage + results['runtime_total'] = self._runtime_total + results['sod_set_median'] = self._sod_set_median + results['sod_gen_median'] = self._sod_gen_median + results['k_dis_set_median'] = self._k_dis_set_median + results['k_dis_gen_median'] = self._k_dis_gen_median + results['k_dis_dataset'] = 
self._k_dis_dataset + results['itrs'] = self._itrs + results['converged'] = self._converged + results['num_updates_ecc'] = self._num_updates_ecc results['mge'] = {} - results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() - results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() - results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() + results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased() + results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased() + results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents() + results['ged_matrix_set_median'] = self._mge.ged_matrix_set_median_tmp return results - def __optimize_edit_cost_constants(self): + def _optimize_edit_cost_constants(self): """fit edit cost constants. """ - if self.__fit_method == 'random': # random - if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = random.sample(range(1, 1000), 3) - self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] - elif self.__ged_options['edit_cost'] == 'LETTER2': + if self._fit_method == 'random': # random + if self._ged_options['edit_cost'] == 'LETTER': + self._edit_cost_constants = random.sample(range(1, 1000), 3) + self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants] + elif self._ged_options['edit_cost'] == 'LETTER2': random.seed(time.time()) - self.__edit_cost_constants = random.sample(range(1, 1000), 5) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__edit_cost_constants = random.sample(range(1, 1000), 6) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] + self._edit_cost_constants = random.sample(range(1, 1000), 5) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': + self._edit_cost_constants = random.sample(range(1, 1000), 6) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] if self._dataset.node_attrs == []: - self.__edit_cost_constants[2] = 0 + self._edit_cost_constants[2] = 0 if self._dataset.edge_attrs == []: - self.__edit_cost_constants[5] = 0 + self._edit_cost_constants[5] = 0 else: - self.__edit_cost_constants = random.sample(range(1, 1000), 6) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] + self._edit_cost_constants = random.sample(range(1, 1000), 6) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] if self._verbose >= 2: - print('edit cost constants used:', self.__edit_cost_constants) - elif self.__fit_method == 'expert': # expert - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] + print('edit cost constants used:', self._edit_cost_constants) + elif self._fit_method == 'expert': # expert + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._edit_cost_constants = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] else: - self.__edit_cost_constants = [3, 3, 1, 3, 3, 1] + self._edit_cost_constants = [3, 3, 1, 3, 3, 1] else: - 
self.__edit_cost_constants = self.__init_ecc - elif self.__fit_method == 'k-graphs': - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__init_ecc = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__init_ecc = [0, 0, 1, 1, 1, 0] + self._edit_cost_constants = self._init_ecc + elif self._fit_method == 'k-graphs': + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._init_ecc = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': + self._init_ecc = [0, 0, 1, 1, 1, 0] if self._dataset.node_attrs == []: - self.__init_ecc[2] = 0 + self._init_ecc[2] = 0 if self._dataset.edge_attrs == []: - self.__init_ecc[5] = 0 + self._init_ecc[5] = 0 else: - self.__init_ecc = [3, 3, 1, 3, 3, 1] + self._init_ecc = [3, 3, 1, 3, 3, 1] # optimize on the k-graph subset. - self.__optimize_ecc_by_kernel_distances() - elif self.__fit_method == 'whole-dataset': - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__init_ecc = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] + self._optimize_ecc_by_kernel_distances() + elif self._fit_method == 'whole-dataset': + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._init_ecc = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] else: - self.__init_ecc = [3, 3, 1, 3, 3, 1] + self._init_ecc = [3, 3, 1, 3, 3, 1] # optimize on the whole set. - self.__optimize_ecc_by_kernel_distances() - elif self.__fit_method == 'precomputed': + self._optimize_ecc_by_kernel_distances() + elif self._fit_method == 'precomputed': pass - def __optimize_ecc_by_kernel_distances(self): + def _optimize_ecc_by_kernel_distances(self): # compute distances in feature space.
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() dis_k_vec = [] @@ -263,120 +264,120 @@ class MedianPreimageGenerator(PreimageGenerator): if self._verbose >= 2: print('\ninitial:') time0 = time.time() - graphs = [self.__clean_graph(g) for g in self._dataset.graphs] - self.__edit_cost_constants = self.__init_ecc - options = self.__ged_options.copy() - options['edit_cost_constants'] = self.__edit_cost_constants # @todo + graphs = [self._clean_graph(g) for g in self._dataset.graphs] + self._edit_cost_constants = self._init_ecc + options = self._ged_options.copy() + options['edit_cost_constants'] = self._edit_cost_constants # @todo options['node_labels'] = self._dataset.node_labels options['edge_labels'] = self._dataset.edge_labels options['node_attrs'] = self._dataset.node_attrs options['edge_attrs'] = self._dataset.edge_attrs - ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) + ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self._parallel, verbose=(self._verbose > 1)) residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] time_list = [time.time() - time0] - edit_cost_list = [self.__init_ecc] + edit_cost_list = [self._init_ecc] nb_cost_mat = np.array(n_edit_operations) nb_cost_mat_list = [nb_cost_mat] if self._verbose >= 2: - print('Current edit cost constants:', self.__edit_cost_constants) + print('Current edit cost constants:', self._edit_cost_constants) print('Residual list:', residual_list) # run iteration from initial edit costs. - self.__converged = False + self._converged = False itrs_without_update = 0 - self.__itrs = 0 - self.__num_updates_ecc = 0 - timer = Timer(self.__time_limit_in_sec) - while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update): + self._itrs = 0 + self._num_updates_ecc = 0 + timer = Timer(self._time_limit_in_sec) + while not self._termination_criterion_met(self._converged, timer, self._itrs, itrs_without_update): if self._verbose >= 2: - print('\niteration', self.__itrs + 1) + print('\niteration', self._itrs + 1) time0 = time.time() # "fit" geds to distances in feature space by tuning edit costs using the Least Squares Method. -# np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm', +# np.savez('results/xp_fit_method/fit_data_debug' + str(self._itrs) + '.gm', # nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec, # n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init, # ged_mat=ged_mat) - self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec) - for i in range(len(self.__edit_cost_constants)): - if -1e-9 <= self.__edit_cost_constants[i] <= 1e-9: - self.__edit_cost_constants[i] = 0 - if self.__edit_cost_constants[i] < 0: + self._edit_cost_constants, _ = self._update_ecc(nb_cost_mat, dis_k_vec) + for i in range(len(self._edit_cost_constants)): + if -1e-9 <= self._edit_cost_constants[i] <= 1e-9: + self._edit_cost_constants[i] = 0 + if self._edit_cost_constants[i] < 0: raise ValueError('The edit cost is negative.') - # for i in range(len(self.__edit_cost_constants)): - # if self.__edit_cost_constants[i] < 0: - # self.__edit_cost_constants[i] = 0 # compute new GEDs and numbers of edit operations.
- options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75]) - options['edit_cost_constants'] = self.__edit_cost_constants # @todo + options = self._ged_options.copy() # np.array([self._edit_cost_constants[0], self._edit_cost_constants[1], 0.75]) + options['edit_cost_constants'] = self._edit_cost_constants # @todo options['node_labels'] = self._dataset.node_labels options['edge_labels'] = self._dataset.edge_labels options['node_attrs'] = self._dataset.node_attrs options['edge_attrs'] = self._dataset.edge_attrs - ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) + ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self._parallel, verbose=(self._verbose > 1)) residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) time_list.append(time.time() - time0) - edit_cost_list.append(self.__edit_cost_constants) + edit_cost_list.append(self._edit_cost_constants) nb_cost_mat = np.array(n_edit_operations) nb_cost_mat_list.append(nb_cost_mat) # check convergence. ec_changed = False - for i, cost in enumerate(self.__edit_cost_constants): + for i, cost in enumerate(self._edit_cost_constants): if cost == 0: - if edit_cost_list[-2][i] > self.__epsilon_ec: + if edit_cost_list[-2][i] > self._epsilon_ec: ec_changed = True break - elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec: + elif abs(cost - edit_cost_list[-2][i]) / cost > self._epsilon_ec: ec_changed = True break -# if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec: +# if abs(cost - edit_cost_list[-2][i]) > self._epsilon_ec: # ec_changed = True # break residual_changed = False if residual_list[-1] == 0: - if residual_list[-2] > self.__epsilon_residual: + if residual_list[-2] > self._epsilon_residual: residual_changed = True - elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual: + elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self._epsilon_residual: residual_changed = True - self.__converged = not (ec_changed or residual_changed) - if self.__converged: + self._converged = not (ec_changed or residual_changed) + if self._converged: itrs_without_update += 1 else: itrs_without_update = 0 - self.__num_updates_ecc += 1 + self._num_updates_ecc += 1 # print current states.
if self._verbose >= 2: print() print('-------------------------------------------------------------------------') - print('States of iteration', self.__itrs + 1) + print('States of iteration', self._itrs + 1) print('-------------------------------------------------------------------------') -# print('Time spend:', self.__runtime_optimize_ec) - print('Total number of iterations for optimizing:', self.__itrs + 1) - print('Total number of updating edit costs:', self.__num_updates_ecc) - print('Was optimization of edit costs converged:', self.__converged) +# print('Time spend:', self._runtime_optimize_ec) + print('Total number of iterations for optimizing:', self._itrs + 1) + print('Total number of updating edit costs:', self._num_updates_ecc) + print('Was optimization of edit costs converged:', self._converged) print('Did edit costs change:', ec_changed) print('Did residual change:', residual_changed) print('Iterations without update:', itrs_without_update) - print('Current edit cost constants:', self.__edit_cost_constants) + print('Current edit cost constants:', self._edit_cost_constants) print('Residual list:', residual_list) print('-------------------------------------------------------------------------') - self.__itrs += 1 + self._itrs += 1 - def __termination_criterion_met(self, converged, timer, itr, itrs_without_update): - if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): -# if self.__state == AlgorithmState.TERMINATED: -# self.__state = AlgorithmState.INITIALIZED + def _termination_criterion_met(self, converged, timer, itr, itrs_without_update): + if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False): +# if self._state == AlgorithmState.TERMINATED: +# self._state = AlgorithmState.INITIALIZED return True - return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) + return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False) - def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): - # if self.__ds_name == 'Letter-high': - if self.__ged_options['edit_cost'] == 'LETTER': + def _update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): + # if self._ds_name == 'Letter-high': + if self._ged_options['edit_cost'] == 'LETTER': raise Exception('Cannot compute for cost "LETTER".') pass # # method 1: set alpha automatically, just tune c_vir and c_eir by @@ -429,7 +430,7 @@ class MedianPreimageGenerator(PreimageGenerator): # res = optimize.root(func, [0.9, 1.7, 0.75, 100]) # edit_costs_new = res.x # residual = None - elif self.__ged_options['edit_cost'] == 'LETTER2': + elif self._ged_options['edit_cost'] == 'LETTER2': # # 1. if c_vi != c_vr, c_ei != c_er. 
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] # x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -456,7 +457,7 @@ class MedianPreimageGenerator(PreimageGenerator): # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] # edit_costs_new = np.array(edit_costs_new) # residual = np.sqrt(prob.value) - if not self.__triangle_rule and self.__allow_zeros: + if not self._triangle_rule and self._allow_zeros: nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) @@ -466,10 +467,10 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif self.__triangle_rule and self.__allow_zeros: + elif self._triangle_rule and self._allow_zeros: nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) @@ -480,10 +481,10 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif not self.__triangle_rule and not self.__allow_zeros: + elif not self._triangle_rule and not self._allow_zeros: nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) @@ -505,7 +506,7 @@ class MedianPreimageGenerator(PreimageGenerator): # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] # edit_costs_new = np.array(edit_costs_new) # residual = np.sqrt(prob.value) - elif self.__triangle_rule and not self.__allow_zeros: + elif self._triangle_rule and not self._allow_zeros: # c_vs <= c_vi + c_vr. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -513,7 +514,7 @@ class MedianPreimageGenerator(PreimageGenerator): constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif rw_constraints == '2constraints': # @todo: rearrange it later. @@ -530,11 +531,11 @@ class MedianPreimageGenerator(PreimageGenerator): edit_costs_new = x.value residual = np.sqrt(prob.value) - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': is_n_attr = np.count_nonzero(nb_cost_mat[:,2]) is_e_attr = np.count_nonzero(nb_cost_mat[:,5]) - if self.__ds_name == 'SYNTHETICnew': # @todo: rearrenge this later. + if self._ds_name == 'SYNTHETICnew': # @todo: rearrenge this later. 
# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] nb_cost_mat_new = nb_cost_mat[:,[2,3,4]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -551,7 +552,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0]))) residual = np.sqrt(prob.value) - elif not self.__triangle_rule and self.__allow_zeros: + elif not self._triangle_rule and self._allow_zeros: if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -562,7 +563,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -575,7 +576,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -588,7 +589,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -597,11 +598,11 @@ class MedianPreimageGenerator(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif self.__triangle_rule and self.__allow_zeros: + elif self._triangle_rule and self._allow_zeros: if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -614,7 +615,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -628,7 +629,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -642,7 +643,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -651,18 +652,18 @@ class MedianPreimageGenerator(PreimageGenerator): cost_fun = 
cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif not self.__triangle_rule and not self.__allow_zeros: + elif not self._triangle_rule and not self._allow_zeros: if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -671,7 +672,7 @@ class MedianPreimageGenerator(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -680,7 +681,7 @@ class MedianPreimageGenerator(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -689,11 +690,11 @@ class MedianPreimageGenerator(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif self.__triangle_rule and not self.__allow_zeros: + elif self._triangle_rule and not self._allow_zeros: # c_vs <= c_vi + c_vr. 
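# The constraint vectors with a -1 entry implement the triangle rule named in
# the comment above: substituting a label must not cost more than deleting it
# and inserting the new one, otherwise an optimal edit path would never use a
# substitution. With x = [c_vi, c_vr, c_vs, c_ei, c_er],
#     c_vs <= c_vi + c_vr   <=>   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T @ x >= 0,
# and [0, 0, 1, 1, -1] (or [0, 0, 0, 1, 1, -1] with six costs) is the edge
# analogue. Quick numeric check:
import numpy as np
x = np.array([0.3, 0.3, 0.5, 0.4, 0.4])  # [c_vi, c_vr, c_vs, c_ei, c_er]
assert np.array([1.0, 1.0, -1.0, 0.0, 0.0]) @ x >= 0.0  # 0.3 + 0.3 - 0.5 = 0.1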
if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] @@ -703,7 +704,7 @@ class MedianPreimageGenerator(PreimageGenerator): np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -713,7 +714,7 @@ class MedianPreimageGenerator(PreimageGenerator): constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -723,7 +724,7 @@ class MedianPreimageGenerator(PreimageGenerator): constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -732,13 +733,13 @@ class MedianPreimageGenerator(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. - if not self.__triangle_rule and self.__allow_zeros: + elif self._ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. 
+		if not self._triangle_rule and self._allow_zeros:
 			x = cp.Variable(nb_cost_mat.shape[1])
 			cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
 			constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
@@ -747,10 +748,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 						   np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01,
 						   np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-			self.__execute_cvx(prob)
+			self._execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
-		elif self.__triangle_rule and self.__allow_zeros:
+		elif self._triangle_rule and self._allow_zeros:
 			x = cp.Variable(nb_cost_mat.shape[1])
 			cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
 			constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
@@ -761,29 +762,29 @@ class MedianPreimageGenerator(PreimageGenerator):
 						   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
 						   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-			self.__execute_cvx(prob)
+			self._execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
-		elif not self.__triangle_rule and not self.__allow_zeros:
+		elif not self._triangle_rule and not self._allow_zeros:
 			x = cp.Variable(nb_cost_mat.shape[1])
 			cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
 			constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-			self.__execute_cvx(prob)
+			self._execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
-		elif self.__triangle_rule and not self.__allow_zeros:
+		elif self._triangle_rule and not self._allow_zeros:
 			x = cp.Variable(nb_cost_mat.shape[1])
 			cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
 			constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
 						   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
 						   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-			self.__execute_cvx(prob)
+			self._execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
 		else:
-			raise Exception('The edit cost "', self.__ged_options['edit_cost'], '" is not supported for update progress.')
+			raise Exception('The edit cost "', self._ged_options['edit_cost'], '" is not supported for the update process.')
 #	# method 1: simple least square method.
 #	edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
 #												 rcond=None)
@@ -814,7 +815,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 						   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
 						   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-			self.__execute_cvx(prob)
+			self._execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
@@ -823,7 +824,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 		return edit_costs_new, residual
 
 
-	def __execute_cvx(self, prob):
+	def _execute_cvx(self, prob):
 		try:
 			prob.solve(verbose=(self._verbose>=2))
 		except MemoryError as error0:
@@ -851,7 +852,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 			print()
 
 
-	def __gmg_bcu(self):
+	def _gmg_bcu(self):
 		"""
 		The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG).
@@ -863,70 +864,70 @@ class MedianPreimageGenerator(PreimageGenerator):
 		# Set up the ged environment.
 		ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private variable.
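# _execute_cvx above guards prob.solve() so that a single failed solve does
# not abort the whole cost optimization; its recovery branch falls outside the
# hunks shown here. A plausible standalone sketch of such a wrapper; the retry
# with an explicit first-order solver is an assumption, not gklearn's actual
# recovery code:
import cvxpy as cp

def execute_cvx(prob, verbose=False):
	try:
		prob.solve(verbose=verbose)
	except (MemoryError, cp.error.SolverError) as err:
		print('cvxpy solve failed, retrying with SCS:', repr(err))  # assumed fallback
		prob.solve(solver=cp.SCS, verbose=verbose)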
# gedlibpy.restart_env() - ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constant=self.__edit_cost_constants) - graphs = [self.__clean_graph(g) for g in self._dataset.graphs] + ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constant=self._edit_cost_constants) + graphs = [self._clean_graph(g) for g in self._dataset.graphs] for g in graphs: ged_env.add_nx_graph(g, '') graph_ids = ged_env.get_all_graph_ids() set_median_id = ged_env.add_graph('set_median') gen_median_id = ged_env.add_graph('gen_median') - ged_env.init(init_option=self.__ged_options['init_option']) + ged_env.init(init_option=self._ged_options['init_option']) # Set up the madian graph estimator. - self.__mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) - self.__mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options)) - options = self.__mge_options.copy() + self._mge = MedianGraphEstimator(ged_env, constant_node_costs(self._ged_options['edit_cost'])) + self._mge.set_refine_method(self._ged_options['method'], ged_options_to_string(self._ged_options)) + options = self._mge_options.copy() if not 'seed' in options: options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. - options['parallel'] = self.__parallel + options['parallel'] = self._parallel # Select the GED algorithm. - self.__mge.set_options(mge_options_to_string(options)) - self.__mge.set_label_names(node_labels=self._dataset.node_labels, + self._mge.set_options(mge_options_to_string(options)) + self._mge.set_label_names(node_labels=self._dataset.node_labels, edge_labels=self._dataset.edge_labels, node_attrs=self._dataset.node_attrs, edge_attrs=self._dataset.edge_attrs) - ged_options = self.__ged_options.copy() - if self.__parallel: + ged_options = self._ged_options.copy() + if self._parallel: ged_options['threads'] = 1 - self.__mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options)) - self.__mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options)) + self._mge.set_init_method(ged_options['method'], ged_options_to_string(ged_options)) + self._mge.set_descent_method(ged_options['method'], ged_options_to_string(ged_options)) # Run the estimator. - self.__mge.run(graph_ids, set_median_id, gen_median_id) + self._mge.run(graph_ids, set_median_id, gen_median_id) # Get SODs. - self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') - self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') + self._sod_set_median = self._mge.get_sum_of_distances('initialized') + self._sod_gen_median = self._mge.get_sum_of_distances('converged') # Get median graphs. - self.__set_median = ged_env.get_nx_graph(set_median_id) - self.__gen_median = ged_env.get_nx_graph(gen_median_id) + self._set_median = ged_env.get_nx_graph(set_median_id) + self._gen_median = ged_env.get_nx_graph(gen_median_id) - def __compute_distances_to_true_median(self): + def _compute_distances_to_true_median(self): # compute distance in kernel space for set median. 
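# The quantity computed below is the kernel-space distance between a median
# candidate m and the mean of the embedded dataset graphs,
#     d(m)^2 = k(m, m) - (2/n) * sum_i k(m, g_i) + (1/n^2) * sum_{i,j} k(g_i, g_j),
# after cosine-normalizing each kernel value,
#     k~(g, h) = k(g, h) / sqrt(k(g, g) * k(h, h)).
# withterm3=False apparently drops the constant third term, which shifts all
# candidates equally and leaves their ranking unchanged. A numpy sketch with a
# hypothetical helper name:
import numpy as np

def k_dis_to_mean(k_mm, k_m_to_all, gram):
	n = len(k_m_to_all)
	return np.sqrt(k_mm - 2.0 * np.mean(k_m_to_all) + np.sum(gram) / n ** 2)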
- kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) - kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) + kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options) + kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize + kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize kernel_sm = 1 # @todo: not correct kernel value gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) - self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), + self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_sm, withterm3=False) # compute distance in kernel space for generalized median. - kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) - kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) + kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options) + kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize + kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize kernel_gm = 1 gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) - self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), + self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False) @@ -937,19 +938,19 @@ class MedianPreimageGenerator(PreimageGenerator): [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False)) idx_k_dis_median_set_min = np.argmin(k_dis_median_set) - self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] - self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() + self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] + self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() if self._verbose >= 2: print() - print('distance in kernel space for set median:', self.__k_dis_set_median) - print('distance in kernel space for generalized median:', self.__k_dis_gen_median) - print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) + print('distance in kernel space for set median:', self._k_dis_set_median) + print('distance in kernel space for generalized median:', self._k_dis_gen_median) + print('minimum distance in kernel 
space for each graph in median set:', self._k_dis_dataset) print('distance in kernel space for each graph in median set:', k_dis_median_set) -# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): - def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated. +# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): + def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated. """ Cleans node and edge labels and attributes of the given graph. """ @@ -971,63 +972,63 @@ class MedianPreimageGenerator(PreimageGenerator): @property def mge(self): - return self.__mge + return self._mge @property def ged_options(self): - return self.__ged_options + return self._ged_options @ged_options.setter def ged_options(self, value): - self.__ged_options = value + self._ged_options = value @property def mge_options(self): - return self.__mge_options + return self._mge_options @mge_options.setter def mge_options(self, value): - self.__mge_options = value + self._mge_options = value @property def fit_method(self): - return self.__fit_method + return self._fit_method @fit_method.setter def fit_method(self, value): - self.__fit_method = value + self._fit_method = value @property def init_ecc(self): - return self.__init_ecc + return self._init_ecc @init_ecc.setter def init_ecc(self, value): - self.__init_ecc = value + self._init_ecc = value @property def set_median(self): - return self.__set_median + return self._set_median @property def gen_median(self): - return self.__gen_median + return self._gen_median @property def best_from_dataset(self): - return self.__best_from_dataset + return self._best_from_dataset @property def gram_matrix_unnorm(self): - return self.__gram_matrix_unnorm + return self._gram_matrix_unnorm @gram_matrix_unnorm.setter def gram_matrix_unnorm(self, value): - self.__gram_matrix_unnorm = value \ No newline at end of file + self._gram_matrix_unnorm = value \ No newline at end of file diff --git a/gklearn/preimage/median_preimage_generator_cml.py b/gklearn/preimage/median_preimage_generator_cml.py index e6bca92..e56b894 100644 --- a/gklearn/preimage/median_preimage_generator_cml.py +++ b/gklearn/preimage/median_preimage_generator_cml.py @@ -27,69 +27,69 @@ class MedianPreimageGeneratorCML(PreimageGenerator): def __init__(self, dataset=None): PreimageGenerator.__init__(self, dataset=dataset) ### arguments to set. - self.__mge = None - self.__ged_options = {} - self.__mge_options = {} -# self.__fit_method = 'k-graphs' - self.__init_method = 'random' - self.__init_ecc = None - self.__parallel = True - self.__n_jobs = multiprocessing.cpu_count() - self.__ds_name = None + self._mge = None + self._ged_options = {} + self._mge_options = {} +# self._fit_method = 'k-graphs' + self._init_method = 'random' + self._init_ecc = None + self._parallel = True + self._n_jobs = multiprocessing.cpu_count() + self._ds_name = None # for cml. - self.__time_limit_in_sec = 0 - self.__max_itrs = 100 - self.__max_itrs_without_update = 3 - self.__epsilon_residual = 0.01 - self.__epsilon_ec = 0.1 - self.__allow_zeros = True -# self.__triangle_rule = True + self._time_limit_in_sec = 0 + self._max_itrs = 100 + self._max_itrs_without_update = 3 + self._epsilon_residual = 0.01 + self._epsilon_ec = 0.1 + self._allow_zeros = True +# self._triangle_rule = True ### values to compute. 
- self.__runtime_optimize_ec = None - self.__runtime_generate_preimage = None - self.__runtime_total = None - self.__set_median = None - self.__gen_median = None - self.__best_from_dataset = None - self.__sod_set_median = None - self.__sod_gen_median = None - self.__k_dis_set_median = None - self.__k_dis_gen_median = None - self.__k_dis_dataset = None - self.__node_label_costs = None - self.__edge_label_costs = None + self._runtime_optimize_ec = None + self._runtime_generate_preimage = None + self._runtime_total = None + self._set_median = None + self._gen_median = None + self._best_from_dataset = None + self._sod_set_median = None + self._sod_gen_median = None + self._k_dis_set_median = None + self._k_dis_gen_median = None + self._k_dis_dataset = None + self._node_label_costs = None + self._edge_label_costs = None # for cml. - self.__itrs = 0 - self.__converged = False - self.__num_updates_ecs = 0 + self._itrs = 0 + self._converged = False + self._num_updates_ecs = 0 ### values that can be set or to be computed. - self.__edit_cost_constants = [] - self.__gram_matrix_unnorm = None - self.__runtime_precompute_gm = None + self._edit_cost_constants = [] + self._gram_matrix_unnorm = None + self._runtime_precompute_gm = None def set_options(self, **kwargs): self._kernel_options = kwargs.get('kernel_options', {}) self._graph_kernel = kwargs.get('graph_kernel', None) self._verbose = kwargs.get('verbose', 2) - self.__ged_options = kwargs.get('ged_options', {}) - self.__mge_options = kwargs.get('mge_options', {}) -# self.__fit_method = kwargs.get('fit_method', 'k-graphs') - self.__init_method = kwargs.get('init_method', 'random') - self.__init_ecc = kwargs.get('init_ecc', None) - self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) - self.__parallel = kwargs.get('parallel', True) - self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) - self.__ds_name = kwargs.get('ds_name', None) - self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) - self.__max_itrs = kwargs.get('max_itrs', 100) - self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) - self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01) - self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) - self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) - self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) - self.__allow_zeros = kwargs.get('allow_zeros', True) -# self.__triangle_rule = kwargs.get('triangle_rule', True) + self._ged_options = kwargs.get('ged_options', {}) + self._mge_options = kwargs.get('mge_options', {}) +# self._fit_method = kwargs.get('fit_method', 'k-graphs') + self._init_method = kwargs.get('init_method', 'random') + self._init_ecc = kwargs.get('init_ecc', None) + self._edit_cost_constants = kwargs.get('edit_cost_constants', []) + self._parallel = kwargs.get('parallel', True) + self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) + self._ds_name = kwargs.get('ds_name', None) + self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) + self._max_itrs = kwargs.get('max_itrs', 100) + self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) + self._epsilon_residual = kwargs.get('epsilon_residual', 0.01) + self._epsilon_ec = kwargs.get('epsilon_ec', 0.1) + self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) + self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) + self._allow_zeros = kwargs.get('allow_zeros', True) +# self._triangle_rule = kwargs.get('triangle_rule', True) def 
run(self): @@ -105,48 +105,48 @@ class MedianPreimageGeneratorCML(PreimageGenerator): start = time.time() # 1. precompute gram matrix. - if self.__gram_matrix_unnorm is None: + if self._gram_matrix_unnorm is None: gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) - self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm + self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm end_precompute_gm = time.time() - self.__runtime_precompute_gm = end_precompute_gm - start + self._runtime_precompute_gm = end_precompute_gm - start else: - if self.__runtime_precompute_gm is None: + if self._runtime_precompute_gm is None: raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') - self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm + self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm if self._kernel_options['normalize']: - self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) + self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) else: - self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) + self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) end_precompute_gm = time.time() - start -= self.__runtime_precompute_gm + start -= self._runtime_precompute_gm -# if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset': +# if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset': # start = time.time() -# self.__runtime_precompute_gm = 0 +# self._runtime_precompute_gm = 0 # end_precompute_gm = start # 2. optimize edit cost constants. - self.__optimize_edit_cost_vector() + self._optimize_edit_cost_vector() end_optimize_ec = time.time() - self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm + self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm # 3. compute set median and gen median using optimized edit costs. if self._verbose >= 2: print('\nstart computing set median and gen median using optimized edit costs...\n') - self.__gmg_bcu() + self._gmg_bcu() end_generate_preimage = time.time() - self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec - self.__runtime_total = end_generate_preimage - start + self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec + self._runtime_total = end_generate_preimage - start if self._verbose >= 2: print('medians computed.') - print('SOD of the set median: ', self.__sod_set_median) - print('SOD of the generalized median: ', self.__sod_gen_median) + print('SOD of the set median: ', self._sod_set_median) + print('SOD of the generalized median: ', self._sod_gen_median) # 4. compute kernel distances to the true median. if self._verbose >= 2: print('\nstart computing distances to true median....\n') - self.__compute_distances_to_true_median() + self._compute_distances_to_true_median() # 5. print out results. 
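# run() above proceeds in five steps: reuse or precompute the Gram matrix,
# learn the edit cost matrices, run the median estimator, evaluate the medians
# in kernel space, then report. A hedged usage sketch; my_dataset and
# my_kernel are placeholder objects, and the option values are illustrative,
# not prescribed by this class:
from gklearn.preimage.median_preimage_generator_cml import MedianPreimageGeneratorCML

mpg = MedianPreimageGeneratorCML(dataset=my_dataset)  # assumed prepared gklearn Dataset
mpg.set_options(graph_kernel=my_kernel,               # assumed gklearn kernel instance
				kernel_options={'normalize': True},
				ged_options={'edit_cost': 'CONSTANT', 'method': 'IPFP',
							 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'},
				mge_options={},
				verbose=2)
mpg.run()
results = mpg.get_results()  # runtimes, SODs, kernel distances, iteration counts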
if self._verbose: @@ -154,145 +154,145 @@ class MedianPreimageGeneratorCML(PreimageGenerator): print('================================================================================') print('Finished generation of preimages.') print('--------------------------------------------------------------------------------') - print('The optimized edit costs:', self.__edit_cost_constants) - print('SOD of the set median:', self.__sod_set_median) - print('SOD of the generalized median:', self.__sod_gen_median) - print('Distance in kernel space for set median:', self.__k_dis_set_median) - print('Distance in kernel space for generalized median:', self.__k_dis_gen_median) - print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) - print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) - print('Time to optimize edit costs:', self.__runtime_optimize_ec) - print('Time to generate pre-images:', self.__runtime_generate_preimage) - print('Total time:', self.__runtime_total) - print('Total number of iterations for optimizing:', self.__itrs) - print('Total number of updating edit costs:', self.__num_updates_ecs) - print('Is optimization of edit costs converged:', self.__converged) + print('The optimized edit costs:', self._edit_cost_constants) + print('SOD of the set median:', self._sod_set_median) + print('SOD of the generalized median:', self._sod_gen_median) + print('Distance in kernel space for set median:', self._k_dis_set_median) + print('Distance in kernel space for generalized median:', self._k_dis_gen_median) + print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset) + print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) + print('Time to optimize edit costs:', self._runtime_optimize_ec) + print('Time to generate pre-images:', self._runtime_generate_preimage) + print('Total time:', self._runtime_total) + print('Total number of iterations for optimizing:', self._itrs) + print('Total number of updating edit costs:', self._num_updates_ecs) + print('Is optimization of edit costs converged:', self._converged) print('================================================================================') print() def get_results(self): results = {} - results['edit_cost_constants'] = self.__edit_cost_constants - results['runtime_precompute_gm'] = self.__runtime_precompute_gm - results['runtime_optimize_ec'] = self.__runtime_optimize_ec - results['runtime_generate_preimage'] = self.__runtime_generate_preimage - results['runtime_total'] = self.__runtime_total - results['sod_set_median'] = self.__sod_set_median - results['sod_gen_median'] = self.__sod_gen_median - results['k_dis_set_median'] = self.__k_dis_set_median - results['k_dis_gen_median'] = self.__k_dis_gen_median - results['k_dis_dataset'] = self.__k_dis_dataset - results['itrs'] = self.__itrs - results['converged'] = self.__converged - results['num_updates_ecc'] = self.__num_updates_ecs + results['edit_cost_constants'] = self._edit_cost_constants + results['runtime_precompute_gm'] = self._runtime_precompute_gm + results['runtime_optimize_ec'] = self._runtime_optimize_ec + results['runtime_generate_preimage'] = self._runtime_generate_preimage + results['runtime_total'] = self._runtime_total + results['sod_set_median'] = self._sod_set_median + results['sod_gen_median'] = self._sod_gen_median + results['k_dis_set_median'] = self._k_dis_set_median + results['k_dis_gen_median'] = self._k_dis_gen_median + results['k_dis_dataset'] = self._k_dis_dataset + 
results['itrs'] = self._itrs + results['converged'] = self._converged + results['num_updates_ecc'] = self._num_updates_ecs results['mge'] = {} - results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() - results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() - results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() + results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased() + results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased() + results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents() return results - def __optimize_edit_cost_vector(self): + def _optimize_edit_cost_vector(self): """Learn edit cost vector. """ # Initialize label costs randomly. - if self.__init_method == 'random': + if self._init_method == 'random': # Initialize label costs. - self.__initialize_label_costs() + self._initialize_label_costs() # Optimize edit cost matrices. - self.__optimize_ecm_by_kernel_distances() + self._optimize_ecm_by_kernel_distances() # Initialize all label costs with the same value. - elif self.__init_method == 'uniform': # random + elif self._init_method == 'uniform': # random pass - elif self.__fit_method == 'random': # random - if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = random.sample(range(1, 1000), 3) - self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] - elif self.__ged_options['edit_cost'] == 'LETTER2': + elif self._fit_method == 'random': # random + if self._ged_options['edit_cost'] == 'LETTER': + self._edit_cost_constants = random.sample(range(1, 1000), 3) + self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants] + elif self._ged_options['edit_cost'] == 'LETTER2': random.seed(time.time()) - self.__edit_cost_constants = random.sample(range(1, 1000), 5) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__edit_cost_constants = random.sample(range(1, 1000), 6) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] + self._edit_cost_constants = random.sample(range(1, 1000), 5) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': + self._edit_cost_constants = random.sample(range(1, 1000), 6) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] if self._dataset.node_attrs == []: - self.__edit_cost_constants[2] = 0 + self._edit_cost_constants[2] = 0 if self._dataset.edge_attrs == []: - self.__edit_cost_constants[5] = 0 + self._edit_cost_constants[5] = 0 else: - self.__edit_cost_constants = random.sample(range(1, 1000), 6) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] + self._edit_cost_constants = random.sample(range(1, 1000), 6) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] if self._verbose >= 2: - print('edit cost constants used:', self.__edit_cost_constants) - elif self.__fit_method == 'expert': # expert - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] + print('edit cost constants used:', self._edit_cost_constants) + elif self._fit_method == 
'expert': # expert + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._edit_cost_constants = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] else: - self.__edit_cost_constants = [3, 3, 1, 3, 3, 1] + self._edit_cost_constants = [3, 3, 1, 3, 3, 1] else: - self.__edit_cost_constants = self.__init_ecc - elif self.__fit_method == 'k-graphs': - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__init_ecc = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__init_ecc = [0, 0, 1, 1, 1, 0] + self._edit_cost_constants = self._init_ecc + elif self._fit_method == 'k-graphs': + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._init_ecc = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': + self._init_ecc = [0, 0, 1, 1, 1, 0] if self._dataset.node_attrs == []: - self.__init_ecc[2] = 0 + self._init_ecc[2] = 0 if self._dataset.edge_attrs == []: - self.__init_ecc[5] = 0 + self._init_ecc[5] = 0 else: - self.__init_ecc = [3, 3, 1, 3, 3, 1] + self._init_ecc = [3, 3, 1, 3, 3, 1] # optimize on the k-graph subset. - self.__optimize_ecm_by_kernel_distances() - elif self.__fit_method == 'whole-dataset': - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__init_ecc = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] + self._optimize_ecm_by_kernel_distances() + elif self._fit_method == 'whole-dataset': + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._init_ecc = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] else: - self.__init_ecc = [3, 3, 1, 3, 3, 1] + self._init_ecc = [3, 3, 1, 3, 3, 1] # optimizeon the whole set. - self.__optimize_ecc_by_kernel_distances() - elif self.__fit_method == 'precomputed': + self._optimize_ecc_by_kernel_distances() + elif self._fit_method == 'precomputed': pass - def __initialize_label_costs(self): - self.__initialize_node_label_costs() - self.__initialize_edge_label_costs() + def _initialize_label_costs(self): + self._initialize_node_label_costs() + self._initialize_edge_label_costs() - def __initialize_node_label_costs(self): + def _initialize_node_label_costs(self): # Get list of node labels. nls = self._dataset.get_all_node_labels() # Generate random costs. nb_nl = int((len(nls) * (len(nls) - 1)) / 2 + 2 * len(nls)) rand_costs = random.sample(range(1, 10 * nb_nl + 1), nb_nl) rand_costs /= np.max(rand_costs) # @todo: maybe not needed. - self.__node_label_costs = rand_costs + self._node_label_costs = rand_costs - def __initialize_edge_label_costs(self): + def _initialize_edge_label_costs(self): # Get list of edge labels. els = self._dataset.get_all_edge_labels() # Generate random costs. nb_el = int((len(els) * (len(els) - 1)) / 2 + 2 * len(els)) rand_costs = random.sample(range(1, 10 * nb_el + 1), nb_el) rand_costs /= np.max(rand_costs) # @todo: maybe not needed. 
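# The count formula used by both initializers above,
#     nb = n * (n - 1) / 2 + 2 * n,
# allocates one substitution cost per unordered label pair plus one insertion
# and one deletion cost per label. Worked example for n = 3 labels {a, b, c}:
#     substitutions (a,b), (a,c), (b,c) -> 3; insertions -> 3; deletions -> 3;
#     total 9 = 3 * 2 / 2 + 2 * 3.
n = 3
assert int(n * (n - 1) / 2 + 2 * n) == 9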
- self.__edge_label_costs = rand_costs + self._edge_label_costs = rand_costs - def __optimize_ecm_by_kernel_distances(self): + def _optimize_ecm_by_kernel_distances(self): # compute distances in feature space. dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() dis_k_vec = [] @@ -303,35 +303,35 @@ class MedianPreimageGeneratorCML(PreimageGenerator): dis_k_vec = np.array(dis_k_vec) # Set GEDEnv options. -# graphs = [self.__clean_graph(g) for g in self._dataset.graphs] -# self.__edit_cost_constants = self.__init_ecc - options = self.__ged_options.copy() - options['edit_cost_constants'] = self.__edit_cost_constants # @todo: not needed. +# graphs = [self._clean_graph(g) for g in self._dataset.graphs] +# self._edit_cost_constants = self._init_ecc + options = self._ged_options.copy() + options['edit_cost_constants'] = self._edit_cost_constants # @todo: not needed. options['node_labels'] = self._dataset.node_labels options['edge_labels'] = self._dataset.edge_labels # options['node_attrs'] = self._dataset.node_attrs # options['edge_attrs'] = self._dataset.edge_attrs - options['node_label_costs'] = self.__node_label_costs - options['edge_label_costs'] = self.__edge_label_costs + options['node_label_costs'] = self._node_label_costs + options['edge_label_costs'] = self._edge_label_costs # Learner cost matrices. # Initialize cost learner. - cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self.__parallel, verbose=self._verbose) # @todo - cml.set_update_params(time_limit_in_sec=self.__time_limit_in_sec, max_itrs=self.__max_itrs, max_itrs_without_update=self.__max_itrs_without_update, epsilon_residual=self.__epsilon_residual, epsilon_ec=self.__epsilon_ec) + cml = CostMatricesLearner(edit_cost='CONSTANT', triangle_rule=False, allow_zeros=True, parallel=self._parallel, verbose=self._verbose) # @todo + cml.set_update_params(time_limit_in_sec=self._time_limit_in_sec, max_itrs=self._max_itrs, max_itrs_without_update=self._max_itrs_without_update, epsilon_residual=self._epsilon_residual, epsilon_ec=self._epsilon_ec) # Run cost learner. cml.update(dis_k_vec, self._dataset.graphs, options) # Get results. results = cml.get_results() - self.__converged = results['converged'] - self.__itrs = results['itrs'] - self.__num_updates_ecs = results['num_updates_ecs'] + self._converged = results['converged'] + self._itrs = results['itrs'] + self._num_updates_ecs = results['num_updates_ecs'] cost_list = results['cost_list'] - self.__node_label_costs = cost_list[-1][0:len(self.__node_label_costs)] - self.__edge_label_costs = cost_list[-1][len(self.__node_label_costs):] + self._node_label_costs = cost_list[-1][0:len(self._node_label_costs)] + self._edge_label_costs = cost_list[-1][len(self._node_label_costs):] - def __gmg_bcu(self): + def _gmg_bcu(self): """ The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). @@ -343,77 +343,77 @@ class MedianPreimageGeneratorCML(PreimageGenerator): # Set up the ged environment. ged_env = GEDEnv() # @todo: maybe create a ged_env as a private varible. 
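# label_costs_to_matrix, used a few lines below, expands such a flat cost
# vector into a square matrix indexed by label ids. The layout sketched here
# is an assumption consistent with the count formula above (one substitution
# entry per unordered pair plus per-label insertion and deletion costs); the
# authoritative layout is gklearn's label_costs_to_matrix itself:
import numpy as np

costs = np.array([0.5,        # substitution a <-> b
				  0.2, 0.3,   # insertion of a, of b  (ordering assumed)
				  0.4, 0.6])  # deletion of a, of b   (ordering assumed)
C = np.zeros((3, 3))          # row/column 0 reserved for the dummy label
C[1, 2] = C[2, 1] = costs[0]  # symmetric substitutions, zero diagonal
C[0, 1:] = costs[1:3]         # insertions
C[1:, 0] = costs[3:5]         # deletions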
# gedlibpy.restart_env() - ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants) - graphs = [self.__clean_graph(g) for g in self._dataset.graphs] + ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants) + graphs = [self._clean_graph(g) for g in self._dataset.graphs] for g in graphs: ged_env.add_nx_graph(g, '') graph_ids = ged_env.get_all_graph_ids() node_labels = ged_env.get_all_node_labels() edge_labels = ged_env.get_all_edge_labels() - node_label_costs = label_costs_to_matrix(self.__node_label_costs, len(node_labels)) - edge_label_costs = label_costs_to_matrix(self.__edge_label_costs, len(edge_labels)) + node_label_costs = label_costs_to_matrix(self._node_label_costs, len(node_labels)) + edge_label_costs = label_costs_to_matrix(self._edge_label_costs, len(edge_labels)) ged_env.set_label_costs(node_label_costs, edge_label_costs) set_median_id = ged_env.add_graph('set_median') gen_median_id = ged_env.add_graph('gen_median') - ged_env.init(init_type=self.__ged_options['init_option']) + ged_env.init(init_type=self._ged_options['init_option']) # Set up the madian graph estimator. - self.__mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) - self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options) - options = self.__mge_options.copy() + self._mge = MedianGraphEstimatorCML(ged_env, constant_node_costs(self._ged_options['edit_cost'])) + self._mge.set_refine_method(self._ged_options['method'], self._ged_options) + options = self._mge_options.copy() if not 'seed' in options: options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. - options['parallel'] = self.__parallel + options['parallel'] = self._parallel # Select the GED algorithm. - self.__mge.set_options(mge_options_to_string(options)) - self.__mge.set_label_names(node_labels=self._dataset.node_labels, + self._mge.set_options(mge_options_to_string(options)) + self._mge.set_label_names(node_labels=self._dataset.node_labels, edge_labels=self._dataset.edge_labels, node_attrs=self._dataset.node_attrs, edge_attrs=self._dataset.edge_attrs) - ged_options = self.__ged_options.copy() - if self.__parallel: + ged_options = self._ged_options.copy() + if self._parallel: ged_options['threads'] = 1 - self.__mge.set_init_method(ged_options['method'], ged_options) - self.__mge.set_descent_method(ged_options['method'], ged_options) + self._mge.set_init_method(ged_options['method'], ged_options) + self._mge.set_descent_method(ged_options['method'], ged_options) # Run the estimator. - self.__mge.run(graph_ids, set_median_id, gen_median_id) + self._mge.run(graph_ids, set_median_id, gen_median_id) # Get SODs. - self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') - self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') + self._sod_set_median = self._mge.get_sum_of_distances('initialized') + self._sod_gen_median = self._mge.get_sum_of_distances('converged') # Get median graphs. - self.__set_median = ged_env.get_nx_graph(set_median_id) - self.__gen_median = ged_env.get_nx_graph(gen_median_id) + self._set_median = ged_env.get_nx_graph(set_median_id) + self._gen_median = ged_env.get_nx_graph(gen_median_id) - def __compute_distances_to_true_median(self): + def _compute_distances_to_true_median(self): # compute distance in kernel space for set median. 
- kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) - kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) + kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options) + kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize + kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize kernel_sm = 1 # @todo: not correct kernel value gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) - self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), + self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_sm, withterm3=False) # compute distance in kernel space for generalized median. - kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) - kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) + kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options) + kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize + kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize kernel_gm = 1 gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) - self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), + self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False) @@ -424,19 +424,19 @@ class MedianPreimageGeneratorCML(PreimageGenerator): [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False)) idx_k_dis_median_set_min = np.argmin(k_dis_median_set) - self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] - self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() + self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] + self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() if self._verbose >= 2: print() - print('distance in kernel space for set median:', self.__k_dis_set_median) - print('distance in kernel space for generalized median:', self.__k_dis_gen_median) - print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) + print('distance in kernel space for set median:', self._k_dis_set_median) + print('distance in kernel space for generalized median:', self._k_dis_gen_median) + print('minimum distance in 
kernel space for each graph in median set:', self._k_dis_dataset) print('distance in kernel space for each graph in median set:', k_dis_median_set) -# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): - def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated. +# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): + def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated. """ Cleans node and edge labels and attributes of the given graph. """ @@ -458,63 +458,63 @@ class MedianPreimageGeneratorCML(PreimageGenerator): @property def mge(self): - return self.__mge + return self._mge @property def ged_options(self): - return self.__ged_options + return self._ged_options @ged_options.setter def ged_options(self, value): - self.__ged_options = value + self._ged_options = value @property def mge_options(self): - return self.__mge_options + return self._mge_options @mge_options.setter def mge_options(self, value): - self.__mge_options = value + self._mge_options = value @property def fit_method(self): - return self.__fit_method + return self._fit_method @fit_method.setter def fit_method(self, value): - self.__fit_method = value + self._fit_method = value @property def init_ecc(self): - return self.__init_ecc + return self._init_ecc @init_ecc.setter def init_ecc(self, value): - self.__init_ecc = value + self._init_ecc = value @property def set_median(self): - return self.__set_median + return self._set_median @property def gen_median(self): - return self.__gen_median + return self._gen_median @property def best_from_dataset(self): - return self.__best_from_dataset + return self._best_from_dataset @property def gram_matrix_unnorm(self): - return self.__gram_matrix_unnorm + return self._gram_matrix_unnorm @gram_matrix_unnorm.setter def gram_matrix_unnorm(self, value): - self.__gram_matrix_unnorm = value \ No newline at end of file + self._gram_matrix_unnorm = value \ No newline at end of file diff --git a/gklearn/preimage/median_preimage_generator_py.py b/gklearn/preimage/median_preimage_generator_py.py index cdc7a3c..cd26f7e 100644 --- a/gklearn/preimage/median_preimage_generator_py.py +++ b/gklearn/preimage/median_preimage_generator_py.py @@ -28,63 +28,63 @@ class MedianPreimageGeneratorPy(PreimageGenerator): def __init__(self, dataset=None): PreimageGenerator.__init__(self, dataset=dataset) # arguments to set. - self.__mge = None - self.__ged_options = {} - self.__mge_options = {} - self.__fit_method = 'k-graphs' - self.__init_ecc = None - self.__parallel = True - self.__n_jobs = multiprocessing.cpu_count() - self.__ds_name = None - self.__time_limit_in_sec = 0 - self.__max_itrs = 100 - self.__max_itrs_without_update = 3 - self.__epsilon_residual = 0.01 - self.__epsilon_ec = 0.1 - self.__allow_zeros = False - self.__triangle_rule = True + self._mge = None + self._ged_options = {} + self._mge_options = {} + self._fit_method = 'k-graphs' + self._init_ecc = None + self._parallel = True + self._n_jobs = multiprocessing.cpu_count() + self._ds_name = None + self._time_limit_in_sec = 0 + self._max_itrs = 100 + self._max_itrs_without_update = 3 + self._epsilon_residual = 0.01 + self._epsilon_ec = 0.1 + self._allow_zeros = False + self._triangle_rule = True # values to compute. 
- self.__runtime_optimize_ec = None - self.__runtime_generate_preimage = None - self.__runtime_total = None - self.__set_median = None - self.__gen_median = None - self.__best_from_dataset = None - self.__sod_set_median = None - self.__sod_gen_median = None - self.__k_dis_set_median = None - self.__k_dis_gen_median = None - self.__k_dis_dataset = None - self.__itrs = 0 - self.__converged = False - self.__num_updates_ecc = 0 + self._runtime_optimize_ec = None + self._runtime_generate_preimage = None + self._runtime_total = None + self._set_median = None + self._gen_median = None + self._best_from_dataset = None + self._sod_set_median = None + self._sod_gen_median = None + self._k_dis_set_median = None + self._k_dis_gen_median = None + self._k_dis_dataset = None + self._itrs = 0 + self._converged = False + self._num_updates_ecc = 0 # values that can be set or to be computed. - self.__edit_cost_constants = [] - self.__gram_matrix_unnorm = None - self.__runtime_precompute_gm = None + self._edit_cost_constants = [] + self._gram_matrix_unnorm = None + self._runtime_precompute_gm = None def set_options(self, **kwargs): self._kernel_options = kwargs.get('kernel_options', {}) self._graph_kernel = kwargs.get('graph_kernel', None) self._verbose = kwargs.get('verbose', 2) - self.__ged_options = kwargs.get('ged_options', {}) - self.__mge_options = kwargs.get('mge_options', {}) - self.__fit_method = kwargs.get('fit_method', 'k-graphs') - self.__init_ecc = kwargs.get('init_ecc', None) - self.__edit_cost_constants = kwargs.get('edit_cost_constants', []) - self.__parallel = kwargs.get('parallel', True) - self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) - self.__ds_name = kwargs.get('ds_name', None) - self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) - self.__max_itrs = kwargs.get('max_itrs', 100) - self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) - self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01) - self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1) - self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) - self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) - self.__allow_zeros = kwargs.get('allow_zeros', False) - self.__triangle_rule = kwargs.get('triangle_rule', True) + self._ged_options = kwargs.get('ged_options', {}) + self._mge_options = kwargs.get('mge_options', {}) + self._fit_method = kwargs.get('fit_method', 'k-graphs') + self._init_ecc = kwargs.get('init_ecc', None) + self._edit_cost_constants = kwargs.get('edit_cost_constants', []) + self._parallel = kwargs.get('parallel', True) + self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) + self._ds_name = kwargs.get('ds_name', None) + self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) + self._max_itrs = kwargs.get('max_itrs', 100) + self._max_itrs_without_update = kwargs.get('max_itrs_without_update', 3) + self._epsilon_residual = kwargs.get('epsilon_residual', 0.01) + self._epsilon_ec = kwargs.get('epsilon_ec', 0.1) + self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) + self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) + self._allow_zeros = kwargs.get('allow_zeros', False) + self._triangle_rule = kwargs.get('triangle_rule', True) def run(self): @@ -100,48 +100,48 @@ class MedianPreimageGeneratorPy(PreimageGenerator): start = time.time() # 1. precompute gram matrix. 
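# normalize_gm below presumably performs cosine normalization of the Gram
# matrix, K~[i, j] = K[i, j] / sqrt(K[i, i] * K[j, j]), so that every graph
# gets unit self-similarity; this matches how the cross-kernel values to the
# medians are normalized later. A short numpy equivalent (a sketch, not the
# library routine itself):
import numpy as np

def normalize_gram(K):
	d = np.sqrt(np.diag(K))
	return K / np.outer(d, d)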
- if self.__gram_matrix_unnorm is None: + if self._gram_matrix_unnorm is None: gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) - self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm + self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm end_precompute_gm = time.time() - self.__runtime_precompute_gm = end_precompute_gm - start + self._runtime_precompute_gm = end_precompute_gm - start else: - if self.__runtime_precompute_gm is None: + if self._runtime_precompute_gm is None: raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') - self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm + self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm if self._kernel_options['normalize']: - self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) + self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) else: - self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) + self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) end_precompute_gm = time.time() - start -= self.__runtime_precompute_gm + start -= self._runtime_precompute_gm - if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset': + if self._fit_method != 'k-graphs' and self._fit_method != 'whole-dataset': start = time.time() - self.__runtime_precompute_gm = 0 + self._runtime_precompute_gm = 0 end_precompute_gm = start # 2. optimize edit cost constants. - self.__optimize_edit_cost_constants() + self._optimize_edit_cost_constants() end_optimize_ec = time.time() - self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm + self._runtime_optimize_ec = end_optimize_ec - end_precompute_gm # 3. compute set median and gen median using optimized edit costs. if self._verbose >= 2: print('\nstart computing set median and gen median using optimized edit costs...\n') - self.__gmg_bcu() + self._gmg_bcu() end_generate_preimage = time.time() - self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec - self.__runtime_total = end_generate_preimage - start + self._runtime_generate_preimage = end_generate_preimage - end_optimize_ec + self._runtime_total = end_generate_preimage - start if self._verbose >= 2: print('medians computed.') - print('SOD of the set median: ', self.__sod_set_median) - print('SOD of the generalized median: ', self.__sod_gen_median) + print('SOD of the set median: ', self._sod_set_median) + print('SOD of the generalized median: ', self._sod_gen_median) # 4. compute kernel distances to the true median. if self._verbose >= 2: print('\nstart computing distances to true median....\n') - self.__compute_distances_to_true_median() + self._compute_distances_to_true_median() # 5. print out results. 
if self._verbose: @@ -149,109 +149,109 @@ class MedianPreimageGeneratorPy(PreimageGenerator): print('================================================================================') print('Finished generation of preimages.') print('--------------------------------------------------------------------------------') - print('The optimized edit cost constants:', self.__edit_cost_constants) - print('SOD of the set median:', self.__sod_set_median) - print('SOD of the generalized median:', self.__sod_gen_median) - print('Distance in kernel space for set median:', self.__k_dis_set_median) - print('Distance in kernel space for generalized median:', self.__k_dis_gen_median) - print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) - print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) - print('Time to optimize edit costs:', self.__runtime_optimize_ec) - print('Time to generate pre-images:', self.__runtime_generate_preimage) - print('Total time:', self.__runtime_total) - print('Total number of iterations for optimizing:', self.__itrs) - print('Total number of updating edit costs:', self.__num_updates_ecc) - print('Is optimization of edit costs converged:', self.__converged) + print('The optimized edit cost constants:', self._edit_cost_constants) + print('SOD of the set median:', self._sod_set_median) + print('SOD of the generalized median:', self._sod_gen_median) + print('Distance in kernel space for set median:', self._k_dis_set_median) + print('Distance in kernel space for generalized median:', self._k_dis_gen_median) + print('Minimum distance in kernel space for each graph in median set:', self._k_dis_dataset) + print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) + print('Time to optimize edit costs:', self._runtime_optimize_ec) + print('Time to generate pre-images:', self._runtime_generate_preimage) + print('Total time:', self._runtime_total) + print('Total number of iterations for optimizing:', self._itrs) + print('Total number of updating edit costs:', self._num_updates_ecc) + print('Is optimization of edit costs converged:', self._converged) print('================================================================================') print() def get_results(self): results = {} - results['edit_cost_constants'] = self.__edit_cost_constants - results['runtime_precompute_gm'] = self.__runtime_precompute_gm - results['runtime_optimize_ec'] = self.__runtime_optimize_ec - results['runtime_generate_preimage'] = self.__runtime_generate_preimage - results['runtime_total'] = self.__runtime_total - results['sod_set_median'] = self.__sod_set_median - results['sod_gen_median'] = self.__sod_gen_median - results['k_dis_set_median'] = self.__k_dis_set_median - results['k_dis_gen_median'] = self.__k_dis_gen_median - results['k_dis_dataset'] = self.__k_dis_dataset - results['itrs'] = self.__itrs - results['converged'] = self.__converged - results['num_updates_ecc'] = self.__num_updates_ecc + results['edit_cost_constants'] = self._edit_cost_constants + results['runtime_precompute_gm'] = self._runtime_precompute_gm + results['runtime_optimize_ec'] = self._runtime_optimize_ec + results['runtime_generate_preimage'] = self._runtime_generate_preimage + results['runtime_total'] = self._runtime_total + results['sod_set_median'] = self._sod_set_median + results['sod_gen_median'] = self._sod_gen_median + results['k_dis_set_median'] = self._k_dis_set_median + results['k_dis_gen_median'] = self._k_dis_gen_median + results['k_dis_dataset'] = 
self._k_dis_dataset + results['itrs'] = self._itrs + results['converged'] = self._converged + results['num_updates_ecc'] = self._num_updates_ecc results['mge'] = {} - results['mge']['num_decrease_order'] = self.__mge.get_num_times_order_decreased() - results['mge']['num_increase_order'] = self.__mge.get_num_times_order_increased() - results['mge']['num_converged_descents'] = self.__mge.get_num_converged_descents() + results['mge']['num_decrease_order'] = self._mge.get_num_times_order_decreased() + results['mge']['num_increase_order'] = self._mge.get_num_times_order_increased() + results['mge']['num_converged_descents'] = self._mge.get_num_converged_descents() return results - def __optimize_edit_cost_constants(self): + def _optimize_edit_cost_constants(self): """fit edit cost constants. """ - if self.__fit_method == 'random': # random - if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = random.sample(range(1, 1000), 3) - self.__edit_cost_constants = [item * 0.001 for item in self.__edit_cost_constants] - elif self.__ged_options['edit_cost'] == 'LETTER2': + if self._fit_method == 'random': # random + if self._ged_options['edit_cost'] == 'LETTER': + self._edit_cost_constants = random.sample(range(1, 1000), 3) + self._edit_cost_constants = [item * 0.001 for item in self._edit_cost_constants] + elif self._ged_options['edit_cost'] == 'LETTER2': random.seed(time.time()) - self.__edit_cost_constants = random.sample(range(1, 1000), 5) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__edit_cost_constants = random.sample(range(1, 1000), 6) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] + self._edit_cost_constants = random.sample(range(1, 1000), 5) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': + self._edit_cost_constants = random.sample(range(1, 1000), 6) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] if self._dataset.node_attrs == []: - self.__edit_cost_constants[2] = 0 + self._edit_cost_constants[2] = 0 if self._dataset.edge_attrs == []: - self.__edit_cost_constants[5] = 0 + self._edit_cost_constants[5] = 0 else: - self.__edit_cost_constants = random.sample(range(1, 1000), 6) - self.__edit_cost_constants = [item * 0.01 for item in self.__edit_cost_constants] + self._edit_cost_constants = random.sample(range(1, 1000), 6) + self._edit_cost_constants = [item * 0.01 for item in self._edit_cost_constants] if self._verbose >= 2: - print('edit cost constants used:', self.__edit_cost_constants) - elif self.__fit_method == 'expert': # expert - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__edit_cost_constants = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] + print('edit cost constants used:', self._edit_cost_constants) + elif self._fit_method == 'expert': # expert + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._edit_cost_constants = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425] else: - self.__edit_cost_constants = [3, 3, 1, 3, 3, 1] + self._edit_cost_constants = [3, 3, 1, 3, 3, 1] else: - self.__edit_cost_constants = self.__init_ecc - elif self.__fit_method == 
'k-graphs': - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__init_ecc = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': - self.__init_ecc = [0, 0, 1, 1, 1, 0] + self._edit_cost_constants = self._init_ecc + elif self._fit_method == 'k-graphs': + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._init_ecc = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': + self._init_ecc = [0, 0, 1, 1, 1, 0] if self._dataset.node_attrs == []: - self.__init_ecc[2] = 0 + self._init_ecc[2] = 0 if self._dataset.edge_attrs == []: - self.__init_ecc[5] = 0 + self._init_ecc[5] = 0 else: - self.__init_ecc = [3, 3, 1, 3, 3, 1] + self._init_ecc = [3, 3, 1, 3, 3, 1] # optimize on the k-graph subset. - self.__optimize_ecc_by_kernel_distances() - elif self.__fit_method == 'whole-dataset': - if self.__init_ecc is None: - if self.__ged_options['edit_cost'] == 'LETTER': - self.__init_ecc = [0.9, 1.7, 0.75] - elif self.__ged_options['edit_cost'] == 'LETTER2': - self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] + self._optimize_ecc_by_kernel_distances() + elif self._fit_method == 'whole-dataset': + if self._init_ecc is None: + if self._ged_options['edit_cost'] == 'LETTER': + self._init_ecc = [0.9, 1.7, 0.75] + elif self._ged_options['edit_cost'] == 'LETTER2': + self._init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425] else: - self.__init_ecc = [3, 3, 1, 3, 3, 1] + self._init_ecc = [3, 3, 1, 3, 3, 1] # optimize on the whole set. - self.__optimize_ecc_by_kernel_distances() - elif self.__fit_method == 'precomputed': + self._optimize_ecc_by_kernel_distances() + elif self._fit_method == 'precomputed': pass - def __optimize_ecc_by_kernel_distances(self): + def _optimize_ecc_by_kernel_distances(self): # compute distances in feature space.
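The "distances in feature space" fitted by this method are kernel-induced: for graphs g1 and g2 with kernel k, d(g1, g2) = sqrt(k(g1, g1) + k(g2, g2) - 2 k(g1, g2)). A self-contained sketch of turning a Gram matrix into the flat vector of pairwise distances that `dis_k_vec` holds below (helper names are ours):

```python
import numpy as np

def kernel_distance_matrix(K):
    # d(i, j) = sqrt(K[i, i] + K[j, j] - 2 * K[i, j]) for a PSD Gram matrix K.
    d = np.diag(K)
    sq = d[:, None] + d[None, :] - 2.0 * K
    return np.sqrt(np.maximum(sq, 0.0))  # clip tiny negative round-off

def upper_triangle_vector(D):
    # One entry per unordered graph pair, matching a flattened distance list.
    return D[np.triu_indices_from(D, k=1)]

K = np.array([[1.0, 0.5, 0.2],
              [0.5, 1.0, 0.7],
              [0.2, 0.7, 1.0]])
print(upper_triangle_vector(kernel_distance_matrix(K)))  # [1.0, 1.264..., 0.774...]
```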
dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix() dis_k_vec = [] @@ -265,120 +265,120 @@ class MedianPreimageGeneratorPy(PreimageGenerator): if self._verbose >= 2: print('\ninitial:') time0 = time.time() - graphs = [self.__clean_graph(g) for g in self._dataset.graphs] - self.__edit_cost_constants = self.__init_ecc - options = self.__ged_options.copy() - options['edit_cost_constants'] = self.__edit_cost_constants # @todo + graphs = [self._clean_graph(g) for g in self._dataset.graphs] + self._edit_cost_constants = self._init_ecc + options = self._ged_options.copy() + options['edit_cost_constants'] = self._edit_cost_constants # @todo options['node_labels'] = self._dataset.node_labels options['edge_labels'] = self._dataset.edge_labels options['node_attrs'] = self._dataset.node_attrs options['edge_attrs'] = self._dataset.edge_attrs - ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) + ged_vec_init, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self._parallel, verbose=(self._verbose > 1)) residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] time_list = [time.time() - time0] - edit_cost_list = [self.__init_ecc] + edit_cost_list = [self._init_ecc] nb_cost_mat = np.array(n_edit_operations) nb_cost_mat_list = [nb_cost_mat] if self._verbose >= 2: - print('Current edit cost constants:', self.__edit_cost_constants) + print('Current edit cost constants:', self._edit_cost_constants) print('Residual list:', residual_list) # run iteration from initial edit costs. - self.__converged = False + self._converged = False itrs_without_update = 0 - self.__itrs = 0 - self.__num_updates_ecc = 0 - timer = Timer(self.__time_limit_in_sec) - while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update): + self._itrs = 0 + self._num_updates_ecc = 0 + timer = Timer(self._time_limit_in_sec) + while not self._termination_criterion_met(self._converged, timer, self._itrs, itrs_without_update): if self._verbose >= 2: - print('\niteration', self.__itrs + 1) + print('\niteration', self._itrs + 1) time0 = time.time() # "fit" geds to distances in feature space by tuning edit costs using the Least Squares Method. -# np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm', +# np.savez('results/xp_fit_method/fit_data_debug' + str(self._itrs) + '.gm', # nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec, # n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init, # ged_mat=ged_mat) - self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec) - for i in range(len(self.__edit_cost_constants)): - if -1e-9 <= self.__edit_cost_constants[i] <= 1e-9: - self.__edit_cost_constants[i] = 0 - if self.__edit_cost_constants[i] < 0: + self._edit_cost_constants, _ = self._update_ecc(nb_cost_mat, dis_k_vec) + for i in range(len(self._edit_cost_constants)): + if -1e-9 <= self._edit_cost_constants[i] <= 1e-9: + self._edit_cost_constants[i] = 0 + if self._edit_cost_constants[i] < 0: raise ValueError('The edit cost is negative.') - # for i in range(len(self.__edit_cost_constants)): - # if self.__edit_cost_constants[i] < 0: - # self.__edit_cost_constants[i] = 0 # compute new GEDs and numbers of edit operations.
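A note on the `_update_ecc` call a few lines up, before the GEDs are recomputed: it refits the edit cost vector so that GEDs track the kernel distances. Since each GED is (approximately) linear in the costs, GED ≈ N·c with N counting edit operations per graph pair, the simplest variant is plain least squares, which the file itself mentions later as a commented-out "method 1". A toy sketch with made-up numbers:

```python
import numpy as np

# One row per graph pair, one column per edit-operation type; each entry is
# how often that operation occurs in the (approximate) optimal edit path.
nb_cost_mat = np.array([[2.0, 1.0, 0.0],
                        [1.0, 0.0, 3.0],
                        [0.0, 2.0, 1.0],
                        [1.0, 1.0, 1.0]])
dis_k_vec = np.array([2.5, 3.0, 1.8, 2.1])  # target kernel distances (toy values)

# Unconstrained fit: argmin_c ||nb_cost_mat @ c - dis_k_vec||^2.
# The real _update_ecc adds non-negativity and triangle-rule constraints via cvxpy.
edit_costs, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, rcond=None)
print('fitted edit costs:', edit_costs)
```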
- options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75]) - options['edit_cost_constants'] = self.__edit_cost_constants # @todo + options = self._ged_options.copy() # np.array([self._edit_cost_constants[0], self._edit_cost_constants[1], 0.75]) + options['edit_cost_constants'] = self._edit_cost_constants # @todo options['node_labels'] = self._dataset.node_labels options['edge_labels'] = self._dataset.edge_labels options['node_attrs'] = self._dataset.node_attrs options['edge_attrs'] = self._dataset.edge_attrs - ged_vec, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self.__parallel, verbose=(self._verbose > 1)) + ged_vec, ged_mat, n_edit_operations = compute_geds_cml(graphs, options=options, parallel=self._parallel, verbose=(self._verbose > 1)) residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) time_list.append(time.time() - time0) - edit_cost_list.append(self.__edit_cost_constants) + edit_cost_list.append(self._edit_cost_constants) nb_cost_mat = np.array(n_edit_operations) nb_cost_mat_list.append(nb_cost_mat) # check convergence. ec_changed = False - for i, cost in enumerate(self.__edit_cost_constants): + for i, cost in enumerate(self._edit_cost_constants): if cost == 0: - if edit_cost_list[-2][i] > self.__epsilon_ec: + if edit_cost_list[-2][i] > self._epsilon_ec: ec_changed = True break - elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec: + elif abs(cost - edit_cost_list[-2][i]) / cost > self._epsilon_ec: ec_changed = True break -# if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec: +# if abs(cost - edit_cost_list[-2][i]) > self._epsilon_ec: # ec_changed = True # break residual_changed = False if residual_list[-1] == 0: - if residual_list[-2] > self.__epsilon_residual: + if residual_list[-2] > self._epsilon_residual: residual_changed = True - elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual: + elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self._epsilon_residual: residual_changed = True - self.__converged = not (ec_changed or residual_changed) - if self.__converged: + self._converged = not (ec_changed or residual_changed) + if self._converged: itrs_without_update += 1 else: itrs_without_update = 0 - self.__num_updates_ecc += 1 + self._num_updates_ecc += 1 # print current states.
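The convergence test just shown declares the fit converged only when every edit cost and the residual each moved by at most a relative epsilon, with a special case for values that hit exactly zero. The same test, pulled out as a standalone function (names are ours):

```python
def changed(new, old, eps):
    # Relative change |new - old| / new > eps; when new == 0, compare old to
    # eps directly, mirroring the zero-cost special case above.
    if new == 0:
        return old > eps
    return abs(new - old) / new > eps

def is_converged(costs_new, costs_old, res_new, res_old, eps_ec, eps_residual):
    ec_changed = any(changed(c, o, eps_ec) for c, o in zip(costs_new, costs_old))
    residual_changed = changed(res_new, res_old, eps_residual)
    return not (ec_changed or residual_changed)

print(is_converged([1.0, 2.0], [1.005, 2.01], 0.500, 0.502, 0.1, 0.01))  # True
```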
if self._verbose >= 2: print() print('-------------------------------------------------------------------------') - print('States of iteration', self.__itrs + 1) + print('States of iteration', self._itrs + 1) print('-------------------------------------------------------------------------') -# print('Time spend:', self.__runtime_optimize_ec) - print('Total number of iterations for optimizing:', self.__itrs + 1) - print('Total number of updating edit costs:', self.__num_updates_ecc) - print('Was optimization of edit costs converged:', self.__converged) +# print('Time spend:', self._runtime_optimize_ec) + print('Total number of iterations for optimizing:', self._itrs + 1) + print('Total number of updating edit costs:', self._num_updates_ecc) + print('Was optimization of edit costs converged:', self._converged) print('Did edit costs change:', ec_changed) print('Did residual change:', residual_changed) print('Iterations without update:', itrs_without_update) - print('Current edit cost constants:', self.__edit_cost_constants) + print('Current edit cost constants:', self._edit_cost_constants) print('Residual list:', residual_list) print('-------------------------------------------------------------------------') - self.__itrs += 1 + self._itrs += 1 - def __termination_criterion_met(self, converged, timer, itr, itrs_without_update): - if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): -# if self.__state == AlgorithmState.TERMINATED: -# self.__state = AlgorithmState.INITIALIZED + def _termination_criterion_met(self, converged, timer, itr, itrs_without_update): + if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False): +# if self._state == AlgorithmState.TERMINATED: +# self._state = AlgorithmState.INITIALIZED return True - return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) + return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False) - def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): - # if self.__ds_name == 'Letter-high': - if self.__ged_options['edit_cost'] == 'LETTER': + def _update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'): + # if self._ds_name == 'Letter-high': + if self._ged_options['edit_cost'] == 'LETTER': raise Exception('Cannot compute for cost "LETTER".') pass # # method 1: set alpha automatically, just tune c_vir and c_eir by @@ -431,7 +431,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): # res = optimize.root(func, [0.9, 1.7, 0.75, 100]) # edit_costs_new = res.x # residual = None - elif self.__ged_options['edit_cost'] == 'LETTER2': + elif self._ged_options['edit_cost'] == 'LETTER2': # # 1. if c_vi != c_vr, c_ei != c_er. 
# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] # x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -458,7 +458,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] # edit_costs_new = np.array(edit_costs_new) # residual = np.sqrt(prob.value) - if not self.__triangle_rule and self.__allow_zeros: + if not self._triangle_rule and self._allow_zeros: nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) @@ -468,10 +468,10 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif self.__triangle_rule and self.__allow_zeros: + elif self._triangle_rule and self._allow_zeros: nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) @@ -482,10 +482,10 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif not self.__triangle_rule and not self.__allow_zeros: + elif not self._triangle_rule and not self._allow_zeros: nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) @@ -507,7 +507,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] # edit_costs_new = np.array(edit_costs_new) # residual = np.sqrt(prob.value) - elif self.__triangle_rule and not self.__allow_zeros: + elif self._triangle_rule and not self._allow_zeros: # c_vs <= c_vi + c_vr. nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -515,7 +515,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif rw_constraints == '2constraints': # @todo: rearrange it later. @@ -532,11 +532,11 @@ class MedianPreimageGeneratorPy(PreimageGenerator): edit_costs_new = x.value residual = np.sqrt(prob.value) - elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC': + elif self._ged_options['edit_cost'] == 'NON_SYMBOLIC': is_n_attr = np.count_nonzero(nb_cost_mat[:,2]) is_e_attr = np.count_nonzero(nb_cost_mat[:,5]) - if self.__ds_name == 'SYNTHETICnew': # @todo: rearrenge this later. + if self._ds_name == 'SYNTHETICnew': # @todo: rearrenge this later. 
# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] nb_cost_mat_new = nb_cost_mat[:,[2,3,4]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -553,7 +553,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0]))) residual = np.sqrt(prob.value) - elif not self.__triangle_rule and self.__allow_zeros: + elif not self._triangle_rule and self._allow_zeros: if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -564,7 +564,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -577,7 +577,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -590,7 +590,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -599,11 +599,11 @@ class MedianPreimageGeneratorPy(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif self.__triangle_rule and self.__allow_zeros: + elif self._triangle_rule and self._allow_zeros: if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) @@ -616,7 +616,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -630,7 +630,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 0.0, 1.0]).T@x >= 0.01, np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -644,7 +644,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -653,18 +653,18 @@ class MedianPreimageGeneratorPy(PreimageGenerator): 
cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif not self.__triangle_rule and not self.__allow_zeros: + elif not self._triangle_rule and not self._allow_zeros: if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -673,7 +673,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -682,7 +682,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -691,11 +691,11 @@ class MedianPreimageGeneratorPy(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif self.__triangle_rule and not self.__allow_zeros: + elif self._triangle_rule and not self._allow_zeros: # c_vs <= c_vi + c_vr. 
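Every branch below follows one template: a cvxpy least-squares fit over positive costs, optionally with the triangle-rule row named in the comment above (c_vs <= c_vi + c_vr, encoded as the constraint [1, 1, -1, 0, 0]·x >= 0). A self-contained version of that template with synthetic data:

```python
import cvxpy as cp
import numpy as np

rng = np.random.default_rng(0)
nb_cost_mat = rng.integers(0, 5, size=(20, 5)).astype(float)  # synthetic op counts
dis_k_vec = 10.0 * rng.random(20)                             # synthetic targets

x = cp.Variable(nb_cost_mat.shape[1])
cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec)
constraints = [
    x >= 0.01,                                        # strictly positive edit costs
    np.array([1.0, 1.0, -1.0, 0.0, 0.0]) @ x >= 0.0,  # c_vi + c_vr - c_vs >= 0
]
prob = cp.Problem(cp.Minimize(cost_fun), constraints)
prob.solve()
print('edit costs:', x.value, '  residual:', np.sqrt(prob.value))
```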
if is_n_attr and is_e_attr: nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] @@ -705,7 +705,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) elif is_n_attr and not is_e_attr: @@ -715,7 +715,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value, np.array([0.0]))) residual = np.sqrt(prob.value) elif not is_n_attr and is_e_attr: @@ -725,7 +725,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) residual = np.sqrt(prob.value) else: @@ -734,13 +734,13 @@ class MedianPreimageGeneratorPy(PreimageGenerator): cost_fun = cp.sum_squares(nb_cost_mat_new @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:], np.array([0.0]))) residual = np.sqrt(prob.value) - elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. - if not self.__triangle_rule and self.__allow_zeros: + elif self._ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled. 
+ if not self._triangle_rule and self._allow_zeros: x = cp.Variable(nb_cost_mat.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], @@ -749,10 +749,10 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([0.0, 0.0, 0.0, 1.0, 0.0, 0.0]).T@x >= 0.01, np.array([0.0, 0.0, 0.0, 0.0, 1.0, 0.0]).T@x >= 0.01] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif self.__triangle_rule and self.__allow_zeros: + elif self._triangle_rule and self._allow_zeros: x = cp.Variable(nb_cost_mat.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], @@ -763,29 +763,29 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif not self.__triangle_rule and not self.__allow_zeros: + elif not self._triangle_rule and not self._allow_zeros: x = cp.Variable(nb_cost_mat.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])]] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) - elif self.__triangle_rule and not self.__allow_zeros: + elif self._triangle_rule and not self._allow_zeros: x = cp.Variable(nb_cost_mat.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat @ x - dis_k_vec) constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) else: - raise Exception('The edit cost "', self.__ged_options['edit_cost'], '" is not supported for update progress.') + raise Exception('The edit cost "', self._ged_options['edit_cost'], '" is not supported for update progress.') # # method 1: simple least square method. # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, # rcond=None) @@ -816,7 +816,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - self.__execute_cvx(prob) + self._execute_cvx(prob) edit_costs_new = x.value residual = np.sqrt(prob.value) @@ -825,7 +825,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): return edit_costs_new, residual - def __execute_cvx(self, prob): + def _execute_cvx(self, prob): try: prob.solve(verbose=(self._verbose>=2)) except MemoryError as error0: @@ -853,7 +853,7 @@ class MedianPreimageGeneratorPy(PreimageGenerator): print() - def __gmg_bcu(self): + def _gmg_bcu(self): """ The local search algorithm based on block coordinate update (BCU) for estimating a generalized median graph (GMG). @@ -865,70 +865,70 @@ class MedianPreimageGeneratorPy(PreimageGenerator): # Set up the ged environment. ged_env = GEDEnv() # @todo: maybe create a ged_env as a private variable.
# gedlibpy.restart_env() - ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constants=self.__edit_cost_constants) - graphs = [self.__clean_graph(g) for g in self._dataset.graphs] + ged_env.set_edit_cost(self._ged_options['edit_cost'], edit_cost_constants=self._edit_cost_constants) + graphs = [self._clean_graph(g) for g in self._dataset.graphs] for g in graphs: ged_env.add_nx_graph(g, '') graph_ids = ged_env.get_all_graph_ids() set_median_id = ged_env.add_graph('set_median') gen_median_id = ged_env.add_graph('gen_median') - ged_env.init(init_type=self.__ged_options['init_option']) + ged_env.init(init_type=self._ged_options['init_option']) # Set up the median graph estimator. - self.__mge = MedianGraphEstimatorPy(ged_env, constant_node_costs(self.__ged_options['edit_cost'])) - self.__mge.set_refine_method(self.__ged_options['method'], self.__ged_options) - options = self.__mge_options.copy() + self._mge = MedianGraphEstimatorPy(ged_env, constant_node_costs(self._ged_options['edit_cost'])) + self._mge.set_refine_method(self._ged_options['method'], self._ged_options) + options = self._mge_options.copy() if not 'seed' in options: options['seed'] = int(round(time.time() * 1000)) # @todo: may not work correctly for possible parallel usage. - options['parallel'] = self.__parallel + options['parallel'] = self._parallel # Select the GED algorithm. - self.__mge.set_options(mge_options_to_string(options)) - self.__mge.set_label_names(node_labels=self._dataset.node_labels, + self._mge.set_options(mge_options_to_string(options)) + self._mge.set_label_names(node_labels=self._dataset.node_labels, edge_labels=self._dataset.edge_labels, node_attrs=self._dataset.node_attrs, edge_attrs=self._dataset.edge_attrs) - ged_options = self.__ged_options.copy() - if self.__parallel: + ged_options = self._ged_options.copy() + if self._parallel: ged_options['threads'] = 1 - self.__mge.set_init_method(ged_options['method'], ged_options) - self.__mge.set_descent_method(ged_options['method'], ged_options) + self._mge.set_init_method(ged_options['method'], ged_options) + self._mge.set_descent_method(ged_options['method'], ged_options) # Run the estimator. - self.__mge.run(graph_ids, set_median_id, gen_median_id) + self._mge.run(graph_ids, set_median_id, gen_median_id) # Get SODs. - self.__sod_set_median = self.__mge.get_sum_of_distances('initialized') - self.__sod_gen_median = self.__mge.get_sum_of_distances('converged') + self._sod_set_median = self._mge.get_sum_of_distances('initialized') + self._sod_gen_median = self._mge.get_sum_of_distances('converged') # Get median graphs. - self.__set_median = ged_env.get_nx_graph(set_median_id) - self.__gen_median = ged_env.get_nx_graph(gen_median_id) + self._set_median = ged_env.get_nx_graph(set_median_id) + self._gen_median = ged_env.get_nx_graph(gen_median_id) - def __compute_distances_to_true_median(self): + def _compute_distances_to_true_median(self): # compute distance in kernel space for set median.
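`compute_k_dis`, used throughout the distance computations below, evaluates the kernel-space distance between a candidate graph g and the weighted combination of the dataset's embeddings: d = sqrt(k(g, g) - 2 Σ_i α_i k(g, g_i) + Σ_{i,j} α_i α_j k(g_i, g_j)). A sketch of that formula; the real utility's signature (its `term3`/`withterm3` flags) differs in detail:

```python
import numpy as np

def k_dis(idx, community, alphas, gram, term3=None):
    # Distance in kernel space between graph `idx` and sum_i alphas[i] * phi(g_i).
    # `term3` (the alpha-weighted Gram sum) can be precomputed and reused, as the
    # callers below do; this helper is our sketch, not gklearn's compute_k_dis.
    term1 = gram[idx, idx]
    term2 = 2.0 * sum(a * gram[idx, j] for a, j in zip(alphas, community))
    if term3 is None:
        term3 = sum(a1 * a2 * gram[i, j]
                    for a1, i in zip(alphas, community)
                    for a2, j in zip(alphas, community))
    return np.sqrt(max(term1 - term2 + term3, 0.0))

gram = np.array([[1.0, 0.6, 0.3],
                 [0.6, 1.0, 0.5],
                 [0.3, 0.5, 1.0]])
print(k_dis(2, [0, 1], [0.5, 0.5], gram))  # distance of graph 2 to the mean of 0, 1
```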
- kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options) - kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options) + kernels_to_sm, _ = self._graph_kernel.compute(self._set_median, self._dataset.graphs, **self._kernel_options) + kernel_sm, _ = self._graph_kernel.compute(self._set_median, self._set_median, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize + kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))] # normalize kernel_sm = 1 # @todo: not correct kernel value gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_sm = np.concatenate((np.array([[kernel_sm] + kernels_to_sm]).T, gram_with_sm), axis=1) - self.__k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), + self._k_dis_set_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_sm, withterm3=False) # compute distance in kernel space for generalized median. - kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options) - kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options) + kernels_to_gm, _ = self._graph_kernel.compute(self._gen_median, self._dataset.graphs, **self._kernel_options) + kernel_gm, _ = self._graph_kernel.compute(self._gen_median, self._gen_median, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize + kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))] # normalize kernel_gm = 1 gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_gm = np.concatenate((np.array([[kernel_gm] + kernels_to_gm]).T, gram_with_gm), axis=1) - self.__k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), + self._k_dis_gen_median = compute_k_dis(0, range(1, 1+len(self._dataset.graphs)), [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False) @@ -939,19 +939,19 @@ class MedianPreimageGeneratorPy(PreimageGenerator): [1 / len(self._dataset.graphs)] * len(self._dataset.graphs), gram_with_gm, withterm3=False)) idx_k_dis_median_set_min = np.argmin(k_dis_median_set) - self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] - self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() + self._k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min] + self._best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy() if self._verbose >= 2: print() - print('distance in kernel space for set median:', self.__k_dis_set_median) - print('distance in kernel space for generalized median:', self.__k_dis_gen_median) - print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset) + print('distance in kernel space for set median:', self._k_dis_set_median) + print('distance in kernel space for generalized median:', self._k_dis_gen_median) + print('minimum distance in 
kernel space for each graph in median set:', self._k_dis_dataset) print('distance in kernel space for each graph in median set:', k_dis_median_set) -# def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): - def __clean_graph(self, G): # @todo: this may not be needed when datafile is updated. +# def _clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): + def _clean_graph(self, G): # @todo: this may not be needed when datafile is updated. """ Cleans node and edge labels and attributes of the given graph. """ @@ -973,63 +973,63 @@ class MedianPreimageGeneratorPy(PreimageGenerator): @property def mge(self): - return self.__mge + return self._mge @property def ged_options(self): - return self.__ged_options + return self._ged_options @ged_options.setter def ged_options(self, value): - self.__ged_options = value + self._ged_options = value @property def mge_options(self): - return self.__mge_options + return self._mge_options @mge_options.setter def mge_options(self, value): - self.__mge_options = value + self._mge_options = value @property def fit_method(self): - return self.__fit_method + return self._fit_method @fit_method.setter def fit_method(self, value): - self.__fit_method = value + self._fit_method = value @property def init_ecc(self): - return self.__init_ecc + return self._init_ecc @init_ecc.setter def init_ecc(self, value): - self.__init_ecc = value + self._init_ecc = value @property def set_median(self): - return self.__set_median + return self._set_median @property def gen_median(self): - return self.__gen_median + return self._gen_median @property def best_from_dataset(self): - return self.__best_from_dataset + return self._best_from_dataset @property def gram_matrix_unnorm(self): - return self.__gram_matrix_unnorm + return self._gram_matrix_unnorm @gram_matrix_unnorm.setter def gram_matrix_unnorm(self, value): - self.__gram_matrix_unnorm = value \ No newline at end of file + self._gram_matrix_unnorm = value \ No newline at end of file diff --git a/gklearn/preimage/random_preimage_generator.py b/gklearn/preimage/random_preimage_generator.py index cb28519..c2210f5 100644 --- a/gklearn/preimage/random_preimage_generator.py +++ b/gklearn/preimage/random_preimage_generator.py @@ -26,43 +26,43 @@ class RandomPreimageGenerator(PreimageGenerator): def __init__(self, dataset=None): PreimageGenerator.__init__(self, dataset=dataset) # arguments to set. - self.__k = 5 # number of nearest neighbors of phi in D_N. - self.__r_max = 10 # maximum number of iterations. - self.__l = 500 # numbers of graphs generated for each graph in D_k U {g_i_hat}. - self.__alphas = None # weights of linear combinations of points in kernel space. - self.__parallel = True - self.__n_jobs = multiprocessing.cpu_count() - self.__time_limit_in_sec = 0 - self.__max_itrs = 20 + self._k = 5 # number of nearest neighbors of phi in D_N. + self._r_max = 10 # maximum number of iterations. + self._l = 500 # numbers of graphs generated for each graph in D_k U {g_i_hat}. + self._alphas = None # weights of linear combinations of points in kernel space. + self._parallel = True + self._n_jobs = multiprocessing.cpu_count() + self._time_limit_in_sec = 0 + self._max_itrs = 20 # values to compute. 
- self.__runtime_generate_preimage = None - self.__runtime_total = None - self.__preimage = None - self.__best_from_dataset = None - self.__k_dis_preimage = None - self.__k_dis_dataset = None - self.__itrs = 0 - self.__converged = False # @todo - self.__num_updates = 0 + self._runtime_generate_preimage = None + self._runtime_total = None + self._preimage = None + self._best_from_dataset = None + self._k_dis_preimage = None + self._k_dis_dataset = None + self._itrs = 0 + self._converged = False # @todo + self._num_updates = 0 # values that can be set or to be computed. - self.__gram_matrix_unnorm = None - self.__runtime_precompute_gm = None + self._gram_matrix_unnorm = None + self._runtime_precompute_gm = None def set_options(self, **kwargs): self._kernel_options = kwargs.get('kernel_options', {}) self._graph_kernel = kwargs.get('graph_kernel', None) self._verbose = kwargs.get('verbose', 2) - self.__k = kwargs.get('k', 5) - self.__r_max = kwargs.get('r_max', 10) - self.__l = kwargs.get('l', 500) - self.__alphas = kwargs.get('alphas', None) - self.__parallel = kwargs.get('parallel', True) - self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) - self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) - self.__max_itrs = kwargs.get('max_itrs', 20) - self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) - self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) + self._k = kwargs.get('k', 5) + self._r_max = kwargs.get('r_max', 10) + self._l = kwargs.get('l', 500) + self._alphas = kwargs.get('alphas', None) + self._parallel = kwargs.get('parallel', True) + self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) + self._time_limit_in_sec = kwargs.get('time_limit_in_sec', 0) + self._max_itrs = kwargs.get('max_itrs', 20) + self._gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None) + self._runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None) def run(self): @@ -78,65 +78,65 @@ class RandomPreimageGenerator(PreimageGenerator): start = time.time() # 1. precompute gram matrix. - if self.__gram_matrix_unnorm is None: + if self._gram_matrix_unnorm is None: gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options) - self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm + self._gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm end_precompute_gm = time.time() - self.__runtime_precompute_gm = end_precompute_gm - start + self._runtime_precompute_gm = end_precompute_gm - start else: - if self.__runtime_precompute_gm is None: + if self._runtime_precompute_gm is None: raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.') - self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm + self._graph_kernel.gram_matrix_unnorm = self._gram_matrix_unnorm if self._kernel_options['normalize']: - self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm)) + self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self._gram_matrix_unnorm)) else: - self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm) + self._graph_kernel.gram_matrix = np.copy(self._gram_matrix_unnorm) end_precompute_gm = time.time() - start -= self.__runtime_precompute_gm + start -= self._runtime_precompute_gm # 2. compute k nearest neighbors of phi in D_N. 
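Step 2 below ranks every dataset graph by its kernel distance to the target point phi and keeps the k closest, with ties at the minimum all treated as candidate pre-images. The selection itself, in isolation (toy distances):

```python
import numpy as np

k = 5
k_dis_list = np.array([0.9, 0.2, 0.7, 0.2, 0.4, 0.8, 0.3])  # toy distances to phi

sort_idx = np.argsort(k_dis_list)   # ascending by distance
dis_gs = k_dis_list[sort_idx[:k]]   # the k shortest distances

# All graphs tied at the minimum distance are equally good nearest neighbors.
nb_best = int(np.sum(dis_gs == dis_gs[0]))
print('k nearest indices:', sort_idx[:k], ' best distance:', dis_gs[0], ' ties:', nb_best)
```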
if self._verbose >= 2: print('\nstart computing k nearest neighbors of phi in D_N...\n') D_N = self._dataset.graphs - if self.__alphas is None: - self.__alphas = [1 / len(D_N)] * len(D_N) + if self._alphas is None: + self._alphas = [1 / len(D_N)] * len(D_N) k_dis_list = [] # distance between g_star and each graph. term3 = 0 - for i1, a1 in enumerate(self.__alphas): - for i2, a2 in enumerate(self.__alphas): + for i1, a1 in enumerate(self._alphas): + for i2, a2 in enumerate(self._alphas): term3 += a1 * a2 * self._graph_kernel.gram_matrix[i1, i2] for idx in range(len(D_N)): - k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self.__alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) + k_dis_list.append(compute_k_dis(idx, range(0, len(D_N)), self._alphas, self._graph_kernel.gram_matrix, term3=term3, withterm3=True)) # sort. sort_idx = np.argsort(k_dis_list) - dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self.__k]] # the k shortest distances. + dis_gs = [k_dis_list[idis] for idis in sort_idx[0:self._k]] # the k shortest distances. nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist()) g0hat_list = [D_N[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in D_N - self.__best_from_dataset = g0hat_list[0] # get the first best graph if there are muitlple. - self.__k_dis_dataset = dis_gs[0] + self._best_from_dataset = g0hat_list[0] # get the first best graph if there are multiple. + self._k_dis_dataset = dis_gs[0] - if self.__k_dis_dataset == 0: # get the exact pre-image. + if self._k_dis_dataset == 0: # get the exact pre-image. end_generate_preimage = time.time() - self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm - self.__runtime_total = end_generate_preimage - start - self.__preimage = self.__best_from_dataset.copy() - self.__k_dis_preimage = self.__k_dis_dataset + self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm + self._runtime_total = end_generate_preimage - start + self._preimage = self._best_from_dataset.copy() + self._k_dis_preimage = self._k_dis_dataset if self._verbose: print() print('=============================================================================') print('The exact pre-image is found from the input dataset.') print('-----------------------------------------------------------------------------') - print('Distance in kernel space for the best graph from dataset and for preimage:', self.__k_dis_dataset) - print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) - print('Time to generate pre-images:', self.__runtime_generate_preimage) - print('Total time:', self.__runtime_total) + print('Distance in kernel space for the best graph from dataset and for preimage:', self._k_dis_dataset) + print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) + print('Time to generate pre-images:', self._runtime_generate_preimage) + print('Total time:', self._runtime_total) print('=============================================================================') print() return dhat = dis_gs[0] # the nearest distance - Gk = [D_N[ig].copy() for ig in sort_idx[0:self.__k]] # the k nearest neighbors + Gk = [D_N[ig].copy() for ig in sort_idx[0:self._k]] # the k nearest neighbors Gs_nearest = [nx.convert_node_labels_to_integers(g) for g in Gk] # [g.copy() for g in Gk] # 3. start iterations.
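The iterations that follow repeatedly perturb candidate graphs at random and keep any modification that shrinks the kernel distance; the actual moves live in `_do_trial` further down, which adds and deletes edges under a per-trial seed. A sketch of one such random edge-toggle move on a networkx graph (the helper and its exact move set are our assumptions):

```python
import time
import networkx as nx
import numpy as np

def random_edge_trial(g_init, n_modifications, trial):
    # Toggle n_modifications random vertex pairs: add the edge if absent,
    # remove it if present. The per-trial seed mirrors _do_trial below.
    gtemp = g_init.copy()
    seed = (trial + int(time.time())) % (2 ** 32 - 1)
    rng = np.random.default_rng(seed)
    nodes = list(gtemp.nodes())
    for _ in range(n_modifications):
        i, j = rng.choice(len(nodes), size=2, replace=False)
        u, v = nodes[i], nodes[j]
        if gtemp.has_edge(u, v):
            gtemp.remove_edge(u, v)
        else:
            gtemp.add_edge(u, v)
    return gtemp

g = nx.path_graph(6)
print(sorted(random_edge_trial(g, 2, trial=0).edges()))
```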
@@ -146,12 +146,12 @@ class RandomPreimageGenerator(PreimageGenerator): dihat_list = [] r = 0 dis_of_each_itr = [dhat] - if self.__parallel: + if self._parallel: self._kernel_options['parallel'] = None - self.__itrs = 0 - self.__num_updates = 0 - timer = Timer(self.__time_limit_in_sec) - while not self.__termination_criterion_met(timer, self.__itrs, r): + self._itrs = 0 + self._num_updates = 0 + timer = Timer(self._time_limit_in_sec) + while not self._termination_criterion_met(timer, self._itrs, r): print('\n- r =', r) found = False dis_bests = dis_gs + dihat_list @@ -173,7 +173,7 @@ class RandomPreimageGenerator(PreimageGenerator): nb_modif = 1 for idx, nb in enumerate(range(nb_vpairs_min, nb_vpairs_min - fdgs_max, -1)): nb_modif *= nb / (fdgs_max - idx) - while fdgs_max < nb_vpairs_min and nb_modif < self.__l: + while fdgs_max < nb_vpairs_min and nb_modif < self._l: fdgs_max += 1 nb_modif *= (nb_vpairs_min - fdgs_max + 1) / fdgs_max nb_increase = int(fdgs_max - fdgs_max_old) @@ -184,7 +184,7 @@ class RandomPreimageGenerator(PreimageGenerator): for ig, gs in enumerate(Gs_nearest + gihat_list): if self._verbose >= 2: print('-- computing', ig + 1, 'graphs out of', len(Gs_nearest) + len(gihat_list)) - gnew, dhat, found = self.__generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) + gnew, dhat, found = self._generate_l_graphs(gs, fdgs_list[ig], dhat, ig, found, term3) if found: r = 0 @@ -194,51 +194,51 @@ class RandomPreimageGenerator(PreimageGenerator): r += 1 dis_of_each_itr.append(dhat) - self.__itrs += 1 + self._itrs += 1 if self._verbose >= 2: - print('Total number of iterations is', self.__itrs, '.') - print('The preimage is updated', self.__num_updates, 'times.') + print('Total number of iterations is', self._itrs, '.') + print('The preimage is updated', self._num_updates, 'times.') print('The shortest distances for previous iterations are', dis_of_each_itr, '.') # get results and print. 
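The search loop above is guarded by `Timer(self._time_limit_in_sec)` together with the iteration caps checked in `_termination_criterion_met`. gklearn's `Timer` is not shown in this patch; a minimal sketch of such a guard, assuming a nonpositive limit (the default `time_limit_in_sec = 0`) means no wall-clock limit:

```python
import time

class WallClockTimer:
    # Sketch of a time-limit guard in the spirit of gklearn's Timer; the real
    # class's semantics may differ. A limit <= 0 never expires.

    def __init__(self, time_limit_in_sec):
        self.time_limit_in_sec = time_limit_in_sec
        self.start = time.time()

    def expired(self):
        if self.time_limit_in_sec <= 0:
            return False
        return time.time() - self.start >= self.time_limit_in_sec

timer = WallClockTimer(0)   # 0 => rely on max_itrs / r caps to stop the loop
print(timer.expired())      # False
```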
end_generate_preimage = time.time() - self.__runtime_generate_preimage = end_generate_preimage - end_precompute_gm - self.__runtime_total = end_generate_preimage - start - self.__preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) - self.__k_dis_preimage = dhat + self._runtime_generate_preimage = end_generate_preimage - end_precompute_gm + self._runtime_total = end_generate_preimage - start + self._preimage = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0]) + self._k_dis_preimage = dhat if self._verbose: print() print('=============================================================================') print('Finished generation of preimages.') print('-----------------------------------------------------------------------------') - print('Distance in kernel space for the best graph from dataset:', self.__k_dis_dataset) - print('Distance in kernel space for the preimage:', self.__k_dis_preimage) - print('Total number of iterations for optimizing:', self.__itrs) - print('Total number of updating preimage:', self.__num_updates) - print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm) - print('Time to generate pre-images:', self.__runtime_generate_preimage) - print('Total time:', self.__runtime_total) + print('Distance in kernel space for the best graph from dataset:', self._k_dis_dataset) + print('Distance in kernel space for the preimage:', self._k_dis_preimage) + print('Total number of iterations for optimizing:', self._itrs) + print('Total number of updating preimage:', self._num_updates) + print('Time to pre-compute Gram matrix:', self._runtime_precompute_gm) + print('Time to generate pre-images:', self._runtime_generate_preimage) + print('Total time:', self._runtime_total) print('=============================================================================') print() - def __generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): - if self.__parallel: - gnew, dhat, found = self.__generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) + def _generate_l_graphs(self, g_init, fdgs, dhat, ig, found, term3): + if self._parallel: + gnew, dhat, found = self._generate_l_graphs_parallel(g_init, fdgs, dhat, ig, found, term3) else: - gnew, dhat, found = self.__generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) + gnew, dhat, found = self._generate_l_graphs_series(g_init, fdgs, dhat, ig, found, term3) return gnew, dhat, found - def __generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): + def _generate_l_graphs_series(self, g_init, fdgs, dhat, ig, found, term3): gnew = None updated = False - for trial in range(0, self.__l): + for trial in range(0, self._l): if self._verbose >= 2: - print('---', trial + 1, 'trial out of', self.__l) + print('---', trial + 1, 'trial out of', self._l) - gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) + gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial) # get the better graph preimage. if dnew <= dhat: # @todo: the new distance is smaller or also equal? @@ -257,14 +257,14 @@ class RandomPreimageGenerator(PreimageGenerator): found = True # found better or equally good graph. 
if updated: - self.__num_updates += 1 + self._num_updates += 1 return gnew, dhat, found - def __generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): + def _generate_l_graphs_parallel(self, g_init, fdgs, dhat, ig, found, term3): gnew = None - len_itr = self.__l + len_itr = self._l gnew_list = [None] * len_itr dnew_list = [None] * len_itr itr = range(0, len_itr) @@ -295,7 +295,7 @@ class RandomPreimageGenerator(PreimageGenerator): print('I am smaller!') print('index (as in D_k U {gihat}) =', str(ig)) print('distance:', dhat, '->', dnew, '\n') - self.__num_updates += 1 + self._num_updates += 1 else: if self._verbose >= 2: print('I am equal!') @@ -308,11 +308,11 @@ class RandomPreimageGenerator(PreimageGenerator): def _generate_graph_parallel(self, g_init, fdgs, term3, itr): trial = itr - gtemp, dnew = self.__do_trial(g_init, fdgs, term3, trial) + gtemp, dnew = self._do_trial(g_init, fdgs, term3, trial) return trial, gtemp, dnew - def __do_trial(self, g_init, fdgs, term3, trial): + def _do_trial(self, g_init, fdgs, term3, trial): # add and delete edges. gtemp = g_init.copy() seed = (trial + int(time.time())) % (2 ** 32 - 1) @@ -339,51 +339,51 @@ class RandomPreimageGenerator(PreimageGenerator): kernels_to_gtmp, _ = self._graph_kernel.compute(gtemp, self._dataset.graphs, **self._kernel_options) kernel_gtmp, _ = self._graph_kernel.compute(gtemp, gtemp, **self._kernel_options) if self._kernel_options['normalize']: - kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize + kernels_to_gtmp = [kernels_to_gtmp[i] / np.sqrt(self._gram_matrix_unnorm[i, i] * kernel_gtmp) for i in range(len(kernels_to_gtmp))] # normalize kernel_gtmp = 1 # @todo: not correct kernel value gram_with_gtmp = np.concatenate((np.array([kernels_to_gtmp]), np.copy(self._graph_kernel.gram_matrix)), axis=0) gram_with_gtmp = np.concatenate((np.array([[kernel_gtmp] + kernels_to_gtmp]).T, gram_with_gtmp), axis=1) - dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self.__alphas, gram_with_gtmp, term3=term3, withterm3=True) + dnew = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)), self._alphas, gram_with_gtmp, term3=term3, withterm3=True) return gtemp, dnew def get_results(self): results = {} - results['runtime_precompute_gm'] = self.__runtime_precompute_gm - results['runtime_generate_preimage'] = self.__runtime_generate_preimage - results['runtime_total'] = self.__runtime_total - results['k_dis_dataset'] = self.__k_dis_dataset - results['k_dis_preimage'] = self.__k_dis_preimage - results['itrs'] = self.__itrs - results['num_updates'] = self.__num_updates + results['runtime_precompute_gm'] = self._runtime_precompute_gm + results['runtime_generate_preimage'] = self._runtime_generate_preimage + results['runtime_total'] = self._runtime_total + results['k_dis_dataset'] = self._k_dis_dataset + results['k_dis_preimage'] = self._k_dis_preimage + results['itrs'] = self._itrs + results['num_updates'] = self._num_updates return results - def __termination_criterion_met(self, timer, itr, r): - if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): -# if self.__state == AlgorithmState.TERMINATED: -# self.__state = AlgorithmState.INITIALIZED + def _termination_criterion_met(self, timer, itr, r): + if timer.expired() or (itr >= self._max_itrs if self._max_itrs >= 0 else False): +# if self._state == AlgorithmState.TERMINATED: +# self._state = AlgorithmState.INITIALIZED return True - return (r 
>= self.__r_max if self.__r_max >= 0 else False) -# return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) + return (r >= self._r_max if self._r_max >= 0 else False) +# return converged or (itrs_without_update > self._max_itrs_without_update if self._max_itrs_without_update >= 0 else False) @property def preimage(self): - return self.__preimage + return self._preimage @property def best_from_dataset(self): - return self.__best_from_dataset + return self._best_from_dataset @property def gram_matrix_unnorm(self): - return self.__gram_matrix_unnorm + return self._gram_matrix_unnorm @gram_matrix_unnorm.setter def gram_matrix_unnorm(self, value): - self.__gram_matrix_unnorm = value \ No newline at end of file + self._gram_matrix_unnorm = value \ No newline at end of file diff --git a/gklearn/preimage/remove_best_graph.py b/gklearn/preimage/remove_best_graph.py index d6be2a6..48b2b25 100644 --- a/gklearn/preimage/remove_best_graph.py +++ b/gklearn/preimage/remove_best_graph.py @@ -35,13 +35,13 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt if save_results: # create result files. print('creating output files...') - fn_output_detail, fn_output_summary = __init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) + fn_output_detail, fn_output_summary = _init_output_file(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) else: fn_output_detail, fn_output_summary = None, None # 2. compute/load Gram matrix a priori. print('2. computing/loading Gram matrix...') - gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets) + gram_matrix_unnorm_list, time_precompute_gm_list = _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets) sod_sm_list = [] sod_gm_list = [] @@ -82,7 +82,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt # 3. get the best graph and remove it from median set. print('3. getting and removing the best graph...') gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset] - best_index, best_dis, best_graph = __get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy())) + best_index, best_dis, best_graph = _get_best_graph([g.copy() for g in dataset.graphs], normalize_gram_matrix(gram_matrix_unnorm.copy())) median_set_new = [dataset.graphs[i] for i in range(len(dataset.graphs)) if i != best_index] num_graphs -= 1 if num_graphs == 1: @@ -294,7 +294,7 @@ def remove_best_graph(ds_name, mpg_options, kernel_options, ged_options, mge_opt print('\ncomplete.\n') -def __get_best_graph(Gn, gram_matrix): +def _get_best_graph(Gn, gram_matrix): k_dis_list = [] for idx in range(len(Gn)): k_dis_list.append(compute_k_dis(idx, range(0, len(Gn)), [1 / len(Gn)] * len(Gn), gram_matrix, withterm3=False)) @@ -313,7 +313,7 @@ def get_relations(sign): return 'worse' -def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): +def _get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): if load_gm == 'auto': gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' 
+ kernel_options['name'] + '.gm.npz' gmfile_exist = os.path.isfile(os.path.abspath(gm_fname)) @@ -325,7 +325,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): gram_matrix_unnorm_list = [] time_precompute_gm_list = [] for dataset in datasets: - gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options) + gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options) gram_matrix_unnorm_list.append(gram_matrix_unnorm) time_precompute_gm_list.append(time_precompute_gm) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) @@ -333,7 +333,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): gram_matrix_unnorm_list = [] time_precompute_gm_list = [] for dataset in datasets: - gram_matrix_unnorm, time_precompute_gm = __compute_gram_matrix_unnorm(dataset, kernel_options) + gram_matrix_unnorm, time_precompute_gm = _compute_gram_matrix_unnorm(dataset, kernel_options) gram_matrix_unnorm_list.append(gram_matrix_unnorm) time_precompute_gm_list.append(time_precompute_gm) np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm', gram_matrix_unnorm_list=gram_matrix_unnorm_list, run_time_list=time_precompute_gm_list) @@ -346,7 +346,7 @@ def __get_gram_matrix(load_gm, dir_save, ds_name, kernel_options, datasets): return gram_matrix_unnorm_list, time_precompute_gm_list -def __get_graph_kernel(dataset, kernel_options): +def _get_graph_kernel(dataset, kernel_options): from gklearn.utils.utils import get_graph_kernel_by_name graph_kernel = get_graph_kernel_by_name(kernel_options['name'], node_labels=dataset.node_labels, @@ -358,7 +358,7 @@ def __get_graph_kernel(dataset, kernel_options): return graph_kernel -def __compute_gram_matrix_unnorm(dataset, kernel_options): +def _compute_gram_matrix_unnorm(dataset, kernel_options): from gklearn.utils.utils import get_graph_kernel_by_name graph_kernel = get_graph_kernel_by_name(kernel_options['name'], node_labels=dataset.node_labels, @@ -374,7 +374,7 @@ def __compute_gram_matrix_unnorm(dataset, kernel_options): return gram_matrix_unnorm, run_time -def __init_output_file(ds_name, gkernel, fit_method, dir_output): +def _init_output_file(ds_name, gkernel, fit_method, dir_output): if not os.path.exists(dir_output): os.makedirs(dir_output) fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv' diff --git a/gklearn/preimage/utils.py b/gklearn/preimage/utils.py index d4d5d05..0cdfddb 100644 --- a/gklearn/preimage/utils.py +++ b/gklearn/preimage/utils.py @@ -45,7 +45,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged if save_results: # create result files. 
print('creating output files...') - fn_output_detail, fn_output_summary = __init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) + fn_output_detail, fn_output_summary = _init_output_file_preimage(ds_name, kernel_options['name'], mpg_options['fit_method'], dir_save) sod_sm_list = [] sod_gm_list = [] @@ -307,7 +307,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged print('\ncomplete.\n') -def __init_output_file_preimage(ds_name, gkernel, fit_method, dir_output): +def _init_output_file_preimage(ds_name, gkernel, fit_method, dir_output): if not os.path.exists(dir_output): os.makedirs(dir_output) # fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv' diff --git a/gklearn/utils/dataset.py b/gklearn/utils/dataset.py index 3d68212..0343c0b 100644 --- a/gklearn/utils/dataset.py +++ b/gklearn/utils/dataset.py @@ -16,54 +16,54 @@ class Dataset(object): def __init__(self, filename=None, filename_targets=None, **kwargs): if filename is None: - self.__graphs = None - self.__targets = None - self.__node_labels = None - self.__edge_labels = None - self.__node_attrs = None - self.__edge_attrs = None + self._graphs = None + self._targets = None + self._node_labels = None + self._edge_labels = None + self._node_attrs = None + self._edge_attrs = None else: self.load_dataset(filename, filename_targets=filename_targets, **kwargs) - self.__substructures = None - self.__node_label_dim = None - self.__edge_label_dim = None - self.__directed = None - self.__dataset_size = None - self.__total_node_num = None - self.__ave_node_num = None - self.__min_node_num = None - self.__max_node_num = None - self.__total_edge_num = None - self.__ave_edge_num = None - self.__min_edge_num = None - self.__max_edge_num = None - self.__ave_node_degree = None - self.__min_node_degree = None - self.__max_node_degree = None - self.__ave_fill_factor = None - self.__min_fill_factor = None - self.__max_fill_factor = None - self.__node_label_nums = None - self.__edge_label_nums = None - self.__node_attr_dim = None - self.__edge_attr_dim = None - self.__class_number = None + self._substructures = None + self._node_label_dim = None + self._edge_label_dim = None + self._directed = None + self._dataset_size = None + self._total_node_num = None + self._ave_node_num = None + self._min_node_num = None + self._max_node_num = None + self._total_edge_num = None + self._ave_edge_num = None + self._min_edge_num = None + self._max_edge_num = None + self._ave_node_degree = None + self._min_node_degree = None + self._max_node_degree = None + self._ave_fill_factor = None + self._min_fill_factor = None + self._max_fill_factor = None + self._node_label_nums = None + self._edge_label_nums = None + self._node_attr_dim = None + self._edge_attr_dim = None + self._class_number = None def load_dataset(self, filename, filename_targets=None, **kwargs): - self.__graphs, self.__targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) - self.__node_labels = label_names['node_labels'] - self.__node_attrs = label_names['node_attrs'] - self.__edge_labels = label_names['edge_labels'] - self.__edge_attrs = label_names['edge_attrs'] + self._graphs, self._targets, label_names = load_dataset(filename, filename_targets=filename_targets, **kwargs) + self._node_labels = label_names['node_labels'] + self._node_attrs = label_names['node_attrs'] + self._edge_labels = label_names['edge_labels'] + self._edge_attrs = 
label_names['edge_attrs'] self.clean_labels() def load_graphs(self, graphs, targets=None): # this has to be followed by set_labels(). - self.__graphs = graphs - self.__targets = targets + self._graphs = graphs + self._targets = targets # self.set_labels_attrs() # @todo @@ -71,108 +71,108 @@ class Dataset(object): current_path = os.path.dirname(os.path.realpath(__file__)) + '/' if ds_name == 'Acyclic': ds_file = current_path + '../../datasets/Acyclic/dataset_bps.ds' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'AIDS': ds_file = current_path + '../../datasets/AIDS/AIDS_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Alkane': ds_file = current_path + '../../datasets/Alkane/dataset.ds' fn_targets = current_path + '../../datasets/Alkane/dataset_boiling_point_names.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file, filename_targets=fn_targets) + self._graphs, self._targets, label_names = load_dataset(ds_file, filename_targets=fn_targets) elif ds_name == 'COIL-DEL': ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'COIL-RAG': ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'COLORS-3': ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Cuneiform': ds_file = current_path + '../../datasets/Cuneiform/Cuneiform_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'DD': ds_file = current_path + '../../datasets/DD/DD_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'ENZYMES': ds_file = current_path + '../../datasets/ENZYMES_txt/ENZYMES_A_sparse.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Fingerprint': ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'FRANKENSTEIN': ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Letter-high': # node non-symb ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Letter-low': # node non-symb ds_file = current_path + '../../datasets/Letter-low/Letter-low_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif 
ds_name == 'Letter-med': # node non-symb ds_file = current_path + '../../datasets/Letter-med/Letter-med_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'MAO': ds_file = current_path + '../../datasets/MAO/dataset.ds' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Monoterpenoides': ds_file = current_path + '../../datasets/Monoterpenoides/dataset_10+.ds' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'MUTAG': ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'NCI1': ds_file = current_path + '../../datasets/NCI1/NCI1_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'NCI109': ds_file = current_path + '../../datasets/NCI109/NCI109_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'PAH': ds_file = current_path + '../../datasets/PAH/dataset.ds' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'SYNTHETIC': pass elif ds_name == 'SYNTHETICnew': ds_file = current_path + '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt' - self.__graphs, self.__targets, label_names = load_dataset(ds_file) + self._graphs, self._targets, label_names = load_dataset(ds_file) elif ds_name == 'Synthie': pass else: raise Exception('The dataset name "', ds_name, '" is not pre-defined.') - self.__node_labels = label_names['node_labels'] - self.__node_attrs = label_names['node_attrs'] - self.__edge_labels = label_names['edge_labels'] - self.__edge_attrs = label_names['edge_attrs'] + self._node_labels = label_names['node_labels'] + self._node_attrs = label_names['node_attrs'] + self._edge_labels = label_names['edge_labels'] + self._edge_attrs = label_names['edge_attrs'] self.clean_labels() def set_labels(self, node_labels=[], node_attrs=[], edge_labels=[], edge_attrs=[]): - self.__node_labels = node_labels - self.__node_attrs = node_attrs - self.__edge_labels = edge_labels - self.__edge_attrs = edge_attrs + self._node_labels = node_labels + self._node_attrs = node_attrs + self._edge_labels = edge_labels + self._edge_attrs = edge_attrs def set_labels_attrs(self, node_labels=None, node_attrs=None, edge_labels=None, edge_attrs=None): # @todo: remove labels which have only one possible values. if node_labels is None: - self.__node_labels = self.__graphs[0].graph['node_labels'] + self._node_labels = self._graphs[0].graph['node_labels'] # # graphs are considered node unlabeled if all nodes have the same label. 
# infos.update({'node_labeled': is_nl if node_label_num > 1 else False}) if node_attrs is None: - self.__node_attrs = self.__graphs[0].graph['node_attrs'] + self._node_attrs = self._graphs[0].graph['node_attrs'] # for G in Gn: # for n in G.nodes(data=True): # if 'attributes' in n[1]: # return len(n[1]['attributes']) # return 0 if edge_labels is None: - self.__edge_labels = self.__graphs[0].graph['edge_labels'] + self._edge_labels = self._graphs[0].graph['edge_labels'] # # graphs are considered edge unlabeled if all edges have the same label. # infos.update({'edge_labeled': is_el if edge_label_num > 1 else False}) if edge_attrs is None: - self.__edge_attrs = self.__graphs[0].graph['edge_attrs'] + self._edge_attrs = self._graphs[0].graph['edge_attrs'] # for G in Gn: # if nx.number_of_edges(G) > 0: # for e in G.edges(data=True): @@ -291,145 +291,145 @@ class Dataset(object): # dataset size if 'dataset_size' in keys: - if self.__dataset_size is None: - self.__dataset_size = self.__get_dataset_size() - infos['dataset_size'] = self.__dataset_size + if self._dataset_size is None: + self._dataset_size = self._get_dataset_size() + infos['dataset_size'] = self._dataset_size # graph node number if any(i in keys for i in ['total_node_num', 'ave_node_num', 'min_node_num', 'max_node_num']): - all_node_nums = self.__get_all_node_nums() + all_node_nums = self._get_all_node_nums() if 'total_node_num' in keys: - if self.__total_node_num is None: - self.__total_node_num = self.__get_total_node_num(all_node_nums) - infos['total_node_num'] = self.__total_node_num + if self._total_node_num is None: + self._total_node_num = self._get_total_node_num(all_node_nums) + infos['total_node_num'] = self._total_node_num if 'ave_node_num' in keys: - if self.__ave_node_num is None: - self.__ave_node_num = self.__get_ave_node_num(all_node_nums) - infos['ave_node_num'] = self.__ave_node_num + if self._ave_node_num is None: + self._ave_node_num = self._get_ave_node_num(all_node_nums) + infos['ave_node_num'] = self._ave_node_num if 'min_node_num' in keys: - if self.__min_node_num is None: - self.__min_node_num = self.__get_min_node_num(all_node_nums) - infos['min_node_num'] = self.__min_node_num + if self._min_node_num is None: + self._min_node_num = self._get_min_node_num(all_node_nums) + infos['min_node_num'] = self._min_node_num if 'max_node_num' in keys: - if self.__max_node_num is None: - self.__max_node_num = self.__get_max_node_num(all_node_nums) - infos['max_node_num'] = self.__max_node_num + if self._max_node_num is None: + self._max_node_num = self._get_max_node_num(all_node_nums) + infos['max_node_num'] = self._max_node_num # graph edge number if any(i in keys for i in ['total_edge_num', 'ave_edge_num', 'min_edge_num', 'max_edge_num']): - all_edge_nums = self.__get_all_edge_nums() + all_edge_nums = self._get_all_edge_nums() if 'total_edge_num' in keys: - if self.__total_edge_num is None: - self.__total_edge_num = self.__get_total_edge_num(all_edge_nums) - infos['total_edge_num'] = self.__total_edge_num + if self._total_edge_num is None: + self._total_edge_num = self._get_total_edge_num(all_edge_nums) + infos['total_edge_num'] = self._total_edge_num if 'ave_edge_num' in keys: - if self.__ave_edge_num is None: - self.__ave_edge_num = self.__get_ave_edge_num(all_edge_nums) - infos['ave_edge_num'] = self.__ave_edge_num + if self._ave_edge_num is None: + self._ave_edge_num = self._get_ave_edge_num(all_edge_nums) + infos['ave_edge_num'] = self._ave_edge_num if 'max_edge_num' in keys: - if self.__max_edge_num is None: - 
self.__max_edge_num = self.__get_max_edge_num(all_edge_nums) - infos['max_edge_num'] = self.__max_edge_num + if self._max_edge_num is None: + self._max_edge_num = self._get_max_edge_num(all_edge_nums) + infos['max_edge_num'] = self._max_edge_num if 'min_edge_num' in keys: - if self.__min_edge_num is None: - self.__min_edge_num = self.__get_min_edge_num(all_edge_nums) - infos['min_edge_num'] = self.__min_edge_num + if self._min_edge_num is None: + self._min_edge_num = self._get_min_edge_num(all_edge_nums) + infos['min_edge_num'] = self._min_edge_num # label number if 'node_label_dim' in keys: - if self.__node_label_dim is None: - self.__node_label_dim = self.__get_node_label_dim() - infos['node_label_dim'] = self.__node_label_dim + if self._node_label_dim is None: + self._node_label_dim = self._get_node_label_dim() + infos['node_label_dim'] = self._node_label_dim if 'node_label_nums' in keys: - if self.__node_label_nums is None: - self.__node_label_nums = {} - for node_label in self.__node_labels: - self.__node_label_nums[node_label] = self.__get_node_label_num(node_label) - infos['node_label_nums'] = self.__node_label_nums + if self._node_label_nums is None: + self._node_label_nums = {} + for node_label in self._node_labels: + self._node_label_nums[node_label] = self._get_node_label_num(node_label) + infos['node_label_nums'] = self._node_label_nums if 'edge_label_dim' in keys: - if self.__edge_label_dim is None: - self.__edge_label_dim = self.__get_edge_label_dim() - infos['edge_label_dim'] = self.__edge_label_dim + if self._edge_label_dim is None: + self._edge_label_dim = self._get_edge_label_dim() + infos['edge_label_dim'] = self._edge_label_dim if 'edge_label_nums' in keys: - if self.__edge_label_nums is None: - self.__edge_label_nums = {} - for edge_label in self.__edge_labels: - self.__edge_label_nums[edge_label] = self.__get_edge_label_num(edge_label) - infos['edge_label_nums'] = self.__edge_label_nums + if self._edge_label_nums is None: + self._edge_label_nums = {} + for edge_label in self._edge_labels: + self._edge_label_nums[edge_label] = self._get_edge_label_num(edge_label) + infos['edge_label_nums'] = self._edge_label_nums if 'directed' in keys or 'substructures' in keys: - if self.__directed is None: - self.__directed = self.__is_directed() - infos['directed'] = self.__directed + if self._directed is None: + self._directed = self._is_directed() + infos['directed'] = self._directed # node degree if any(i in keys for i in ['ave_node_degree', 'max_node_degree', 'min_node_degree']): - all_node_degrees = self.__get_all_node_degrees() + all_node_degrees = self._get_all_node_degrees() if 'ave_node_degree' in keys: - if self.__ave_node_degree is None: - self.__ave_node_degree = self.__get_ave_node_degree(all_node_degrees) - infos['ave_node_degree'] = self.__ave_node_degree + if self._ave_node_degree is None: + self._ave_node_degree = self._get_ave_node_degree(all_node_degrees) + infos['ave_node_degree'] = self._ave_node_degree if 'max_node_degree' in keys: - if self.__max_node_degree is None: - self.__max_node_degree = self.__get_max_node_degree(all_node_degrees) - infos['max_node_degree'] = self.__max_node_degree + if self._max_node_degree is None: + self._max_node_degree = self._get_max_node_degree(all_node_degrees) + infos['max_node_degree'] = self._max_node_degree if 'min_node_degree' in keys: - if self.__min_node_degree is None: - self.__min_node_degree = self.__get_min_node_degree(all_node_degrees) - infos['min_node_degree'] = self.__min_node_degree + if self._min_node_degree is 
None: + self._min_node_degree = self._get_min_node_degree(all_node_degrees) + infos['min_node_degree'] = self._min_node_degree # fill factor if any(i in keys for i in ['ave_fill_factor', 'max_fill_factor', 'min_fill_factor']): - all_fill_factors = self.__get_all_fill_factors() + all_fill_factors = self._get_all_fill_factors() if 'ave_fill_factor' in keys: - if self.__ave_fill_factor is None: - self.__ave_fill_factor = self.__get_ave_fill_factor(all_fill_factors) - infos['ave_fill_factor'] = self.__ave_fill_factor + if self._ave_fill_factor is None: + self._ave_fill_factor = self._get_ave_fill_factor(all_fill_factors) + infos['ave_fill_factor'] = self._ave_fill_factor if 'max_fill_factor' in keys: - if self.__max_fill_factor is None: - self.__max_fill_factor = self.__get_max_fill_factor(all_fill_factors) - infos['max_fill_factor'] = self.__max_fill_factor + if self._max_fill_factor is None: + self._max_fill_factor = self._get_max_fill_factor(all_fill_factors) + infos['max_fill_factor'] = self._max_fill_factor if 'min_fill_factor' in keys: - if self.__min_fill_factor is None: - self.__min_fill_factor = self.__get_min_fill_factor(all_fill_factors) - infos['min_fill_factor'] = self.__min_fill_factor + if self._min_fill_factor is None: + self._min_fill_factor = self._get_min_fill_factor(all_fill_factors) + infos['min_fill_factor'] = self._min_fill_factor if 'substructures' in keys: - if self.__substructures is None: - self.__substructures = self.__get_substructures() - infos['substructures'] = self.__substructures + if self._substructures is None: + self._substructures = self._get_substructures() + infos['substructures'] = self._substructures if 'class_number' in keys: - if self.__class_number is None: - self.__class_number = self.__get_class_number() - infos['class_number'] = self.__class_number + if self._class_number is None: + self._class_number = self._get_class_number() + infos['class_number'] = self._class_number if 'node_attr_dim' in keys: - if self.__node_attr_dim is None: - self.__node_attr_dim = self.__get_node_attr_dim() - infos['node_attr_dim'] = self.__node_attr_dim + if self._node_attr_dim is None: + self._node_attr_dim = self._get_node_attr_dim() + infos['node_attr_dim'] = self._node_attr_dim if 'edge_attr_dim' in keys: - if self.__edge_attr_dim is None: - self.__edge_attr_dim = self.__get_edge_attr_dim() - infos['edge_attr_dim'] = self.__edge_attr_dim + if self._edge_attr_dim is None: + self._edge_attr_dim = self._get_edge_attr_dim() + infos['edge_attr_dim'] = self._edge_attr_dim # entropy of degree distribution. 
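
Every hunk above makes the same change, and the reason is Python's name mangling: an attribute with two leading underscores, such as self.__graphs written inside Dataset, is actually stored as self._Dataset__graphs, so subclasses cannot reach it under its written name; a single leading underscore is a plain naming convention with no mangling. A minimal sketch of the difference, using hypothetical classes rather than gklearn code:

import sys  # only for a clean exit in this standalone sketch

class Base:
    def __init__(self):
        self.__secret = 1   # mangled: stored as _Base__secret
        self._shared = 2    # stored under its written name

class Child(Base):
    def read(self):
        try:
            return self.__secret  # mangled to _Child__secret -> AttributeError
        except AttributeError:
            return self._shared   # single underscore resolves normally

print(Child().read())  # prints 2

This is what makes the renamed attributes and helper methods reachable and overridable from derived classes without spelling out the mangled _ClassName__member form.
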
@@ -438,14 +438,14 @@ class Dataset(object): base = params['all_degree_entropy']['base'] else: base = None - infos['all_degree_entropy'] = self.__compute_all_degree_entropy(base=base) + infos['all_degree_entropy'] = self._compute_all_degree_entropy(base=base) if 'ave_degree_entropy' in keys: if params is not None and ('ave_degree_entropy' in params) and ('base' in params['ave_degree_entropy']): base = params['ave_degree_entropy']['base'] else: base = None - infos['ave_degree_entropy'] = np.mean(self.__compute_all_degree_entropy(base=base)) + infos['ave_degree_entropy'] = np.mean(self._compute_all_degree_entropy(base=base)) return infos @@ -457,12 +457,12 @@ class Dataset(object): def remove_labels(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]): - node_labels = [item for item in node_labels if item in self.__node_labels] - edge_labels = [item for item in edge_labels if item in self.__edge_labels] - node_attrs = [item for item in node_attrs if item in self.__node_attrs] - edge_attrs = [item for item in edge_attrs if item in self.__edge_attrs] + node_labels = [item for item in node_labels if item in self._node_labels] + edge_labels = [item for item in edge_labels if item in self._edge_labels] + node_attrs = [item for item in node_attrs if item in self._node_attrs] + edge_attrs = [item for item in edge_attrs if item in self._edge_attrs] - for g in self.__graphs: + for g in self._graphs: for nd in g.nodes(): for nl in node_labels: del g.nodes[nd][nl] @@ -474,99 +474,99 @@ class Dataset(object): for ea in edge_attrs: del g.edges[ed][ea] if len(node_labels) > 0: - self.__node_labels = [nl for nl in self.__node_labels if nl not in node_labels] + self._node_labels = [nl for nl in self._node_labels if nl not in node_labels] if len(edge_labels) > 0: - self.__edge_labels = [el for el in self.__edge_labels if el not in edge_labels] + self._edge_labels = [el for el in self._edge_labels if el not in edge_labels] if len(node_attrs) > 0: - self.__node_attrs = [na for na in self.__node_attrs if na not in node_attrs] + self._node_attrs = [na for na in self._node_attrs if na not in node_attrs] if len(edge_attrs) > 0: - self.__edge_attrs = [ea for ea in self.__edge_attrs if ea not in edge_attrs] + self._edge_attrs = [ea for ea in self._edge_attrs if ea not in edge_attrs] def clean_labels(self): labels = [] - for name in self.__node_labels: + for name in self._node_labels: label = set() - for G in self.__graphs: + for G in self._graphs: label = label | set(nx.get_node_attributes(G, name).values()) if len(label) > 1: labels.append(name) break if len(label) < 2: - for G in self.__graphs: + for G in self._graphs: for nd in G.nodes(): del G.nodes[nd][name] - self.__node_labels = labels + self._node_labels = labels labels = [] - for name in self.__edge_labels: + for name in self._edge_labels: label = set() - for G in self.__graphs: + for G in self._graphs: label = label | set(nx.get_edge_attributes(G, name).values()) if len(label) > 1: labels.append(name) break if len(label) < 2: - for G in self.__graphs: + for G in self._graphs: for ed in G.edges(): del G.edges[ed][name] - self.__edge_labels = labels + self._edge_labels = labels labels = [] - for name in self.__node_attrs: + for name in self._node_attrs: label = set() - for G in self.__graphs: + for G in self._graphs: label = label | set(nx.get_node_attributes(G, name).values()) if len(label) > 1: labels.append(name) break if len(label) < 2: - for G in self.__graphs: + for G in self._graphs: for nd in G.nodes(): del G.nodes[nd][name] - 
self.__node_attrs = labels + self._node_attrs = labels labels = [] - for name in self.__edge_attrs: + for name in self._edge_attrs: label = set() - for G in self.__graphs: + for G in self._graphs: label = label | set(nx.get_edge_attributes(G, name).values()) if len(label) > 1: labels.append(name) break if len(label) < 2: - for G in self.__graphs: + for G in self._graphs: for ed in G.edges(): del G.edges[ed][name] - self.__edge_attrs = labels + self._edge_attrs = labels def cut_graphs(self, range_): - self.__graphs = [self.__graphs[i] for i in range_] - if self.__targets is not None: - self.__targets = [self.__targets[i] for i in range_] + self._graphs = [self._graphs[i] for i in range_] + if self._targets is not None: + self._targets = [self._targets[i] for i in range_] self.clean_labels() def trim_dataset(self, edge_required=False): if edge_required: - trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)] + trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if (nx.number_of_nodes(g) != 0 and nx.number_of_edges(g) != 0)] else: - trimed_pairs = [(idx, g) for idx, g in enumerate(self.__graphs) if nx.number_of_nodes(g) != 0] + trimed_pairs = [(idx, g) for idx, g in enumerate(self._graphs) if nx.number_of_nodes(g) != 0] idx = [p[0] for p in trimed_pairs] - self.__graphs = [p[1] for p in trimed_pairs] - self.__targets = [self.__targets[i] for i in idx] + self._graphs = [p[1] for p in trimed_pairs] + self._targets = [self._targets[i] for i in idx] self.clean_labels() def copy(self): dataset = Dataset() - graphs = [g.copy() for g in self.__graphs] if self.__graphs is not None else None - target = self.__targets.copy() if self.__targets is not None else None - node_labels = self.__node_labels.copy() if self.__node_labels is not None else None - node_attrs = self.__node_attrs.copy() if self.__node_attrs is not None else None - edge_labels = self.__edge_labels.copy() if self.__edge_labels is not None else None - edge_attrs = self.__edge_attrs.copy() if self.__edge_attrs is not None else None + graphs = [g.copy() for g in self._graphs] if self._graphs is not None else None + target = self._targets.copy() if self._targets is not None else None + node_labels = self._node_labels.copy() if self._node_labels is not None else None + node_attrs = self._node_attrs.copy() if self._node_attrs is not None else None + edge_labels = self._edge_labels.copy() if self._edge_labels is not None else None + edge_attrs = self._edge_attrs.copy() if self._edge_attrs is not None else None dataset.load_graphs(graphs, target) dataset.set_labels(node_labels=node_labels, node_attrs=node_attrs, edge_labels=edge_labels, edge_attrs=edge_attrs) # @todo: clean_labels and add other class members? 
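
The copy/trim_dataset pair above is easiest to see in use. A hedged usage sketch under the API shown in this diff (the toy graphs and targets are made up; note that load_graphs must be followed by set_labels, as the comment in its hunk says, and that copy carries over only graphs, targets, and label names, not the cached statistics):

import networkx as nx
from gklearn.utils.dataset import Dataset

g1 = nx.path_graph(3)    # 3 nodes, 2 edges
g2 = nx.empty_graph(2)   # 2 nodes, no edges: removed by trim_dataset below

ds = Dataset()
ds.load_graphs([g1, g2], targets=[0.5, 1.2])
ds.set_labels()          # these toy graphs carry no labels or attributes

backup = ds.copy()                   # deep-copies graphs, targets, label names
ds.trim_dataset(edge_required=True)  # drops g2 and its target
print(len(ds.graphs), len(backup.graphs))  # 1 2
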
@@ -575,7 +575,7 @@ class Dataset(object): def get_all_node_labels(self): node_labels = [] - for g in self.__graphs: + for g in self._graphs: for n in g.nodes(): nl = tuple(g.nodes[n].items()) if nl not in node_labels: @@ -585,7 +585,7 @@ def get_all_edge_labels(self): edge_labels = [] - for g in self.__graphs: + for g in self._graphs: for e in g.edges(): el = tuple(g.edges[e].items()) if el not in edge_labels: @@ -593,93 +593,93 @@ return edge_labels - def __get_dataset_size(self): - return len(self.__graphs) + def _get_dataset_size(self): + return len(self._graphs) - def __get_all_node_nums(self): - return [nx.number_of_nodes(G) for G in self.__graphs] + def _get_all_node_nums(self): + return [nx.number_of_nodes(G) for G in self._graphs] - def __get_total_node_nums(self, all_node_nums): + def _get_total_node_num(self, all_node_nums): return np.sum(all_node_nums) - def __get_ave_node_num(self, all_node_nums): + def _get_ave_node_num(self, all_node_nums): return np.mean(all_node_nums) - def __get_min_node_num(self, all_node_nums): + def _get_min_node_num(self, all_node_nums): return np.amin(all_node_nums) - def __get_max_node_num(self, all_node_nums): + def _get_max_node_num(self, all_node_nums): return np.amax(all_node_nums) - def __get_all_edge_nums(self): - return [nx.number_of_edges(G) for G in self.__graphs] + def _get_all_edge_nums(self): + return [nx.number_of_edges(G) for G in self._graphs] - def __get_total_edge_nums(self, all_edge_nums): + def _get_total_edge_num(self, all_edge_nums): return np.sum(all_edge_nums) - def __get_ave_edge_num(self, all_edge_nums): + def _get_ave_edge_num(self, all_edge_nums): return np.mean(all_edge_nums) - def __get_min_edge_num(self, all_edge_nums): + def _get_min_edge_num(self, all_edge_nums): return np.amin(all_edge_nums) - def __get_max_edge_num(self, all_edge_nums): + def _get_max_edge_num(self, all_edge_nums): return np.amax(all_edge_nums) - def __get_node_label_dim(self): - return len(self.__node_labels) + def _get_node_label_dim(self): + return len(self._node_labels) - def __get_node_label_num(self, node_label): + def _get_node_label_num(self, node_label): nl = set() - for G in self.__graphs: + for G in self._graphs: nl = nl | set(nx.get_node_attributes(G, node_label).values()) return len(nl) - def __get_edge_label_dim(self): - return len(self.__edge_labels) + def _get_edge_label_dim(self): + return len(self._edge_labels) - def __get_edge_label_num(self, edge_label): + def _get_edge_label_num(self, edge_label): el = set() - for G in self.__graphs: + for G in self._graphs: el = el | set(nx.get_edge_attributes(G, edge_label).values()) return len(el) - def __is_directed(self): - return nx.is_directed(self.__graphs[0]) + def _is_directed(self): + return nx.is_directed(self._graphs[0]) - def __get_all_node_degrees(self): - return [np.mean(list(dict(G.degree()).values())) for G in self.__graphs] + def _get_all_node_degrees(self): + return [np.mean(list(dict(G.degree()).values())) for G in self._graphs] - def __get_ave_node_degree(self, all_node_degrees): + def _get_ave_node_degree(self, all_node_degrees): return np.mean(all_node_degrees) - def __get_max_node_degree(self, all_node_degrees): + def _get_max_node_degree(self, all_node_degrees): return np.amax(all_node_degrees) - def __get_min_node_degree(self, all_node_degrees): + def _get_min_node_degree(self, all_node_degrees): return np.amin(all_node_degrees) - def __get_all_fill_factors(self): + def _get_all_fill_factors(self): """Get fill factor, the
proportion of non-zero entries in the adjacency matrix. Returns @@ -687,24 +687,24 @@ class Dataset(object): list[float] List of fill factors for all graphs. """ - return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self.__graphs] + return [nx.number_of_edges(G) / (nx.number_of_nodes(G) ** 2) for G in self._graphs] - def __get_ave_fill_factor(self, all_fill_factors): + def _get_ave_fill_factor(self, all_fill_factors): return np.mean(all_fill_factors) - def __get_max_fill_factor(self, all_fill_factors): + def _get_max_fill_factor(self, all_fill_factors): return np.amax(all_fill_factors) - def __get_min_fill_factor(self, all_fill_factors): + def _get_min_fill_factor(self, all_fill_factors): return np.amin(all_fill_factors) - def __get_substructures(self): + def _get_substructures(self): subs = set() - for G in self.__graphs: + for G in self._graphs: degrees = list(dict(G.degree()).values()) if any(i == 2 for i in degrees): subs.add('linear') @@ -713,8 +713,8 @@ if 'linear' in subs and 'non linear' in subs: break - if self.__directed: - for G in self.__graphs: + if self._directed: + for G in self._graphs: if len(list(nx.find_cycle(G))) > 0: subs.add('cyclic') break @@ -737,19 +737,19 @@ return subs - def __get_class_num(self): - return len(set(self.__targets)) + def _get_class_number(self): + return len(set(self._targets)) - def __get_node_attr_dim(self): - return len(self.__node_attrs) + def _get_node_attr_dim(self): + return len(self._node_attrs) - def __get_edge_attr_dim(self): - return len(self.__edge_attrs) + def _get_edge_attr_dim(self): + return len(self._edge_attrs) - def __compute_all_degree_entropy(self, base=None): + def _compute_all_degree_entropy(self, base=None): """Compute the entropy of degree distribution of each graph.
Parameters @@ -765,7 +765,7 @@ class Dataset(object): from gklearn.utils.stats import entropy degree_entropy = [] - for g in self.__graphs: + for g in self._graphs: degrees = list(dict(g.degree()).values()) en = entropy(degrees, base=base) degree_entropy.append(en) @@ -774,32 +774,32 @@ @property def graphs(self): - return self.__graphs + return self._graphs @property def targets(self): - return self.__targets + return self._targets @property def node_labels(self): - return self.__node_labels + return self._node_labels @property def edge_labels(self): - return self.__edge_labels + return self._edge_labels @property def node_attrs(self): - return self.__node_attrs + return self._node_attrs @property def edge_attrs(self): - return self.__edge_attrs + return self._edge_attrs def split_dataset_by_target(dataset): diff --git a/gklearn/utils/graph_files.py b/gklearn/utils/graph_files.py index 7de4ba0..ea2f516 100644 --- a/gklearn/utils/graph_files.py +++ b/gklearn/utils/graph_files.py @@ -692,7 +692,7 @@ def load_from_ds(filename, filename_targets): # remove the '#'s in file names g, l_names = load_file_fun(dirname_dataset + '/' + tmp[0].replace('#', '', 1)) data.append(g) - __append_label_names(label_names, l_names) + _append_label_names(label_names, l_names) y.append(float(tmp[1])) else: # targets in a separate file for i in range(0, len(content)): @@ -700,7 +700,7 @@ def load_from_ds(filename, filename_targets): # remove the '#'s in file names g, l_names = load_file_fun(dirname_dataset + '/' + tmp.replace('#', '', 1)) data.append(g) - __append_label_names(label_names, l_names) + _append_label_names(label_names, l_names) with open(filename_targets) as fnt: content_y = fnt.read().splitlines() @@ -745,13 +745,13 @@ def load_from_xml(filename, dir_dataset=None): mol_class = graph.attrib['class'] g, l_names = load_gxl(dir_dataset + '/' + mol_filename) data.append(g) - __append_label_names(label_names, l_names) + _append_label_names(label_names, l_names) y.append(mol_class) return data, y, label_names -def __append_label_names(label_names, new_names): +def _append_label_names(label_names, new_names): for key, val in label_names.items(): label_names[key] += [name for name in new_names[key] if name not in val] diff --git a/gklearn/utils/knn.py b/gklearn/utils/knn.py index 81419be..7585b73 100644 --- a/gklearn/utils/knn.py +++ b/gklearn/utils/knn.py @@ -73,7 +73,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t y_all = dataset.targets # compute kernel distances. - dis_mat = __compute_kernel_distances(dataset, kernel_options, trainset=trainset) + dis_mat = _compute_kernel_distances(dataset, kernel_options, trainset=trainset) rs = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=0) @@ -121,7 +121,7 @@ def knn_cv(dataset, kernel_options, trainset=None, n_neighbors=1, n_splits=50, t return results -def __compute_kernel_distances(dataset, kernel_options, trainset=None): +def _compute_kernel_distances(dataset, kernel_options, trainset=None): graph_kernel = get_graph_kernel_by_name(kernel_options['name'], node_labels=dataset.node_labels, edge_labels=dataset.edge_labels, diff --git a/gklearn/utils/timer.py b/gklearn/utils/timer.py index b1cecec..6b12f4a 100644 --- a/gklearn/utils/timer.py +++ b/gklearn/utils/timer.py @@ -23,8 +23,8 @@ class Timer(object): time_limit_in_sec : float The time limit in seconds.
""" - self.__time_limit_in_sec = time_limit_in_sec - self.__start_time = time.time() + self._time_limit_in_sec = time_limit_in_sec + self._start_time = time.time() def expired(self): @@ -34,7 +34,7 @@ class Timer(object): ------ Boolean true if the time limit has expired and false otherwise. """ - if self.__time_limit_in_sec > 0: - runtime = time.time() - self.__start_time - return runtime >= self.__time_limit_in_sec + if self._time_limit_in_sec > 0: + runtime = time.time() - self._start_time + return runtime >= self._time_limit_in_sec return False \ No newline at end of file