diff --git a/lang/fr/gklearn/kernels/path_up_to_h.py b/lang/fr/gklearn/kernels/path_up_to_h.py index d8cc387..e9869ea 100644 --- a/lang/fr/gklearn/kernels/path_up_to_h.py +++ b/lang/fr/gklearn/kernels/path_up_to_h.py @@ -28,16 +28,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def __init__(self, **kwargs): GraphKernel.__init__(self) - self.__node_labels = kwargs.get('node_labels', []) - self.__edge_labels = kwargs.get('edge_labels', []) - self.__depth = int(kwargs.get('depth', 10)) - self.__k_func = kwargs.get('k_func', 'MinMax') - self.__compute_method = kwargs.get('compute_method', 'trie') - self.__ds_infos = kwargs.get('ds_infos', {}) + self._node_labels = kwargs.get('node_labels', []) + self._edge_labels = kwargs.get('edge_labels', []) + self._depth = int(kwargs.get('depth', 10)) + self._k_func = kwargs.get('k_func', 'MinMax') + self._compute_method = kwargs.get('compute_method', 'trie') + self._ds_infos = kwargs.get('ds_infos', {}) def _compute_gm_series(self): - self.__add_dummy_labels(self._graphs) + self._add_dummy_labels(self._graphs) from itertools import combinations_with_replacement itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) @@ -50,16 +50,16 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) - if self.__compute_method == 'trie': - all_paths = [self.__find_all_path_as_trie(self._graphs[i]) for i in iterator_ps] + if self._compute_method == 'trie': + all_paths = [self._find_all_path_as_trie(self._graphs[i]) for i in iterator_ps] for i, j in iterator_kernel: - kernel = self.__kernel_do_trie(all_paths[i], all_paths[j]) + kernel = self._kernel_do_trie(all_paths[i], all_paths[j]) gram_matrix[i][j] = kernel gram_matrix[j][i] = kernel else: - all_paths = [self.__find_all_paths_until_length(self._graphs[i]) for i in iterator_ps] + all_paths = [self._find_all_paths_until_length(self._graphs[i]) for i in iterator_ps] for i, j in iterator_kernel: - kernel = self.__kernel_do_naive(all_paths[i], all_paths[j]) + kernel = self._kernel_do_naive(all_paths[i], all_paths[j]) gram_matrix[i][j] = kernel gram_matrix[j][i] = kernel @@ -67,7 +67,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def _compute_gm_imap_unordered(self): - self.__add_dummy_labels(self._graphs) + self._add_dummy_labels(self._graphs) # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. @@ -78,9 +78,9 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None else: chunksize = 100 all_paths = [[] for _ in range(len(self._graphs))] - if self.__compute_method == 'trie' and self.__k_func is not None: + if self._compute_method == 'trie' and self._k_func is not None: get_ps_fun = self._wrapper_find_all_path_as_trie - elif self.__compute_method != 'trie' and self.__k_func is not None: + elif self._compute_method != 'trie' and self._k_func is not None: get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) else: get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) @@ -97,12 +97,12 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) - if self.__compute_method == 'trie' and self.__k_func is not None: + if self._compute_method == 'trie' and self._k_func is not None: def init_worker(trie_toshare): global G_trie G_trie = trie_toshare do_fun = self._wrapper_kernel_do_trie - elif self.__compute_method != 'trie' and self.__k_func is not None: + elif self._compute_method != 'trie' and self._k_func is not None: def init_worker(plist_toshare): global G_plist G_plist = plist_toshare @@ -111,7 +111,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def init_worker(plist_toshare): global G_plist G_plist = plist_toshare - do_fun = self.__wrapper_kernel_do_kernelless # @todo: what is this? + do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this? parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose) @@ -119,7 +119,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def _compute_kernel_list_series(self, g1, g_list): - self.__add_dummy_labels(g_list + [g1]) + self._add_dummy_labels(g_list + [g1]) if self._verbose >= 2: iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) @@ -130,24 +130,24 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None kernel_list = [None] * len(g_list) - if self.__compute_method == 'trie': - paths_g1 = self.__find_all_path_as_trie(g1) - paths_g_list = [self.__find_all_path_as_trie(g) for g in iterator_ps] + if self._compute_method == 'trie': + paths_g1 = self._find_all_path_as_trie(g1) + paths_g_list = [self._find_all_path_as_trie(g) for g in iterator_ps] for i in iterator_kernel: - kernel = self.__kernel_do_trie(paths_g1, paths_g_list[i]) + kernel = self._kernel_do_trie(paths_g1, paths_g_list[i]) kernel_list[i] = kernel else: - paths_g1 = self.__find_all_paths_until_length(g1) - paths_g_list = [self.__find_all_paths_until_length(g) for g in iterator_ps] + paths_g1 = self._find_all_paths_until_length(g1) + paths_g_list = [self._find_all_paths_until_length(g) for g in iterator_ps] for i in iterator_kernel: - kernel = self.__kernel_do_naive(paths_g1, paths_g_list[i]) + kernel = self._kernel_do_naive(paths_g1, paths_g_list[i]) kernel_list[i] = kernel return kernel_list def _compute_kernel_list_imap_unordered(self, g1, g_list): - self.__add_dummy_labels(g_list + [g1]) + self._add_dummy_labels(g_list + [g1]) # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. @@ -158,14 +158,14 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None else: chunksize = 100 paths_g_list = [[] for _ in range(len(g_list))] - if self.__compute_method == 'trie' and self.__k_func is not None: - paths_g1 = self.__find_all_path_as_trie(g1) + if self._compute_method == 'trie' and self._k_func is not None: + paths_g1 = self._find_all_path_as_trie(g1) get_ps_fun = self._wrapper_find_all_path_as_trie - elif self.__compute_method != 'trie' and self.__k_func is not None: - paths_g1 = self.__find_all_paths_until_length(g1) + elif self._compute_method != 'trie' and self._k_func is not None: + paths_g1 = self._find_all_paths_until_length(g1) get_ps_fun = partial(self._wrapper_find_all_paths_until_length, True) else: - paths_g1 = self.__find_all_paths_until_length(g1) + paths_g1 = self._find_all_paths_until_length(g1) get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False) if self._verbose >= 2: iterator = tqdm(pool.imap_unordered(get_ps_fun, itr, chunksize), @@ -196,28 +196,28 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def _wrapper_kernel_list_do(self, itr): - if self.__compute_method == 'trie' and self.__k_func is not None: - return itr, self.__kernel_do_trie(G_p1, G_plist[itr]) - elif self.__compute_method != 'trie' and self.__k_func is not None: - return itr, self.__kernel_do_naive(G_p1, G_plist[itr]) + if self._compute_method == 'trie' and self._k_func is not None: + return itr, self._kernel_do_trie(G_p1, G_plist[itr]) + elif self._compute_method != 'trie' and self._k_func is not None: + return itr, self._kernel_do_naive(G_p1, G_plist[itr]) else: - return itr, self.__kernel_do_kernelless(G_p1, G_plist[itr]) + return itr, self._kernel_do_kernelless(G_p1, G_plist[itr]) def _compute_single_kernel_series(self, g1, g2): - self.__add_dummy_labels([g1] + [g2]) - if self.__compute_method == 'trie': - paths_g1 = self.__find_all_path_as_trie(g1) - paths_g2 = self.__find_all_path_as_trie(g2) - kernel = self.__kernel_do_trie(paths_g1, paths_g2) + self._add_dummy_labels([g1] + [g2]) + if self._compute_method == 'trie': + paths_g1 = self._find_all_path_as_trie(g1) + paths_g2 = self._find_all_path_as_trie(g2) + kernel = self._kernel_do_trie(paths_g1, paths_g2) else: - paths_g1 = self.__find_all_paths_until_length(g1) - paths_g2 = self.__find_all_paths_until_length(g2) - kernel = self.__kernel_do_naive(paths_g1, paths_g2) + paths_g1 = self._find_all_paths_until_length(g1) + paths_g2 = self._find_all_paths_until_length(g2) + kernel = self._kernel_do_naive(paths_g1, paths_g2) return kernel - def __kernel_do_trie(self, trie1, trie2): + def _kernel_do_trie(self, trie1, trie2): """Compute path graph kernels up to depth d between 2 graphs using trie. Parameters @@ -233,7 +233,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None kernel : float Path kernel up to h between 2 graphs. """ - if self.__k_func == 'tanimoto': + if self._k_func == 'tanimoto': # traverse all paths in graph1 and search them in graph2. Deep-first # search is applied. def traverseTrie1t(root, trie2, setlist, pcurrent=[]): @@ -278,7 +278,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None # print(setlist) kernel = setlist[0] / setlist[1] - elif self.__k_func == 'MinMax': # MinMax kernel + elif self._k_func == 'MinMax': # MinMax kernel # traverse all paths in graph1 and search them in graph2. Deep-first # search is applied. def traverseTrie1m(root, trie2, sumlist, pcurrent=[]): @@ -331,10 +331,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def _wrapper_kernel_do_trie(self, itr): i = itr[0] j = itr[1] - return i, j, self.__kernel_do_trie(G_trie[i], G_trie[j]) + return i, j, self._kernel_do_trie(G_trie[i], G_trie[j]) - def __kernel_do_naive(self, paths1, paths2): + def _kernel_do_naive(self, paths1, paths2): """Compute path graph kernels up to depth d between 2 graphs naively. Parameters @@ -355,7 +355,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None """ all_paths = list(set(paths1 + paths2)) - if self.__k_func == 'tanimoto': + if self._k_func == 'tanimoto': length_union = len(set(paths1 + paths2)) kernel = (len(set(paths1)) + len(set(paths2)) - length_union) / length_union @@ -364,7 +364,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None # kernel_uv = np.dot(vector1, vector2) # kernel = kernel_uv / (len(set(paths1)) + len(set(paths2)) - kernel_uv) - elif self.__k_func == 'MinMax': # MinMax kernel + elif self._k_func == 'MinMax': # MinMax kernel path_count1 = Counter(paths1) path_count2 = Counter(paths2) vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0) @@ -374,7 +374,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None kernel = np.sum(np.minimum(vector1, vector2)) / \ np.sum(np.maximum(vector1, vector2)) - elif self.__k_func is None: # no sub-kernel used; compare paths directly. + elif self._k_func is None: # no sub-kernel used; compare paths directly. path_count1 = Counter(paths1) path_count2 = Counter(paths2) vector1 = [(path_count1[key] if (key in path_count1.keys()) else 0) @@ -392,10 +392,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def _wrapper_kernel_do_naive(self, itr): i = itr[0] j = itr[1] - return i, j, self.__kernel_do_naive(G_plist[i], G_plist[j]) + return i, j, self._kernel_do_naive(G_plist[i], G_plist[j]) - def __find_all_path_as_trie(self, G): + def _find_all_path_as_trie(self, G): # all_path = find_all_paths_until_length(G, length, ds_attrs, # node_label=node_label, # edge_label=edge_label) @@ -431,11 +431,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None # them. Deep-first search is applied. Notice the reverse of each path is # also stored to the trie. def traverseGraph(root, ptrie, G, pcurrent=[]): - if len(pcurrent) < self.__depth + 1: + if len(pcurrent) < self._depth + 1: for neighbor in G[root]: if neighbor not in pcurrent: pcurrent.append(neighbor) - plstr = self.__paths2labelseqs([pcurrent], G) + plstr = self._paths2labelseqs([pcurrent], G) ptrie.insertWord(plstr[0]) traverseGraph(neighbor, ptrie, G, pcurrent) del pcurrent[-1] @@ -443,7 +443,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None ptrie = Trie() path_l = [[n] for n in G.nodes] # paths of length l - path_l_str = self.__paths2labelseqs(path_l, G) + path_l_str = self._paths2labelseqs(path_l, G) for p in path_l_str: ptrie.insertWord(p) for n in G.nodes: @@ -480,11 +480,11 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None def _wrapper_find_all_path_as_trie(self, itr_item): g = itr_item[0] i = itr_item[1] - return i, self.__find_all_path_as_trie(g) + return i, self._find_all_path_as_trie(g) # @todo: (can be removed maybe) this method find paths repetively, it could be faster. - def __find_all_paths_until_length(self, G, tolabelseqs=True): + def _find_all_paths_until_length(self, G, tolabelseqs=True): """Find all paths no longer than a certain maximum length in a graph. A recursive depth first search is applied. @@ -511,7 +511,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None """ # path_l = [tuple([n]) for n in G.nodes] # paths of length l # all_paths = path_l[:] - # for l in range(1, self.__depth + 1): + # for l in range(1, self._depth + 1): # path_l_new = [] # for path in path_l: # for neighbor in G[path[-1]]: @@ -525,7 +525,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None path_l = [[n] for n in G.nodes] # paths of length l all_paths = [p.copy() for p in path_l] - for l in range(1, self.__depth + 1): + for l in range(1, self._depth + 1): path_lplus1 = [] for path in path_l: for neighbor in G[path[-1]]: @@ -537,7 +537,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None all_paths += path_lplus1 path_l = [p.copy() for p in path_lplus1] - # for i in range(0, self.__depth + 1): + # for i in range(0, self._depth + 1): # new_paths = find_all_paths(G, i) # if new_paths == []: # break @@ -546,36 +546,36 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None # consider labels # print(paths2labelseqs(all_paths, G, ds_attrs, node_label, edge_label)) # print() - return (self.__paths2labelseqs(all_paths, G) if tolabelseqs else all_paths) + return (self._paths2labelseqs(all_paths, G) if tolabelseqs else all_paths) def _wrapper_find_all_paths_until_length(self, tolabelseqs, itr_item): g = itr_item[0] i = itr_item[1] - return i, self.__find_all_paths_until_length(g, tolabelseqs=tolabelseqs) + return i, self._find_all_paths_until_length(g, tolabelseqs=tolabelseqs) - def __paths2labelseqs(self, plist, G): - if len(self.__node_labels) > 0: - if len(self.__edge_labels) > 0: + def _paths2labelseqs(self, plist, G): + if len(self._node_labels) > 0: + if len(self._edge_labels) > 0: path_strs = [] for path in plist: pths_tmp = [] for idx, node in enumerate(path[:-1]): - pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) - pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels)) - pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self.__node_labels)) + pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) + pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels)) + pths_tmp.append(tuple(G.nodes[path[-1]][nl] for nl in self._node_labels)) path_strs.append(tuple(pths_tmp)) else: path_strs = [] for path in plist: pths_tmp = [] for node in path: - pths_tmp.append(tuple(G.nodes[node][nl] for nl in self.__node_labels)) + pths_tmp.append(tuple(G.nodes[node][nl] for nl in self._node_labels)) path_strs.append(tuple(pths_tmp)) return path_strs else: - if len(self.__edge_labels) > 0: + if len(self._edge_labels) > 0: path_strs = [] for path in plist: if len(path) == 1: @@ -583,7 +583,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None else: pths_tmp = [] for idx, node in enumerate(path[:-1]): - pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self.__edge_labels)) + pths_tmp.append(tuple(G[node][path[idx + 1]][el] for el in self._edge_labels)) path_strs.append(tuple(pths_tmp)) return path_strs else: @@ -591,13 +591,13 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None # return [tuple([len(path)]) for path in all_paths] - def __add_dummy_labels(self, Gn): - if self.__k_func is not None: - if len(self.__node_labels) == 0 or (len(self.__node_labels) == 1 and self.__node_labels[0] == SpecialLabel.DUMMY): + def _add_dummy_labels(self, Gn): + if self._k_func is not None: + if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY): for i in range(len(Gn)): nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY) - self.__node_labels = [SpecialLabel.DUMMY] - if len(self.__edge_labels) == 0 or (len(self.__edge_labels) == 1 and self.__edge_labels[0] == SpecialLabel.DUMMY): + self._node_labels = [SpecialLabel.DUMMY] + if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY): for i in range(len(Gn)): nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY) - self.__edge_labels = [SpecialLabel.DUMMY] \ No newline at end of file + self._edge_labels = [SpecialLabel.DUMMY] \ No newline at end of file