diff --git a/lang/zh/gklearn/kernels/structural_sp.py b/lang/zh/gklearn/kernels/structural_sp.py index 4b9fb26..19322a7 100644 --- a/lang/zh/gklearn/kernels/structural_sp.py +++ b/lang/zh/gklearn/kernels/structural_sp.py @@ -18,7 +18,7 @@ from tqdm import tqdm # import networkx as nx import numpy as np from gklearn.utils.parallel import parallel_gm, parallel_me -from gklearn.utils.utils import get_shortest_paths +from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels from gklearn.kernels import GraphKernel @@ -26,15 +26,15 @@ class StructuralSP(GraphKernel): def __init__(self, **kwargs): GraphKernel.__init__(self) - self.__node_labels = kwargs.get('node_labels', []) - self.__edge_labels = kwargs.get('edge_labels', []) - self.__node_attrs = kwargs.get('node_attrs', []) - self.__edge_attrs = kwargs.get('edge_attrs', []) - self.__edge_weight = kwargs.get('edge_weight', None) - self.__node_kernels = kwargs.get('node_kernels', None) - self.__edge_kernels = kwargs.get('edge_kernels', None) - self.__compute_method = kwargs.get('compute_method', 'naive') - self.__ds_infos = kwargs.get('ds_infos', {}) + self._node_labels = kwargs.get('node_labels', []) + self._edge_labels = kwargs.get('edge_labels', []) + self._node_attrs = kwargs.get('node_attrs', []) + self._edge_attrs = kwargs.get('edge_attrs', []) + self._edge_weight = kwargs.get('edge_weight', None) + self._node_kernels = kwargs.get('node_kernels', None) + self._edge_kernels = kwargs.get('edge_kernels', None) + self._compute_method = kwargs.get('compute_method', 'naive') + self._ds_infos = kwargs.get('ds_infos', {}) def _compute_gm_series(self): @@ -44,12 +44,12 @@ class StructuralSP(GraphKernel): iterator = tqdm(self._graphs, desc='getting sp graphs', file=sys.stdout) else: iterator = self._graphs - if self.__compute_method == 'trie': + if self._compute_method == 'trie': for g in iterator: - splist.append(self.__get_sps_as_trie(g)) + splist.append(self._get_sps_as_trie(g)) else: for g in iterator: - splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])) + splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])) # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) @@ -57,17 +57,17 @@ class StructuralSP(GraphKernel): from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr - if self.__compute_method == 'trie': + if self._compute_method == 'trie': for i, j in iterator: - kernel = self.__ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) + kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) gram_matrix[i][j] = kernel gram_matrix[j][i] = kernel else: for i, j in iterator: - kernel = self.__ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j]) + kernel = self._ssp_do_naive(self._graphs[i], self._graphs[j], splist[i], splist[j]) # if(kernel > 1): # print("error here ") gram_matrix[i][j] = kernel @@ -86,7 +86,7 @@ class StructuralSP(GraphKernel): else: chunksize = 100 # get shortest path graphs of self._graphs - if self.__compute_method == 'trie': + if self._compute_method == 'trie': get_sps_fun = self._wrapper_get_sps_trie else: get_sps_fun = self._wrapper_get_sps_naive @@ -107,8 +107,8 @@ class StructuralSP(GraphKernel): global G_spl, G_gs G_spl = spl_toshare G_gs = gs_toshare - if self.__compute_method == 'trie': - do_fun = self.__wrapper_ssp_do_trie + if self._compute_method == 'trie': + do_fun = self._wrapper_ssp_do_trie else: do_fun = self._wrapper_ssp_do_naive parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, @@ -119,32 +119,32 @@ class StructuralSP(GraphKernel): def _compute_kernel_list_series(self, g1, g_list): # get shortest paths of g1 and each graph in g_list. - sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed']) + sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) splist = [] if self._verbose >= 2: iterator = tqdm(g_list, desc='getting sp graphs', file=sys.stdout) else: iterator = g_list - if self.__compute_method == 'trie': + if self._compute_method == 'trie': for g in iterator: - splist.append(self.__get_sps_as_trie(g)) + splist.append(self._get_sps_as_trie(g)) else: for g in iterator: - splist.append(get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed'])) + splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])) # compute kernel list. kernel_list = [None] * len(g_list) if self._verbose >= 2: - iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator = range(len(g_list)) - if self.__compute_method == 'trie': + if self._compute_method == 'trie': for i in iterator: - kernel = self.__ssp_do_trie(g1, g_list[i], sp1, splist[i]) + kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i]) kernel_list[i] = kernel else: for i in iterator: - kernel = self.__ssp_do_naive(g1, g_list[i], sp1, splist[i]) + kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i]) kernel_list[i] = kernel return kernel_list @@ -152,7 +152,7 @@ class StructuralSP(GraphKernel): def _compute_kernel_list_imap_unordered(self, g1, g_list): # get shortest paths of g1 and each graph in g_list. - sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed']) + sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) splist = [None] * len(g_list) pool = Pool(self._n_jobs) itr = zip(g_list, range(0, len(g_list))) @@ -161,7 +161,7 @@ class StructuralSP(GraphKernel): else: chunksize = 100 # get shortest path graphs of g_list - if self.__compute_method == 'trie': + if self._compute_method == 'trie': get_sps_fun = self._wrapper_get_sps_trie else: get_sps_fun = self._wrapper_get_sps_naive @@ -184,8 +184,8 @@ class StructuralSP(GraphKernel): G_spl = spl_toshare G_g1 = g1_toshare G_gl = gl_toshare - if self.__compute_method == 'trie': - do_fun = self.__wrapper_ssp_do_trie + if self._compute_method == 'trie': + do_fun = self._wrapper_ssp_do_trie else: do_fun = self._wrapper_kernel_list_do def func_assign(result, var_to_assign): @@ -193,42 +193,42 @@ class StructuralSP(GraphKernel): itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, - init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list def _wrapper_kernel_list_do(self, itr): - return itr, self.__ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr]) + return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr]) def _compute_single_kernel_series(self, g1, g2): - sp1 = get_shortest_paths(g1, self.__edge_weight, self.__ds_infos['directed']) - sp2 = get_shortest_paths(g2, self.__edge_weight, self.__ds_infos['directed']) - if self.__compute_method == 'trie': - kernel = self.__ssp_do_trie(g1, g2, sp1, sp2) + sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) + sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed']) + if self._compute_method == 'trie': + kernel = self._ssp_do_trie(g1, g2, sp1, sp2) else: - kernel = self.__ssp_do_naive(g1, g2, sp1, sp2) + kernel = self._ssp_do_naive(g1, g2, sp1, sp2) return kernel def _wrapper_get_sps_naive(self, itr_item): g = itr_item[0] i = itr_item[1] - return i, get_shortest_paths(g, self.__edge_weight, self.__ds_infos['directed']) + return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']) - def __ssp_do_naive(self, g1, g2, spl1, spl2): + def _ssp_do_naive(self, g1, g2, spl1, spl2): kernel = 0 # First, compute shortest path matrices, method borrowed from FCSP. - vk_dict = self.__get_all_node_kernels(g1, g2) + vk_dict = self._get_all_node_kernels(g1, g2) # Then, compute kernels between all pairs of edges, which is an idea of # extension of FCSP. It suits sparse graphs, which is the most case we # went though. For dense graphs, this would be slow. - ek_dict = self.__get_all_edge_kernels(g1, g2) + ek_dict = self._get_all_edge_kernels(g1, g2) # compute graph kernels if vk_dict: @@ -273,7 +273,7 @@ class StructuralSP(GraphKernel): if len(p1) == len(p2): kernel += 1 try: - kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average + kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average except ZeroDivisionError: print(spl1, spl2) print(g1.nodes(data=True)) @@ -314,60 +314,27 @@ class StructuralSP(GraphKernel): def _wrapper_ssp_do_naive(self, itr): i = itr[0] j = itr[1] - return i, j, self.__ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j]) + return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j]) - def __get_all_node_kernels(self, g1, g2): - # compute shortest path matrices, method borrowed from FCSP. - vk_dict = {} # shortest path matrices dict - if len(self.__node_labels) > 0: - # node symb and non-synb labeled - if len(self.__node_attrs) > 0: - kn = self.__node_kernels['mix'] - for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)): - n1_labels = [n1[1][nl] for nl in self.__node_labels] - n2_labels = [n2[1][nl] for nl in self.__node_labels] - n1_attrs = [n1[1][na] for na in self.__node_attrs] - n2_attrs = [n2[1][na] for na in self.__node_attrs] - vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) - # node symb labeled - else: - kn = self.__node_kernels['symb'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_labels = [n1[1][nl] for nl in self.__node_labels] - n2_labels = [n2[1][nl] for nl in self.__node_labels] - vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) - else: - # node non-synb labeled - if len(self.__node_attrs) > 0: - kn = self.__node_kernels['nsymb'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_attrs = [n1[1][na] for na in self.__node_attrs] - n2_attrs = [n2[1][na] for na in self.__node_attrs] - vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) - # node unlabeled - else: - pass - - return vk_dict + def _get_all_node_kernels(self, g1, g2): + return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) - def __get_all_edge_kernels(self, g1, g2): + def _get_all_edge_kernels(self, g1, g2): # compute kernels between all pairs of edges, which is an idea of # extension of FCSP. It suits sparse graphs, which is the most case we # went though. For dense graphs, this would be slow. ek_dict = {} # dict of edge kernels - if len(self.__edge_labels) > 0: + if len(self._edge_labels) > 0: # edge symb and non-synb labeled - if len(self.__edge_attrs) > 0: - ke = self.__edge_kernels['mix'] + if len(self._edge_attrs) > 0: + ke = self._edge_kernels['mix'] for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)): - e1_labels = [e1[2][el] for el in self.__edge_labels] - e2_labels = [e2[2][el] for el in self.__edge_labels] - e1_attrs = [e1[2][ea] for ea in self.__edge_attrs] - e2_attrs = [e2[2][ea] for ea in self.__edge_attrs] + e1_labels = [e1[2][el] for el in self._edge_labels] + e2_labels = [e2[2][el] for el in self._edge_labels] + e1_attrs = [e1[2][ea] for ea in self._edge_attrs] + e2_attrs = [e2[2][ea] for ea in self._edge_attrs] ek_temp = ke(e1_labels, e2_labels, e1_attrs, e2_attrs) ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp @@ -375,11 +342,11 @@ class StructuralSP(GraphKernel): ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp # edge symb labeled else: - ke = self.__edge_kernels['symb'] + ke = self._edge_kernels['symb'] for e1 in g1.edges(data=True): for e2 in g2.edges(data=True): - e1_labels = [e1[2][el] for el in self.__edge_labels] - e2_labels = [e2[2][el] for el in self.__edge_labels] + e1_labels = [e1[2][el] for el in self._edge_labels] + e2_labels = [e2[2][el] for el in self._edge_labels] ek_temp = ke(e1_labels, e2_labels) ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp @@ -387,12 +354,12 @@ class StructuralSP(GraphKernel): ek_dict[((e1[1], e1[0]), (e2[1], e2[0]))] = ek_temp else: # edge non-synb labeled - if len(self.__edge_attrs) > 0: - ke = self.__edge_kernels['nsymb'] + if len(self._edge_attrs) > 0: + ke = self._edge_kernels['nsymb'] for e1 in g1.edges(data=True): for e2 in g2.edges(data=True): - e1_attrs = [e1[2][ea] for ea in self.__edge_attrs] - e2_attrs = [e2[2][ea] for ea in self.__edge_attrs] + e1_attrs = [e1[2][ea] for ea in self._edge_attrs] + e2_attrs = [e2[2][ea] for ea in self._edge_attrs] ek_temp = ke(e1_attrs, e2_attrs) ek_dict[((e1[0], e1[1]), (e2[0], e2[1]))] = ek_temp ek_dict[((e1[1], e1[0]), (e2[0], e2[1]))] = ek_temp