From 320964dd169b6b7155e42824bca8c99aa6147983 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Thu, 15 Oct 2020 16:13:10 +0200 Subject: [PATCH] Update comments, minor bugs for graph kernels. --- gklearn/kernels/commonWalkKernel.py | 24 ++++----- gklearn/kernels/common_walk.py | 14 ++--- gklearn/kernels/graph_kernel.py | 4 +- gklearn/kernels/marginalized.py | 22 ++++---- gklearn/kernels/marginalizedKernel.py | 32 ++++++------ gklearn/kernels/path_up_to_h.py | 16 +++--- gklearn/kernels/randomWalkKernel.py | 86 +++++++++++++++---------------- gklearn/kernels/shortest_path.py | 6 +-- gklearn/kernels/spKernel.py | 10 ++-- gklearn/kernels/structural_sp.py | 45 +++------------- gklearn/kernels/structuralspKernel.py | 26 +++++----- gklearn/kernels/treelet.py | 16 +++--- gklearn/kernels/treeletKernel.py | 16 +++--- gklearn/kernels/untilHPathKernel.py | 34 ++++++------ gklearn/kernels/weisfeilerLehmanKernel.py | 34 ++++++------ gklearn/kernels/weisfeiler_lehman.py | 26 +++++----- gklearn/utils/parallel.py | 2 +- 17 files changed, 190 insertions(+), 223 deletions(-) diff --git a/gklearn/kernels/commonWalkKernel.py b/gklearn/kernels/commonWalkKernel.py index a5f9cb1..fb6bd10 100644 --- a/gklearn/kernels/commonWalkKernel.py +++ b/gklearn/kernels/commonWalkKernel.py @@ -30,15 +30,15 @@ def commonwalkkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate common walk graph kernels between graphs. + """Compute common walk graph kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. node_label : string Node attribute used as symbolic label. The default node label is 'atom'. 
edge_label : string @@ -133,7 +133,7 @@ def commonwalkkernel(*args, # # for i, j, kernel in tqdm( # pool.imap_unordered(do_partial, itr, chunksize), -# desc='calculating kernels', +# desc='computing kernels', # file=sys.stdout): # Kmatrix[i][j] = kernel # Kmatrix[j][i] = kernel @@ -145,14 +145,14 @@ def commonwalkkernel(*args, # # direct product graph method - exponential # itr = combinations_with_replacement(range(0, len(Gn)), 2) # if compute_method == 'exp': -# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): +# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): # Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label, # edge_label, weight) # Kmatrix[j][i] = Kmatrix[i][j] # # # direct product graph method - geometric # elif compute_method == 'geo': -# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): +# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): # Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label, # edge_label, weight) # Kmatrix[j][i] = Kmatrix[i][j] @@ -161,7 +161,7 @@ def commonwalkkernel(*args, # # search all paths use brute force. # elif compute_method == 'brute': # n = int(n) -# # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset. +# # get all paths of all graphs before computing kernels to save time, but this may cost a lot of memory for large dataset. # all_walks = [ # find_all_walks_until_length(Gn[i], n, node_label, edge_label) # for i in range(0, len(Gn)) @@ -185,13 +185,13 @@ def commonwalkkernel(*args, def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta): - """Calculate walk graph kernels up to n between 2 graphs using exponential + """Compute walk graph kernels up to n between 2 graphs using exponential series. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. 
node_label : string Node attribute used as label. edge_label : string @@ -259,13 +259,13 @@ def wrapper_cw_exp(node_label, edge_label, beta, itr): def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma): - """Calculate common walk graph kernels up to n between 2 graphs using + """Compute common walk graph kernels up to n between 2 graphs using geometric series. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string Node attribute used as label. edge_label : string @@ -304,7 +304,7 @@ def _commonwalkkernel_brute(walks1, node_label='atom', edge_label='bond_type', labeled=True): - """Calculate walk graph kernels up to n between 2 graphs. + """Compute walk graph kernels up to n between 2 graphs. Parameters ---------- diff --git a/gklearn/kernels/common_walk.py b/gklearn/kernels/common_walk.py index 0aeb3ee..6372200 100644 --- a/gklearn/kernels/common_walk.py +++ b/gklearn/kernels/common_walk.py @@ -46,7 +46,7 @@ class CommonWalk(GraphKernel): from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr @@ -102,7 +102,7 @@ class CommonWalk(GraphKernel): # compute kernel list. 
kernel_list = [None] * len(g_list) if self._verbose >= 2: - iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator = range(len(g_list)) @@ -148,7 +148,7 @@ class CommonWalk(GraphKernel): len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered', - n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list @@ -179,13 +179,13 @@ class CommonWalk(GraphKernel): def __kernel_do_exp(self, g1, g2, beta): - """Calculate common walk graph kernel between 2 graphs using exponential + """Compute common walk graph kernel between 2 graphs using exponential series. Parameters ---------- g1, g2 : NetworkX graphs - Graphs between which the kernels are calculated. + Graphs between which the kernels are computed. beta : integer Weight. @@ -231,13 +231,13 @@ class CommonWalk(GraphKernel): def __kernel_do_geo(self, g1, g2, gamma): - """Calculate common walk graph kernel between 2 graphs using geometric + """Compute common walk graph kernel between 2 graphs using geometric series. Parameters ---------- g1, g2 : NetworkX graphs - Graphs between which the kernels are calculated. + Graphs between which the kernels are computed. gamma : integer Weight. 
diff --git a/gklearn/kernels/graph_kernel.py b/gklearn/kernels/graph_kernel.py index 7c6afde..a8dbd32 100644 --- a/gklearn/kernels/graph_kernel.py +++ b/gklearn/kernels/graph_kernel.py @@ -104,7 +104,7 @@ class GraphKernel(object): if self._parallel == 'imap_unordered': gram_matrix = self._compute_gm_imap_unordered() - elif self._parallel == None: + elif self._parallel is None: gram_matrix = self._compute_gm_series() else: raise Exception('Parallel mode is not set correctly.') @@ -130,7 +130,7 @@ class GraphKernel(object): if self._parallel == 'imap_unordered': kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list) - elif self._parallel == None: + elif self._parallel is None: kernel_list = self._compute_kernel_list_series(g1, g_list) else: raise Exception('Parallel mode is not set correctly.') diff --git a/gklearn/kernels/marginalized.py b/gklearn/kernels/marginalized.py index 6910468..499d51b 100644 --- a/gklearn/kernels/marginalized.py +++ b/gklearn/kernels/marginalized.py @@ -59,7 +59,7 @@ class Marginalized(GraphKernel): from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr for i, j in iterator: @@ -119,7 +119,7 @@ class Marginalized(GraphKernel): # compute kernel list. 
kernel_list = [None] * len(g_list) if self._verbose >= 2: - iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator = range(len(g_list)) for i in iterator: @@ -165,7 +165,7 @@ class Marginalized(GraphKernel): len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', - n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list @@ -184,12 +184,12 @@ class Marginalized(GraphKernel): def __kernel_do(self, g1, g2): - """Calculate marginalized graph kernel between 2 graphs. + """Compute marginalized graph kernel between 2 graphs. Parameters ---------- g1, g2 : NetworkX graphs - 2 graphs between which the kernel is calculated. + 2 graphs between which the kernel is computed. 
Return ------ @@ -212,12 +212,12 @@ class Marginalized(GraphKernel): # # matrix to save all the R_inf for all pairs of nodes # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # - # # calculate R_inf with a simple interative method + # # Compute R_inf with a simple interative method # for i in range(1, n_iteration): # R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) # R_inf_new.fill(r1) # - # # calculate R_inf for each pair of nodes + # # Compute R_inf for each pair of nodes # for node1 in g1.nodes(data=True): # neighbor_n1 = g1[node1[0]] # # the transition probability distribution in the random walks @@ -243,7 +243,7 @@ class Marginalized(GraphKernel): # neighbor2] # ref [1] equation (8) # R_inf[:] = R_inf_new # - # # add elements of R_inf up and calculate kernel + # # add elements of R_inf up and compute kernel # for node1 in g1.nodes(data=True): # for node2 in g2.nodes(data=True): # s = p_init_G1 * p_init_G2 * deltakernel( @@ -288,11 +288,11 @@ class Marginalized(GraphKernel): deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) - # calculate R_inf with a simple interative method + # Compute R_inf with a simple interative method for i in range(2, self.__n_iteration + 1): R_inf_old = R_inf.copy() - # calculate R_inf for each pair of nodes + # Compute R_inf for each pair of nodes for node1 in g1.nodes(): neighbor_n1 = g1[node1] # the transition probability distribution in the random walks @@ -309,7 +309,7 @@ class Marginalized(GraphKernel): (t_dict[(node1, node2, neighbor1, neighbor2)] * \ R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) - # add elements of R_inf up and calculate kernel + # add elements of R_inf up and compute kernel. 
for (n1, n2), value in R_inf.items(): s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) kernel += s * value # ref [1] equation (6) diff --git a/gklearn/kernels/marginalizedKernel.py b/gklearn/kernels/marginalizedKernel.py index 950f1a6..b6d7fb0 100644 --- a/gklearn/kernels/marginalizedKernel.py +++ b/gklearn/kernels/marginalizedKernel.py @@ -39,15 +39,15 @@ def marginalizedkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate marginalized graph kernels between graphs. + """Compute marginalized graph kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. node_label : string Node attribute used as symbolic label. The default node label is 'atom'. @@ -59,7 +59,7 @@ def marginalizedkernel(*args, The termination probability in the random walks generating step. n_iteration : integer - Time of iterations to calculate R_inf. + Time of iterations to compute R_inf. remove_totters : boolean Whether to remove totterings by method introduced in [2]. The default @@ -83,11 +83,11 @@ def marginalizedkernel(*args, Gn, attr_names=['node_labeled', 'edge_labeled', 'is_directed'], node_label=node_label, edge_label=edge_label) - if not ds_attrs['node_labeled'] or node_label == None: + if not ds_attrs['node_labeled'] or node_label is None: node_label = 'atom' for G in Gn: nx.set_node_attributes(G, '0', 'atom') - if not ds_attrs['edge_labeled'] or edge_label == None: + if not ds_attrs['edge_labeled'] or edge_label is None: edge_label = 'bond_type' for G in Gn: nx.set_edge_attributes(G, '0', 'bond_type') @@ -133,7 +133,7 @@ def marginalizedkernel(*args, # # ---- direct running, normally use single CPU core. 
---- ## pbar = tqdm( ## total=(1 + len(Gn)) * len(Gn) / 2, -## desc='calculating kernels', +## desc='Computing kernels', ## file=sys.stdout) # for i in range(0, len(Gn)): # for j in range(i, len(Gn)): @@ -152,12 +152,12 @@ def marginalizedkernel(*args, def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): - """Calculate marginalized graph kernel between 2 graphs. + """Compute marginalized graph kernel between 2 graphs. Parameters ---------- G1, G2 : NetworkX graphs - 2 graphs between which the kernel is calculated. + 2 graphs between which the kernel is computed. node_label : string node attribute used as label. edge_label : string @@ -165,7 +165,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): p_quit : integer the termination probability in the random walks generating step. n_iteration : integer - time of iterations to calculate R_inf. + time of iterations to compute R_inf. Return ------ @@ -188,12 +188,12 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): # # matrix to save all the R_inf for all pairs of nodes # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) # -# # calculate R_inf with a simple interative method +# # Compute R_inf with a simple interative method # for i in range(1, n_iteration): # R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) # R_inf_new.fill(r1) # -# # calculate R_inf for each pair of nodes +# # Compute R_inf for each pair of nodes # for node1 in g1.nodes(data=True): # neighbor_n1 = g1[node1[0]] # # the transition probability distribution in the random walks @@ -219,7 +219,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): # neighbor2] # ref [1] equation (8) # R_inf[:] = R_inf_new # -# # add elements of R_inf up and calculate kernel +# # add elements of R_inf up and compute kernel. 
# for node1 in g1.nodes(data=True): # for node2 in g2.nodes(data=True): # s = p_init_G1 * p_init_G2 * deltakernel( @@ -267,11 +267,11 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): neighbor_n1[neighbor1][edge_label], neighbor_n2[neighbor2][edge_label]) - # calculate R_inf with a simple interative method + # Compute R_inf with a simple interative method for i in range(2, n_iteration + 1): R_inf_old = R_inf.copy() - # calculate R_inf for each pair of nodes + # Compute R_inf for each pair of nodes for node1 in g1.nodes(): neighbor_n1 = g1[node1] # the transition probability distribution in the random walks @@ -288,7 +288,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): (t_dict[(node1, node2, neighbor1, neighbor2)] * \ R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) - # add elements of R_inf up and calculate kernel + # add elements of R_inf up and compute kernel. for (n1, n2), value in R_inf.items(): s = p_init_G1 * p_init_G2 * deltakernel( g1.nodes[n1][node_label], g2.nodes[n2][node_label]) diff --git a/gklearn/kernels/path_up_to_h.py b/gklearn/kernels/path_up_to_h.py index 1c8b5e2..d8cc387 100644 --- a/gklearn/kernels/path_up_to_h.py +++ b/gklearn/kernels/path_up_to_h.py @@ -24,7 +24,7 @@ from gklearn.kernels import GraphKernel from gklearn.utils import Trie -class PathUpToH(GraphKernel): # @todo: add function for k_func == None +class PathUpToH(GraphKernel): # @todo: add function for k_func is None def __init__(self, **kwargs): GraphKernel.__init__(self) @@ -43,7 +43,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout) - iterator_kernel = tqdm(itr_kernel, desc='calculating kernels', file=sys.stdout) + iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout) 
else: iterator_ps = range(0, len(self._graphs)) iterator_kernel = itr_kernel @@ -69,7 +69,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None def _compute_gm_imap_unordered(self): self.__add_dummy_labels(self._graphs) - # get all paths of all graphs before calculating kernels to save time, + # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. pool = Pool(self._n_jobs) itr = zip(self._graphs, range(0, len(self._graphs))) @@ -123,7 +123,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None if self._verbose >= 2: iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) - iterator_kernel = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator_ps = g_list iterator_kernel = range(len(g_list)) @@ -149,7 +149,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None def _compute_kernel_list_imap_unordered(self, g1, g_list): self.__add_dummy_labels(g_list + [g1]) - # get all paths of all graphs before calculating kernels to save time, + # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. 
pool = Pool(self._n_jobs) itr = zip(g_list, range(0, len(g_list))) @@ -190,7 +190,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, - init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list @@ -218,7 +218,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None def __kernel_do_trie(self, trie1, trie2): - """Calculate path graph kernels up to depth d between 2 graphs using trie. + """Compute path graph kernels up to depth d between 2 graphs using trie. Parameters ---------- @@ -335,7 +335,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None def __kernel_do_naive(self, paths1, paths2): - """Calculate path graph kernels up to depth d between 2 graphs naively. + """Compute path graph kernels up to depth d between 2 graphs naively. Parameters ---------- diff --git a/gklearn/kernels/randomWalkKernel.py b/gklearn/kernels/randomWalkKernel.py index 346bc98..65bf63c 100644 --- a/gklearn/kernels/randomWalkKernel.py +++ b/gklearn/kernels/randomWalkKernel.py @@ -37,15 +37,15 @@ def randomwalkkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate random walk graph kernels. + """Compute random walk graph kernels. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. compute_method : string Method used to compute kernel. 
The Following choices are @@ -125,7 +125,7 @@ def randomwalkkernel(*args, Gn = [g.copy() for g in Gn] eweight = None - if edge_weight == None: + if edge_weight is None: if verbose: print('\n None edge weight specified. Set all weight to 1.\n') else: @@ -212,12 +212,12 @@ def randomwalkkernel(*args, ############################################################################### def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True): - """Calculate walk graph kernels up to n between 2 graphs using Sylvester method. + """Compute walk graph kernels up to n between 2 graphs using Sylvester method. Parameters ---------- G1, G2 : NetworkX graph - Graphs between which the kernel is calculated. + Graphs between which the kernel is computed. node_label : string node attribute used as label. edge_label : string @@ -230,7 +230,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True """ Kmatrix = np.zeros((len(Gn), len(Gn))) - if q == None: + if q is None: # don't normalize adjacency matrices if q is a uniform vector. Note # A_wave_list actually contains the transposes of the adjacency matrices. A_wave_list = [ @@ -245,7 +245,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True # norm = A_tilde.sum(axis=0) # norm[norm == 0] = 1 # A_wave_list.append(A_tilde / norm) - if p == None: # p is uniform distribution as default. + if p is None: # p is uniform distribution as default. 
def init_worker(Awl_toshare): global G_Awl G_Awl = Awl_toshare @@ -255,7 +255,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True # pbar = tqdm( # total=(1 + len(Gn)) * len(Gn) / 2, -# desc='calculating kernels', +# desc='Computing kernels', # file=sys.stdout) # for i in range(0, len(Gn)): # for j in range(i, len(Gn)): @@ -300,12 +300,12 @@ def _se_do(A_wave1, A_wave2, lmda): ############################################################################### def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): - """Calculate walk graph kernels up to n between 2 graphs using conjugate method. + """Compute walk graph kernels up to n between 2 graphs using conjugate method. Parameters ---------- G1, G2 : NetworkX graph - Graphs between which the kernel is calculated. + Graphs between which the kernel is computed. node_label : string node attribute used as label. edge_label : string @@ -321,14 +321,14 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, # if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ # not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1: # # this is faster from unlabeled graphs. @todo: why? -# if q == None: +# if q is None: # # don't normalize adjacency matrices if q is a uniform vector. Note # # A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list = [ # nx.adjacency_matrix(G, eweight).todense().transpose() for G in # tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) # ] -# if p == None: # p is uniform distribution as default. +# if p is None: # p is uniform distribution as default. 
# def init_worker(Awl_toshare): # global G_Awl # G_Awl = Awl_toshare @@ -336,23 +336,23 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, # parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, # glbv=(A_wave_list,), n_jobs=n_jobs) # else: - # reindex nodes using consecutive integers for convenience of kernel calculation. + # reindex nodes using consecutive integers for convenience of kernel computation. Gn = [nx.convert_node_labels_to_integers( g, first_label=0, label_attribute='label_orignal') for g in (tqdm( Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] - if p == None and q == None: # p and q are uniform distributions as default. + if p is None and q is None: # p and q are uniform distributions as default. def init_worker(gn_toshare): global G_gn G_gn = gn_toshare - do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels, + do_partial = partial(wrapper_cg_labeled_do, ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) # pbar = tqdm( # total=(1 + len(Gn)) * len(Gn) / 2, -# desc='calculating kernels', +# desc='Computing kernels', # file=sys.stdout) # for i in range(0, len(Gn)): # for j in range(i, len(Gn)): @@ -382,24 +382,24 @@ def _cg_unlabled_do(A_wave1, A_wave2, lmda): return np.dot(q_times, x) -def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, +def wrapper_cg_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda, itr): i = itr[0] j = itr[1] - return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, + return i, j, _cg_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda) -def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, +def _cg_labeled_do(g1, g2, ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda): - # Frist, compute 
kernels between all pairs of nodes, method borrowed + # Frist, compute kernels between all pairs of nodes using the method borrowed # from FCSP. It is faster than directly computing all edge kernels # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the # graphs compared, which is the most case we went though. For very # sparse graphs, this would be slow. vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label) - # Compute weight matrix of the direct product graph. + # Compute the weight matrix of the direct product graph. w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label) # use uniform distribution if there is no prior knowledge. @@ -415,12 +415,12 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, ############################################################################### def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): - """Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. + """Compute walk graph kernels up to n between 2 graphs using Fixed-Point method. Parameters ---------- G1, G2 : NetworkX graph - Graphs between which the kernel is calculated. + Graphs between which the kernel is computed. node_label : string node attribute used as label. edge_label : string @@ -438,17 +438,17 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, # if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ # not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1: # # this is faster from unlabeled graphs. @todo: why? -# if q == None: +# if q is None: # # don't normalize adjacency matrices if q is a uniform vector. Note # # A_wave_list actually contains the transposes of the adjacency matrices. 
# A_wave_list = [ # nx.adjacency_matrix(G, eweight).todense().transpose() for G in # tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) # ] -# if p == None: # p is uniform distribution as default. +# if p is None: # p is uniform distribution as default. # pbar = tqdm( # total=(1 + len(Gn)) * len(Gn) / 2, -# desc='calculating kernels', +# desc='Computing kernels', # file=sys.stdout) # for i in range(0, len(Gn)): # for j in range(i, len(Gn)): @@ -464,33 +464,33 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, # Kmatrix[j][i] = Kmatrix[i][j] # pbar.update(1) # else: - # reindex nodes using consecutive integers for convenience of kernel calculation. + # reindex nodes using consecutive integers for the convenience of kernel computation. Gn = [nx.convert_node_labels_to_integers( g, first_label=0, label_attribute='label_orignal') for g in (tqdm( Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] - if p == None and q == None: # p and q are uniform distributions as default. + if p is None and q is None: # p and q are uniform distributions as default. 
def init_worker(gn_toshare): global G_gn G_gn = gn_toshare - do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels, + do_partial = partial(wrapper_fp_labeled_do, ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) return Kmatrix -def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, +def wrapper_fp_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda, itr): i = itr[0] j = itr[1] - return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, + return i, j, _fp_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda) -def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, +def _fp_labeled_do(g1, g2, ds_attrs, node_kernels, node_label, edge_kernels, edge_label, lmda): - # Frist, compute kernels between all pairs of nodes, method borrowed + # First, compute kernels between all pairs of nodes using the method borrowed # from FCSP. It is faster than directly computing all edge kernels # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the # graphs compared, which is the most case we went though. For very @@ -519,13 +519,13 @@ def func_fp(x, p_times, lmda, w_times): ############################################################################### def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunksize, verbose=True): - """Calculate walk graph kernels up to n between 2 unlabeled graphs using + """Compute walk graph kernels up to n between 2 unlabeled graphs using spectral decomposition method. Labels will be ignored. Parameters ---------- G1, G2 : NetworkX graph - Graphs between which the kernel is calculated. + Graphs between which the kernel is computed. node_label : string node attribute used as label.
edge_label : string @@ -538,7 +538,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk """ Kmatrix = np.zeros((len(Gn), len(Gn))) - if q == None: + if q is None: # precompute the spectral decomposition of each graph. P_list = [] D_list = [] @@ -552,7 +552,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk P_list.append(ev) # P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? - if p == None: # p is uniform distribution as default. + if p is None: # p is uniform distribution as default. q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn] # q_T_list = [q.T for q in q_list] def init_worker(q_T_toshare, P_toshare, D_toshare): @@ -568,7 +568,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk # pbar = tqdm( # total=(1 + len(Gn)) * len(Gn) / 2, -# desc='calculating kernels', +# desc='Computing kernels', # file=sys.stdout) # for i in range(0, len(Gn)): # for j in range(i, len(Gn)): @@ -605,12 +605,12 @@ def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): ############################################################################### def _randomwalkkernel_kron(G1, G2, node_label, edge_label): - """Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. + """Compute walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. Parameters ---------- G1, G2 : NetworkX graph - Graphs between which the kernel is calculated. + Graphs between which the kernel is computed. node_label : string node attribute used as label. edge_label : string @@ -692,8 +692,8 @@ def computeVK(g1, g2, ds_attrs, node_kernels, node_label): def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label): - '''Compute weight matrix of the direct product graph. - ''' + """Compute the weight matrix of the direct product graph. 
+ """ w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) w_times = np.zeros((w_dim, w_dim)) if vk_dict: # node labeled diff --git a/gklearn/kernels/shortest_path.py b/gklearn/kernels/shortest_path.py index 1923b00..b068e6e 100644 --- a/gklearn/kernels/shortest_path.py +++ b/gklearn/kernels/shortest_path.py @@ -47,7 +47,7 @@ class ShortestPath(GraphKernel): from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr for i, j in iterator: @@ -102,7 +102,7 @@ class ShortestPath(GraphKernel): # compute kernel list. kernel_list = [None] * len(g_list) if self._verbose >= 2: - iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator = range(len(g_list)) for i in iterator: @@ -145,7 +145,7 @@ class ShortestPath(GraphKernel): itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, - init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list diff --git a/gklearn/kernels/spKernel.py b/gklearn/kernels/spKernel.py index b48a905..eaf59df 100644 --- a/gklearn/kernels/spKernel.py +++ b/gklearn/kernels/spKernel.py @@ -29,15 +29,15 @@ def spkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate shortest-path kernels between graphs. + """Compute shortest-path kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. 
+ List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. node_label : string Node attribute used as label. The default node label is atom. @@ -179,7 +179,7 @@ def spkernel(*args, # do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) # itr = combinations_with_replacement(range(0, len(Gn)), 2) # for i, j, kernel in tqdm( - # pool.map(do_partial, itr), desc='calculating kernels', + # pool.map(do_partial, itr), desc='Computing kernels', # file=sys.stdout): # Kmatrix[i][j] = kernel # Kmatrix[j][i] = kernel @@ -202,7 +202,7 @@ def spkernel(*args, # # ---- direct running, normally use single CPU core. ---- # from itertools import combinations_with_replacement # itr = combinations_with_replacement(range(0, len(Gn)), 2) -# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): +# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): # kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels) # Kmatrix[i][j] = kernel # Kmatrix[j][i] = kernel diff --git a/gklearn/kernels/structural_sp.py b/gklearn/kernels/structural_sp.py index 4b9fb26..254f2cc 100644 --- a/gklearn/kernels/structural_sp.py +++ b/gklearn/kernels/structural_sp.py @@ -18,7 +18,7 @@ from tqdm import tqdm # import networkx as nx import numpy as np from gklearn.utils.parallel import parallel_gm, parallel_me -from gklearn.utils.utils import get_shortest_paths +from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels from gklearn.kernels import GraphKernel @@ -57,7 +57,7 @@ class StructuralSP(GraphKernel): from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr if 
self.__compute_method == 'trie': @@ -135,7 +135,7 @@ class StructuralSP(GraphKernel): # compute kernel list. kernel_list = [None] * len(g_list) if self._verbose >= 2: - iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator = range(len(g_list)) if self.__compute_method == 'trie': @@ -193,7 +193,7 @@ class StructuralSP(GraphKernel): itr = range(len(g_list)) len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, - init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list @@ -273,7 +273,7 @@ class StructuralSP(GraphKernel): if len(p1) == len(p2): kernel += 1 try: - kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average + kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average except ZeroDivisionError: print(spl1, spl2) print(g1.nodes(data=True)) @@ -318,40 +318,7 @@ class StructuralSP(GraphKernel): def __get_all_node_kernels(self, g1, g2): - # compute shortest path matrices, method borrowed from FCSP. 
- vk_dict = {} # shortest path matrices dict - if len(self.__node_labels) > 0: - # node symb and non-synb labeled - if len(self.__node_attrs) > 0: - kn = self.__node_kernels['mix'] - for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)): - n1_labels = [n1[1][nl] for nl in self.__node_labels] - n2_labels = [n2[1][nl] for nl in self.__node_labels] - n1_attrs = [n1[1][na] for na in self.__node_attrs] - n2_attrs = [n2[1][na] for na in self.__node_attrs] - vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) - # node symb labeled - else: - kn = self.__node_kernels['symb'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_labels = [n1[1][nl] for nl in self.__node_labels] - n2_labels = [n2[1][nl] for nl in self.__node_labels] - vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) - else: - # node non-synb labeled - if len(self.__node_attrs) > 0: - kn = self.__node_kernels['nsymb'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_attrs = [n1[1][na] for na in self.__node_attrs] - n2_attrs = [n2[1][na] for na in self.__node_attrs] - vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) - # node unlabeled - else: - pass - - return vk_dict + return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) def __get_all_edge_kernels(self, g1, g2): diff --git a/gklearn/kernels/structuralspKernel.py b/gklearn/kernels/structuralspKernel.py index fb8dbf9..cfafc8c 100644 --- a/gklearn/kernels/structuralspKernel.py +++ b/gklearn/kernels/structuralspKernel.py @@ -37,15 +37,15 @@ def structuralspkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate mean average structural shortest path kernels between graphs. + """Compute mean average structural shortest path kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. 
G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. node_label : string Node attribute used as label. The default node label is atom. @@ -215,7 +215,7 @@ def structuralspkernel(*args, from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(Gn)), 2) if verbose: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr if compute_method == 'trie': @@ -241,7 +241,7 @@ def structuralspkernel(*args, # combinations_with_replacement(splist, 2), # combinations_with_replacement(range(0, len(Gn)), 2)) # for i, j, kernel in tqdm( -# pool.map(do_partial, itr), desc='calculating kernels', +# pool.map(do_partial, itr), desc='Computing kernels', # file=sys.stdout): # Kmatrix[i][j] = kernel # Kmatrix[j][i] = kernel @@ -263,7 +263,7 @@ def structuralspkernel(*args, # with closing(Pool(n_jobs)) as pool: # for i, j, kernel in tqdm( # pool.imap_unordered(do_partial, itr, 1000), -# desc='calculating kernels', +# desc='Computing kernels', # file=sys.stdout): # Kmatrix[i][j] = kernel # Kmatrix[j][i] = kernel @@ -335,7 +335,7 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label, if len(p1) == len(p2): kernel += 1 try: - kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average + kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average except ZeroDivisionError: print(spl1, spl2) print(g1.nodes(data=True)) @@ -429,7 +429,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label, # # compute graph kernels # traverseBothTrie(trie1[0].root, trie2[0], kernel) # -# kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average +# kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average # # traverse all paths in graph1. Deep-first search is applied. 
# def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]): @@ -485,7 +485,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label, else: traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict) - kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average + kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average return kernel @@ -781,9 +781,9 @@ def get_shortest_paths(G, weight, directed): Parameters ---------- G : NetworkX graphs - The graphs whose paths are calculated. + The graphs whose paths are computed. weight : string/None - edge attribute used as weight to calculate the shortest path. + edge attribute used as weight to compute the shortest path. directed: boolean Whether graph is directed. @@ -822,9 +822,9 @@ def get_sps_as_trie(G, weight, directed): Parameters ---------- G : NetworkX graphs - The graphs whose paths are calculated. + The graphs whose paths are computed. weight : string/None - edge attribute used as weight to calculate the shortest path. + edge attribute used as weight to compute the shortest path. directed: boolean Whether graph is directed. diff --git a/gklearn/kernels/treelet.py b/gklearn/kernels/treelet.py index c3204ec..61ffd47 100644 --- a/gklearn/kernels/treelet.py +++ b/gklearn/kernels/treelet.py @@ -39,7 +39,7 @@ class Treelet(GraphKernel): def _compute_gm_series(self): self.__add_dummy_labels(self._graphs) - # get all canonical keys of all graphs before calculating kernels to save + # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. 
canonkeys = [] if self._verbose >= 2: @@ -55,7 +55,7 @@ class Treelet(GraphKernel): from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: - iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) + iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) else: iterator = itr for i, j in iterator: @@ -69,7 +69,7 @@ class Treelet(GraphKernel): def _compute_gm_imap_unordered(self): self.__add_dummy_labels(self._graphs) - # get all canonical keys of all graphs before calculating kernels to save + # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. pool = Pool(self._n_jobs) itr = zip(self._graphs, range(0, len(self._graphs))) @@ -105,7 +105,7 @@ class Treelet(GraphKernel): def _compute_kernel_list_series(self, g1, g_list): self.__add_dummy_labels(g_list + [g1]) - # get all canonical keys of all graphs before calculating kernels to save + # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. canonkeys_1 = self.__get_canonkeys(g1) canonkeys_list = [] @@ -119,7 +119,7 @@ class Treelet(GraphKernel): # compute kernel list. kernel_list = [None] * len(g_list) if self._verbose >= 2: - iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) + iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) else: iterator = range(len(g_list)) for i in iterator: @@ -132,7 +132,7 @@ class Treelet(GraphKernel): def _compute_kernel_list_imap_unordered(self, g1, g_list): self.__add_dummy_labels(g_list + [g1]) - # get all canonical keys of all graphs before calculating kernels to save + # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. 
canonkeys_1 = self.__get_canonkeys(g1) canonkeys_list = [[] for _ in range(len(g_list))] @@ -167,7 +167,7 @@ class Treelet(GraphKernel): len_itr = len(g_list) parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', - n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) + n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) return kernel_list @@ -185,7 +185,7 @@ class Treelet(GraphKernel): def __kernel_do(self, canonkey1, canonkey2): - """Calculate treelet graph kernel between 2 graphs. + """Compute treelet graph kernel between 2 graphs. Parameters ---------- diff --git a/gklearn/kernels/treeletKernel.py b/gklearn/kernels/treeletKernel.py index 809a623..14577ff 100644 --- a/gklearn/kernels/treeletKernel.py +++ b/gklearn/kernels/treeletKernel.py @@ -29,15 +29,15 @@ def treeletkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate treelet graph kernels between graphs. + """Compute treelet graph kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. sub_kernel : function The sub-kernel between 2 real number vectors. Each vector counts the @@ -89,7 +89,7 @@ def treeletkernel(*args, # ---- use pool.imap_unordered to parallel and track progress. ---- if parallel == 'imap_unordered': - # get all canonical keys of all graphs before calculating kernels to save + # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. 
pool = Pool(n_jobs) itr = zip(Gn, range(0, len(Gn))) @@ -120,8 +120,8 @@ def treeletkernel(*args, glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) # ---- do not use parallelization. ---- - elif parallel == None: - # get all canonical keys of all graphs before calculating kernels to save + elif parallel is None: + # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. canonkeys = [] for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): @@ -148,7 +148,7 @@ def treeletkernel(*args, def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): - """Calculate treelet graph kernel between 2 graphs. + """Compute treelet graph kernel between 2 graphs. Parameters ---------- @@ -210,7 +210,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed): # n-star patterns patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] - patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] + patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] # @todo: check self loop. patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] # n-star patterns canonkey['6'] = len(patterns['3star']) diff --git a/gklearn/kernels/untilHPathKernel.py b/gklearn/kernels/untilHPathKernel.py index 9bac28b..62c8626 100644 --- a/gklearn/kernels/untilHPathKernel.py +++ b/gklearn/kernels/untilHPathKernel.py @@ -34,15 +34,15 @@ def untilhpathkernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate path graph kernels up to depth/hight h between graphs. + """Compute path graph kernels up to depth/height h between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated.
+ List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. node_label : string Node attribute used as label. The default node label is atom. @@ -91,7 +91,7 @@ def untilhpathkernel(*args, attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', 'edge_attr_dim', 'is_directed'], node_label=node_label, edge_label=edge_label) - if k_func != None: + if k_func is not None: if not ds_attrs['node_labeled']: for G in Gn: nx.set_node_attributes(G, '0', 'atom') @@ -103,7 +103,7 @@ def untilhpathkernel(*args, if parallel == 'imap_unordered': # ---- use pool.imap_unordered to parallel and track progress. ---- - # get all paths of all graphs before calculating kernels to save time, + # get all paths of all graphs before computing kernels to save time, # but this may cost a lot of memory for large datasets. pool = Pool(n_jobs) itr = zip(Gn, range(0, len(Gn))) @@ -113,10 +113,10 @@ def untilhpathkernel(*args, else: chunksize = 100 all_paths = [[] for _ in range(len(Gn))] - if compute_method == 'trie' and k_func != None: + if compute_method == 'trie' and k_func is not None: getps_partial = partial(wrapper_find_all_path_as_trie, depth, ds_attrs, node_label, edge_label) - elif compute_method != 'trie' and k_func != None: + elif compute_method != 'trie' and k_func is not None: getps_partial = partial(wrapper_find_all_paths_until_length, depth, ds_attrs, node_label, edge_label, True) else: @@ -133,9 +133,9 @@ def untilhpathkernel(*args, pool.join() # for g in Gn: -# if compute_method == 'trie' and k_func != None: +# if compute_method == 'trie' and k_func is not None: # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) -# elif compute_method != 'trie' and k_func != None: +# elif compute_method != 'trie' and k_func is not None: # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) # else: # find_all_paths_until_length(g, 
depth, ds_attrs, node_label, edge_label, False) @@ -155,14 +155,14 @@ def untilhpathkernel(*args, ## all_paths[i] = ps ## print(time.time() - ttt) - if compute_method == 'trie' and k_func != None: + if compute_method == 'trie' and k_func is not None: def init_worker(trie_toshare): global G_trie G_trie = trie_toshare do_partial = partial(wrapper_uhpath_do_trie, k_func) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) - elif compute_method != 'trie' and k_func != None: + elif compute_method != 'trie' and k_func is not None: def init_worker(plist_toshare): global G_plist G_plist = plist_toshare @@ -177,7 +177,7 @@ def untilhpathkernel(*args, parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) - elif parallel == None: + elif parallel is None: # from pympler import asizeof # ---- direct running, normally use single CPU core. ---- # print(asizeof.asized(all_paths, detail=1).format()) @@ -195,7 +195,7 @@ def untilhpathkernel(*args, # print(sizeof_allpaths) pbar = tqdm( total=((len(Gn) + 1) * len(Gn) / 2), - desc='calculating kernels', + desc='Computing kernels', file=sys.stdout) for i in range(0, len(Gn)): for j in range(i, len(Gn)): @@ -217,7 +217,7 @@ def untilhpathkernel(*args, # print(sizeof_allpaths) pbar = tqdm( total=((len(Gn) + 1) * len(Gn) / 2), - desc='calculating kernels', + desc='Computing kernels', file=sys.stdout) for i in range(0, len(Gn)): for j in range(i, len(Gn)): @@ -236,7 +236,7 @@ def untilhpathkernel(*args, def _untilhpathkernel_do_trie(trie1, trie2, k_func): - """Calculate path graph kernels up to depth d between 2 graphs using trie. + """Compute path graph kernels up to depth d between 2 graphs using trie. 
Parameters ---------- @@ -351,7 +351,7 @@ def wrapper_uhpath_do_trie(k_func, itr): def _untilhpathkernel_do_naive(paths1, paths2, k_func): - """Calculate path graph kernels up to depth d between 2 graphs naively. + """Compute path graph kernels up to depth d between 2 graphs naively. Parameters ---------- @@ -400,7 +400,7 @@ def wrapper_uhpath_do_naive(k_func, itr): def _untilhpathkernel_do_kernelless(paths1, paths2, k_func): - """Calculate path graph kernels up to depth d between 2 graphs naively. + """Compute path graph kernels up to depth d between 2 graphs naively. Parameters ---------- diff --git a/gklearn/kernels/weisfeilerLehmanKernel.py b/gklearn/kernels/weisfeilerLehmanKernel.py index 222f5c5..469dcd8 100644 --- a/gklearn/kernels/weisfeilerLehmanKernel.py +++ b/gklearn/kernels/weisfeilerLehmanKernel.py @@ -32,15 +32,15 @@ def weisfeilerlehmankernel(*args, n_jobs=None, chunksize=None, verbose=True): - """Calculate Weisfeiler-Lehman kernels between graphs. + """Compute Weisfeiler-Lehman kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. G1, G2 : NetworkX graphs - Two graphs between which the kernel is calculated. + Two graphs between which the kernel is computed. node_label : string Node attribute used as label. The default node label is atom. @@ -115,12 +115,12 @@ def weisfeilerlehmankernel(*args, def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose): - """Calculate Weisfeiler-Lehman kernels between graphs. + """Compute Weisfeiler-Lehman kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string node attribute used as label. 
edge_label : string @@ -146,7 +146,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz # number of occurence of each label in G all_num_of_each_label.append(dict(Counter(labels_ori))) - # calculate subtree kernel with the 0th iteration and add it to the final kernel + # Compute subtree kernel with the 0th iteration and add it to the final kernel compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) # iterate each height @@ -255,7 +255,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz # all_labels_ori.update(labels_comp) all_num_of_each_label.append(dict(Counter(labels_comp))) - # calculate subtree kernel with h iterations and add it to the final kernel + # Compute subtree kernel with h iterations and add it to the final kernel compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) return Kmatrix @@ -316,7 +316,7 @@ def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix) parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) - elif parallel == None: + elif parallel is None: for i in range(len(Kmatrix)): for j in range(i, len(Kmatrix)): Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i], @@ -345,12 +345,12 @@ def wrapper_compute_subtree_kernel(Kmatrix, itr): def _wl_spkernel_do(Gn, node_label, edge_label, height): - """Calculate Weisfeiler-Lehman shortest path kernels between graphs. + """Compute Weisfeiler-Lehman shortest path kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string node attribute used as label. 
edge_label : string @@ -413,7 +413,7 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height): for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] - # calculate subtree kernel with h iterations and add it to the final kernel + # Compute subtree kernel with h iterations and add it to the final kernel for i in range(0, len(Gn)): for j in range(i, len(Gn)): for e1 in Gn[i].edges(data = True): @@ -427,12 +427,12 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height): def _wl_edgekernel_do(Gn, node_label, edge_label, height): - """Calculate Weisfeiler-Lehman edge kernels between graphs. + """Compute Weisfeiler-Lehman edge kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string node attribute used as label. edge_label : string @@ -491,7 +491,7 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height): for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] - # calculate subtree kernel with h iterations and add it to the final kernel + # Compute subtree kernel with h iterations and add it to the final kernel for i in range(0, len(Gn)): for j in range(i, len(Gn)): for e1 in Gn[i].edges(data = True): @@ -504,12 +504,12 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height): def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): - """Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. + """Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string node attribute used as label. 
edge_label : string @@ -564,7 +564,7 @@ def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] - # calculate kernel with h iterations and add it to the final kernel + # Compute kernel with h iterations and add it to the final kernel Kmatrix += base_kernel(Gn, node_label, edge_label) return Kmatrix diff --git a/gklearn/kernels/weisfeiler_lehman.py b/gklearn/kernels/weisfeiler_lehman.py index f5f4145..8ab7634 100644 --- a/gklearn/kernels/weisfeiler_lehman.py +++ b/gklearn/kernels/weisfeiler_lehman.py @@ -125,12 +125,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def __subtree_kernel_do(self, Gn): - """Calculate Weisfeiler-Lehman kernels between graphs. + """Compute Weisfeiler-Lehman kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. Return ------ @@ -152,7 +152,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge # number of occurence of each label in G all_num_of_each_label.append(dict(Counter(labels_ori))) - # calculate subtree kernel with the 0th iteration and add it to the final kernel. + # Compute subtree kernel with the 0th iteration and add it to the final kernel. 
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) # iterate each height @@ -198,7 +198,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge # all_labels_ori.update(labels_comp) all_num_of_each_label.append(dict(Counter(labels_comp))) - # calculate subtree kernel with h iterations and add it to the final kernel + # Compute subtree kernel with h iterations and add it to the final kernel self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) return gram_matrix @@ -244,12 +244,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def _wl_spkernel_do(Gn, node_label, edge_label, height): - """Calculate Weisfeiler-Lehman shortest path kernels between graphs. + """Compute Weisfeiler-Lehman shortest path kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string node attribute used as label. edge_label : string @@ -312,7 +312,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] - # calculate subtree kernel with h iterations and add it to the final kernel + # Compute subtree kernel with h iterations and add it to the final kernel for i in range(0, len(Gn)): for j in range(i, len(Gn)): for e1 in Gn[i].edges(data = True): @@ -326,12 +326,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def _wl_edgekernel_do(Gn, node_label, edge_label, height): - """Calculate Weisfeiler-Lehman edge kernels between graphs. + """Compute Weisfeiler-Lehman edge kernels between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. 
node_label : string node attribute used as label. edge_label : string @@ -390,7 +390,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] - # calculate subtree kernel with h iterations and add it to the final kernel + # Compute subtree kernel with h iterations and add it to the final kernel for i in range(0, len(Gn)): for j in range(i, len(Gn)): for e1 in Gn[i].edges(data = True): @@ -403,12 +403,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): - """Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. + """Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs. Parameters ---------- Gn : List of NetworkX graph - List of graphs between which the kernels are calculated. + List of graphs between which the kernels are computed. node_label : string node attribute used as label. 
edge_label : string @@ -463,7 +463,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge for node in G.nodes(data = True): node[1][node_label] = set_compressed[set_multisets[node[0]]] - # calculate kernel with h iterations and add it to the final kernel + # Compute kernel with h iterations and add it to the final kernel gram_matrix += base_kernel(Gn, node_label, edge_label) return gram_matrix diff --git a/gklearn/utils/parallel.py b/gklearn/utils/parallel.py index 71bb47c..a1862c0 100644 --- a/gklearn/utils/parallel.py +++ b/gklearn/utils/parallel.py @@ -63,4 +63,4 @@ def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None, len_itr = int(len(Gn) * (len(Gn) + 1) / 2) parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr, init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs, - chunksize=chunksize, itr_desc='calculating kernels', verbose=verbose) \ No newline at end of file + chunksize=chunksize, itr_desc='Computing kernels', verbose=verbose) \ No newline at end of file