@@ -30,15 +30,15 @@ def commonwalkkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate common walk graph kernels between graphs. | |||||
"""Compute common walk graph kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as symbolic label. The default node label is 'atom'. | Node attribute used as symbolic label. The default node label is 'atom'. | ||||
edge_label : string | edge_label : string | ||||
@@ -133,7 +133,7 @@ def commonwalkkernel(*args, | |||||
# | # | ||||
# for i, j, kernel in tqdm( | # for i, j, kernel in tqdm( | ||||
# pool.imap_unordered(do_partial, itr, chunksize), | # pool.imap_unordered(do_partial, itr, chunksize), | ||||
# desc='calculating kernels', | |||||
# desc='computing kernels', | |||||
# file=sys.stdout): | # file=sys.stdout): | ||||
# Kmatrix[i][j] = kernel | # Kmatrix[i][j] = kernel | ||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
@@ -145,14 +145,14 @@ def commonwalkkernel(*args, | |||||
# # direct product graph method - exponential | # # direct product graph method - exponential | ||||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | # itr = combinations_with_replacement(range(0, len(Gn)), 2) | ||||
# if compute_method == 'exp': | # if compute_method == 'exp': | ||||
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||||
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): | |||||
# Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label, | # Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label, | ||||
# edge_label, weight) | # edge_label, weight) | ||||
# Kmatrix[j][i] = Kmatrix[i][j] | # Kmatrix[j][i] = Kmatrix[i][j] | ||||
# | # | ||||
# # direct product graph method - geometric | # # direct product graph method - geometric | ||||
# elif compute_method == 'geo': | # elif compute_method == 'geo': | ||||
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||||
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): | |||||
# Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label, | # Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label, | ||||
# edge_label, weight) | # edge_label, weight) | ||||
# Kmatrix[j][i] = Kmatrix[i][j] | # Kmatrix[j][i] = Kmatrix[i][j] | ||||
@@ -161,7 +161,7 @@ def commonwalkkernel(*args, | |||||
# # search all paths use brute force. | # # search all paths use brute force. | ||||
# elif compute_method == 'brute': | # elif compute_method == 'brute': | ||||
# n = int(n) | # n = int(n) | ||||
# # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset. | |||||
# # get all paths of all graphs before computing kernels to save time, but this may cost a lot of memory for large datasets. | |||||
# all_walks = [ | # all_walks = [ | ||||
# find_all_walks_until_length(Gn[i], n, node_label, edge_label) | # find_all_walks_until_length(Gn[i], n, node_label, edge_label) | ||||
# for i in range(0, len(Gn)) | # for i in range(0, len(Gn)) | ||||
@@ -185,13 +185,13 @@ def commonwalkkernel(*args, | |||||
def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta): | def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta): | ||||
"""Calculate walk graph kernels up to n between 2 graphs using exponential | |||||
"""Compute walk graph kernels up to n between 2 graphs using exponential | |||||
series. | series. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as label. | Node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -259,13 +259,13 @@ def wrapper_cw_exp(node_label, edge_label, beta, itr): | |||||
def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma): | def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma): | ||||
"""Calculate common walk graph kernels up to n between 2 graphs using | |||||
"""Compute common walk graph kernels up to n between 2 graphs using | |||||
geometric series. | geometric series. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as label. | Node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -304,7 +304,7 @@ def _commonwalkkernel_brute(walks1, | |||||
node_label='atom', | node_label='atom', | ||||
edge_label='bond_type', | edge_label='bond_type', | ||||
labeled=True): | labeled=True): | ||||
"""Calculate walk graph kernels up to n between 2 graphs. | |||||
"""Compute walk graph kernels up to n between 2 graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -46,7 +46,7 @@ class CommonWalk(GraphKernel): | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
@@ -102,7 +102,7 @@ class CommonWalk(GraphKernel): | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
@@ -148,7 +148,7 @@ class CommonWalk(GraphKernel): | |||||
len_itr = len(g_list) | len_itr = len(g_list) | ||||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered', | init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered', | ||||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
return kernel_list | return kernel_list | ||||
@@ -179,13 +179,13 @@ class CommonWalk(GraphKernel): | |||||
def __kernel_do_exp(self, g1, g2, beta): | def __kernel_do_exp(self, g1, g2, beta): | ||||
"""Calculate common walk graph kernel between 2 graphs using exponential | |||||
"""Compute common walk graph kernel between 2 graphs using exponential | |||||
series. | series. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
g1, g2 : NetworkX graphs | g1, g2 : NetworkX graphs | ||||
Graphs between which the kernels are calculated. | |||||
Graphs between which the kernels are computed. | |||||
beta : integer | beta : integer | ||||
Weight. | Weight. | ||||
@@ -231,13 +231,13 @@ class CommonWalk(GraphKernel): | |||||
def __kernel_do_geo(self, g1, g2, gamma): | def __kernel_do_geo(self, g1, g2, gamma): | ||||
"""Calculate common walk graph kernel between 2 graphs using geometric | |||||
"""Compute common walk graph kernel between 2 graphs using geometric | |||||
series. | series. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
g1, g2 : NetworkX graphs | g1, g2 : NetworkX graphs | ||||
Graphs between which the kernels are calculated. | |||||
Graphs between which the kernels are computed. | |||||
gamma : integer | gamma : integer | ||||
Weight. | Weight. | ||||
@@ -104,7 +104,7 @@ class GraphKernel(object): | |||||
if self._parallel == 'imap_unordered': | if self._parallel == 'imap_unordered': | ||||
gram_matrix = self._compute_gm_imap_unordered() | gram_matrix = self._compute_gm_imap_unordered() | ||||
elif self._parallel == None: | |||||
elif self._parallel is None: | |||||
gram_matrix = self._compute_gm_series() | gram_matrix = self._compute_gm_series() | ||||
else: | else: | ||||
raise Exception('Parallel mode is not set correctly.') | raise Exception('Parallel mode is not set correctly.') | ||||
@@ -130,7 +130,7 @@ class GraphKernel(object): | |||||
if self._parallel == 'imap_unordered': | if self._parallel == 'imap_unordered': | ||||
kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list) | kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list) | ||||
elif self._parallel == None: | |||||
elif self._parallel is None: | |||||
kernel_list = self._compute_kernel_list_series(g1, g_list) | kernel_list = self._compute_kernel_list_series(g1, g_list) | ||||
else: | else: | ||||
raise Exception('Parallel mode is not set correctly.') | raise Exception('Parallel mode is not set correctly.') | ||||
@@ -59,7 +59,7 @@ class Marginalized(GraphKernel): | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
@@ -119,7 +119,7 @@ class Marginalized(GraphKernel): | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
@@ -165,7 +165,7 @@ class Marginalized(GraphKernel): | |||||
len_itr = len(g_list) | len_itr = len(g_list) | ||||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', | ||||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
return kernel_list | return kernel_list | ||||
@@ -184,12 +184,12 @@ class Marginalized(GraphKernel): | |||||
def __kernel_do(self, g1, g2): | def __kernel_do(self, g1, g2): | ||||
"""Calculate marginalized graph kernel between 2 graphs. | |||||
"""Compute marginalized graph kernel between 2 graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
g1, g2 : NetworkX graphs | g1, g2 : NetworkX graphs | ||||
2 graphs between which the kernel is calculated. | |||||
2 graphs between which the kernel is computed. | |||||
Return | Return | ||||
------ | ------ | ||||
@@ -212,12 +212,12 @@ class Marginalized(GraphKernel): | |||||
# # matrix to save all the R_inf for all pairs of nodes | # # matrix to save all the R_inf for all pairs of nodes | ||||
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | ||||
# | # | ||||
# # calculate R_inf with a simple interative method | |||||
# # Compute R_inf with a simple iterative method | |||||
# for i in range(1, n_iteration): | # for i in range(1, n_iteration): | ||||
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | # R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | ||||
# R_inf_new.fill(r1) | # R_inf_new.fill(r1) | ||||
# | # | ||||
# # calculate R_inf for each pair of nodes | |||||
# # Compute R_inf for each pair of nodes | |||||
# for node1 in g1.nodes(data=True): | # for node1 in g1.nodes(data=True): | ||||
# neighbor_n1 = g1[node1[0]] | # neighbor_n1 = g1[node1[0]] | ||||
# # the transition probability distribution in the random walks | # # the transition probability distribution in the random walks | ||||
@@ -243,7 +243,7 @@ class Marginalized(GraphKernel): | |||||
# neighbor2] # ref [1] equation (8) | # neighbor2] # ref [1] equation (8) | ||||
# R_inf[:] = R_inf_new | # R_inf[:] = R_inf_new | ||||
# | # | ||||
# # add elements of R_inf up and calculate kernel | |||||
# # add elements of R_inf up and compute kernel | |||||
# for node1 in g1.nodes(data=True): | # for node1 in g1.nodes(data=True): | ||||
# for node2 in g2.nodes(data=True): | # for node2 in g2.nodes(data=True): | ||||
# s = p_init_G1 * p_init_G2 * deltakernel( | # s = p_init_G1 * p_init_G2 * deltakernel( | ||||
@@ -288,11 +288,11 @@ class Marginalized(GraphKernel): | |||||
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ | deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \ | ||||
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) | deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels)) | ||||
# calculate R_inf with a simple interative method | |||||
# Compute R_inf with a simple iterative method | |||||
for i in range(2, self.__n_iteration + 1): | for i in range(2, self.__n_iteration + 1): | ||||
R_inf_old = R_inf.copy() | R_inf_old = R_inf.copy() | ||||
# calculate R_inf for each pair of nodes | |||||
# Compute R_inf for each pair of nodes | |||||
for node1 in g1.nodes(): | for node1 in g1.nodes(): | ||||
neighbor_n1 = g1[node1] | neighbor_n1 = g1[node1] | ||||
# the transition probability distribution in the random walks | # the transition probability distribution in the random walks | ||||
@@ -309,7 +309,7 @@ class Marginalized(GraphKernel): | |||||
(t_dict[(node1, node2, neighbor1, neighbor2)] * \ | (t_dict[(node1, node2, neighbor1, neighbor2)] * \ | ||||
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | ||||
# add elements of R_inf up and calculate kernel | |||||
# add elements of R_inf up and compute kernel. | |||||
for (n1, n2), value in R_inf.items(): | for (n1, n2), value in R_inf.items(): | ||||
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) | s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels)) | ||||
kernel += s * value # ref [1] equation (6) | kernel += s * value # ref [1] equation (6) | ||||
@@ -39,15 +39,15 @@ def marginalizedkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate marginalized graph kernels between graphs. | |||||
"""Compute marginalized graph kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as symbolic label. The default node label is 'atom'. | Node attribute used as symbolic label. The default node label is 'atom'. | ||||
@@ -59,7 +59,7 @@ def marginalizedkernel(*args, | |||||
The termination probability in the random walks generating step. | The termination probability in the random walks generating step. | ||||
n_iteration : integer | n_iteration : integer | ||||
Time of iterations to calculate R_inf. | |||||
Number of iterations used to compute R_inf. | |||||
remove_totters : boolean | remove_totters : boolean | ||||
Whether to remove totterings by method introduced in [2]. The default | Whether to remove totterings by method introduced in [2]. The default | ||||
@@ -83,11 +83,11 @@ def marginalizedkernel(*args, | |||||
Gn, | Gn, | ||||
attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | attr_names=['node_labeled', 'edge_labeled', 'is_directed'], | ||||
node_label=node_label, edge_label=edge_label) | node_label=node_label, edge_label=edge_label) | ||||
if not ds_attrs['node_labeled'] or node_label == None: | |||||
if not ds_attrs['node_labeled'] or node_label is None: | |||||
node_label = 'atom' | node_label = 'atom' | ||||
for G in Gn: | for G in Gn: | ||||
nx.set_node_attributes(G, '0', 'atom') | nx.set_node_attributes(G, '0', 'atom') | ||||
if not ds_attrs['edge_labeled'] or edge_label == None: | |||||
if not ds_attrs['edge_labeled'] or edge_label is None: | |||||
edge_label = 'bond_type' | edge_label = 'bond_type' | ||||
for G in Gn: | for G in Gn: | ||||
nx.set_edge_attributes(G, '0', 'bond_type') | nx.set_edge_attributes(G, '0', 'bond_type') | ||||
@@ -133,7 +133,7 @@ def marginalizedkernel(*args, | |||||
# # ---- direct running, normally use single CPU core. ---- | # # ---- direct running, normally use single CPU core. ---- | ||||
## pbar = tqdm( | ## pbar = tqdm( | ||||
## total=(1 + len(Gn)) * len(Gn) / 2, | ## total=(1 + len(Gn)) * len(Gn) / 2, | ||||
## desc='calculating kernels', | |||||
## desc='Computing kernels', | |||||
## file=sys.stdout) | ## file=sys.stdout) | ||||
# for i in range(0, len(Gn)): | # for i in range(0, len(Gn)): | ||||
# for j in range(i, len(Gn)): | # for j in range(i, len(Gn)): | ||||
@@ -152,12 +152,12 @@ def marginalizedkernel(*args, | |||||
def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | ||||
"""Calculate marginalized graph kernel between 2 graphs. | |||||
"""Compute marginalized graph kernel between 2 graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
2 graphs between which the kernel is calculated. | |||||
2 graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -165,7 +165,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||||
p_quit : integer | p_quit : integer | ||||
the termination probability in the random walks generating step. | the termination probability in the random walks generating step. | ||||
n_iteration : integer | n_iteration : integer | ||||
time of iterations to calculate R_inf. | |||||
number of iterations used to compute R_inf. | |||||
Return | Return | ||||
------ | ------ | ||||
@@ -188,12 +188,12 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||||
# # matrix to save all the R_inf for all pairs of nodes | # # matrix to save all the R_inf for all pairs of nodes | ||||
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | # R_inf = np.zeros([num_nodes_G1, num_nodes_G2]) | ||||
# | # | ||||
# # calculate R_inf with a simple interative method | |||||
# # Compute R_inf with a simple iterative method | |||||
# for i in range(1, n_iteration): | # for i in range(1, n_iteration): | ||||
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | # R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2]) | ||||
# R_inf_new.fill(r1) | # R_inf_new.fill(r1) | ||||
# | # | ||||
# # calculate R_inf for each pair of nodes | |||||
# # Compute R_inf for each pair of nodes | |||||
# for node1 in g1.nodes(data=True): | # for node1 in g1.nodes(data=True): | ||||
# neighbor_n1 = g1[node1[0]] | # neighbor_n1 = g1[node1[0]] | ||||
# # the transition probability distribution in the random walks | # # the transition probability distribution in the random walks | ||||
@@ -219,7 +219,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||||
# neighbor2] # ref [1] equation (8) | # neighbor2] # ref [1] equation (8) | ||||
# R_inf[:] = R_inf_new | # R_inf[:] = R_inf_new | ||||
# | # | ||||
# # add elements of R_inf up and calculate kernel | |||||
# # add elements of R_inf up and compute kernel. | |||||
# for node1 in g1.nodes(data=True): | # for node1 in g1.nodes(data=True): | ||||
# for node2 in g2.nodes(data=True): | # for node2 in g2.nodes(data=True): | ||||
# s = p_init_G1 * p_init_G2 * deltakernel( | # s = p_init_G1 * p_init_G2 * deltakernel( | ||||
@@ -267,11 +267,11 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||||
neighbor_n1[neighbor1][edge_label], | neighbor_n1[neighbor1][edge_label], | ||||
neighbor_n2[neighbor2][edge_label]) | neighbor_n2[neighbor2][edge_label]) | ||||
# calculate R_inf with a simple interative method | |||||
# Compute R_inf with a simple iterative method | |||||
for i in range(2, n_iteration + 1): | for i in range(2, n_iteration + 1): | ||||
R_inf_old = R_inf.copy() | R_inf_old = R_inf.copy() | ||||
# calculate R_inf for each pair of nodes | |||||
# Compute R_inf for each pair of nodes | |||||
for node1 in g1.nodes(): | for node1 in g1.nodes(): | ||||
neighbor_n1 = g1[node1] | neighbor_n1 = g1[node1] | ||||
# the transition probability distribution in the random walks | # the transition probability distribution in the random walks | ||||
@@ -288,7 +288,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration): | |||||
(t_dict[(node1, node2, neighbor1, neighbor2)] * \ | (t_dict[(node1, node2, neighbor1, neighbor2)] * \ | ||||
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8) | ||||
# add elements of R_inf up and calculate kernel | |||||
# add elements of R_inf up and compute kernel. | |||||
for (n1, n2), value in R_inf.items(): | for (n1, n2), value in R_inf.items(): | ||||
s = p_init_G1 * p_init_G2 * deltakernel( | s = p_init_G1 * p_init_G2 * deltakernel( | ||||
g1.nodes[n1][node_label], g2.nodes[n2][node_label]) | g1.nodes[n1][node_label], g2.nodes[n2][node_label]) | ||||
@@ -24,7 +24,7 @@ from gklearn.kernels import GraphKernel | |||||
from gklearn.utils import Trie | from gklearn.utils import Trie | ||||
class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
class PathUpToH(GraphKernel): # @todo: add function for k_func is None | |||||
def __init__(self, **kwargs): | def __init__(self, **kwargs): | ||||
GraphKernel.__init__(self) | GraphKernel.__init__(self) | ||||
@@ -43,7 +43,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout) | iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout) | ||||
iterator_kernel = tqdm(itr_kernel, desc='calculating kernels', file=sys.stdout) | |||||
iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator_ps = range(0, len(self._graphs)) | iterator_ps = range(0, len(self._graphs)) | ||||
iterator_kernel = itr_kernel | iterator_kernel = itr_kernel | ||||
@@ -69,7 +69,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
self.__add_dummy_labels(self._graphs) | self.__add_dummy_labels(self._graphs) | ||||
# get all paths of all graphs before calculating kernels to save time, | |||||
# get all paths of all graphs before computing kernels to save time, | |||||
# but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
itr = zip(self._graphs, range(0, len(self._graphs))) | itr = zip(self._graphs, range(0, len(self._graphs))) | ||||
@@ -123,7 +123,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) | iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout) | ||||
iterator_kernel = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator_ps = g_list | iterator_ps = g_list | ||||
iterator_kernel = range(len(g_list)) | iterator_kernel = range(len(g_list)) | ||||
@@ -149,7 +149,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | self.__add_dummy_labels(g_list + [g1]) | ||||
# get all paths of all graphs before calculating kernels to save time, | |||||
# get all paths of all graphs before computing kernels to save time, | |||||
# but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
itr = zip(g_list, range(0, len(g_list))) | itr = zip(g_list, range(0, len(g_list))) | ||||
@@ -190,7 +190,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
len_itr = len(g_list) | len_itr = len(g_list) | ||||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
return kernel_list | return kernel_list | ||||
@@ -218,7 +218,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
def __kernel_do_trie(self, trie1, trie2): | def __kernel_do_trie(self, trie1, trie2): | ||||
"""Calculate path graph kernels up to depth d between 2 graphs using trie. | |||||
"""Compute path graph kernels up to depth d between 2 graphs using trie. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -335,7 +335,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None | |||||
def __kernel_do_naive(self, paths1, paths2): | def __kernel_do_naive(self, paths1, paths2): | ||||
"""Calculate path graph kernels up to depth d between 2 graphs naively. | |||||
"""Compute path graph kernels up to depth d between 2 graphs naively. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -37,15 +37,15 @@ def randomwalkkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate random walk graph kernels. | |||||
"""Compute random walk graph kernels. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
compute_method : string | compute_method : string | ||||
Method used to compute kernel. The Following choices are | Method used to compute kernel. The Following choices are | ||||
@@ -125,7 +125,7 @@ def randomwalkkernel(*args, | |||||
Gn = [g.copy() for g in Gn] | Gn = [g.copy() for g in Gn] | ||||
eweight = None | eweight = None | ||||
if edge_weight == None: | |||||
if edge_weight is None: | |||||
if verbose: | if verbose: | ||||
print('\n None edge weight specified. Set all weight to 1.\n') | print('\n None edge weight specified. Set all weight to 1.\n') | ||||
else: | else: | ||||
@@ -212,12 +212,12 @@ def randomwalkkernel(*args, | |||||
############################################################################### | ############################################################################### | ||||
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True): | def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True): | ||||
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method. | |||||
"""Compute walk graph kernels up to n between 2 graphs using Sylvester method. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G1, G2 : NetworkX graph | G1, G2 : NetworkX graph | ||||
Graphs between which the kernel is calculated. | |||||
Graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -230,7 +230,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True | |||||
""" | """ | ||||
Kmatrix = np.zeros((len(Gn), len(Gn))) | Kmatrix = np.zeros((len(Gn), len(Gn))) | ||||
if q == None: | |||||
if q is None: | |||||
# don't normalize adjacency matrices if q is a uniform vector. Note | # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
# A_wave_list actually contains the transposes of the adjacency matrices. | # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
A_wave_list = [ | A_wave_list = [ | ||||
@@ -245,7 +245,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True | |||||
# norm = A_tilde.sum(axis=0) | # norm = A_tilde.sum(axis=0) | ||||
# norm[norm == 0] = 1 | # norm[norm == 0] = 1 | ||||
# A_wave_list.append(A_tilde / norm) | # A_wave_list.append(A_tilde / norm) | ||||
if p == None: # p is uniform distribution as default. | |||||
if p is None: # p is uniform distribution as default. | |||||
def init_worker(Awl_toshare): | def init_worker(Awl_toshare): | ||||
global G_Awl | global G_Awl | ||||
G_Awl = Awl_toshare | G_Awl = Awl_toshare | ||||
@@ -255,7 +255,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True | |||||
# pbar = tqdm( | # pbar = tqdm( | ||||
# total=(1 + len(Gn)) * len(Gn) / 2, | # total=(1 + len(Gn)) * len(Gn) / 2, | ||||
# desc='calculating kernels', | |||||
# desc='Computing kernels', | |||||
# file=sys.stdout) | # file=sys.stdout) | ||||
# for i in range(0, len(Gn)): | # for i in range(0, len(Gn)): | ||||
# for j in range(i, len(Gn)): | # for j in range(i, len(Gn)): | ||||
@@ -300,12 +300,12 @@ def _se_do(A_wave1, A_wave2, lmda): | |||||
############################################################################### | ############################################################################### | ||||
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | ||||
node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): | node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): | ||||
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method. | |||||
"""Compute walk graph kernels up to n between 2 graphs using conjugate method. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G1, G2 : NetworkX graph | G1, G2 : NetworkX graph | ||||
Graphs between which the kernel is calculated. | |||||
Graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -321,14 +321,14 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ | # if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ | ||||
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1: | # not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1: | ||||
# # this is faster from unlabeled graphs. @todo: why? | # # this is faster from unlabeled graphs. @todo: why? | ||||
# if q == None: | |||||
# if q is None: | |||||
# # don't normalize adjacency matrices if q is a uniform vector. Note | # # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
# # A_wave_list actually contains the transposes of the adjacency matrices. | # # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
# A_wave_list = [ | # A_wave_list = [ | ||||
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in | # nx.adjacency_matrix(G, eweight).todense().transpose() for G in | ||||
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) | # tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) | ||||
# ] | # ] | ||||
# if p == None: # p is uniform distribution as default. | |||||
# if p is None: # p is uniform distribution as default. | |||||
# def init_worker(Awl_toshare): | # def init_worker(Awl_toshare): | ||||
# global G_Awl | # global G_Awl | ||||
# G_Awl = Awl_toshare | # G_Awl = Awl_toshare | ||||
@@ -336,23 +336,23 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
# parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | # parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
# glbv=(A_wave_list,), n_jobs=n_jobs) | # glbv=(A_wave_list,), n_jobs=n_jobs) | ||||
# else: | # else: | ||||
# reindex nodes using consecutive integers for convenience of kernel calculation. | |||||
# reindex nodes using consecutive integers for convenience of kernel computation. | |||||
Gn = [nx.convert_node_labels_to_integers( | Gn = [nx.convert_node_labels_to_integers( | ||||
g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | ||||
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | ||||
if p == None and q == None: # p and q are uniform distributions as default. | |||||
if p is None and q is None: # p and q are uniform distributions as default. | |||||
def init_worker(gn_toshare): | def init_worker(gn_toshare): | ||||
global G_gn | global G_gn | ||||
G_gn = gn_toshare | G_gn = gn_toshare | ||||
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels, | |||||
do_partial = partial(wrapper_cg_labeled_do, ds_attrs, node_kernels, | |||||
node_label, edge_kernels, edge_label, lmda) | node_label, edge_kernels, edge_label, lmda) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | ||||
# pbar = tqdm( | # pbar = tqdm( | ||||
# total=(1 + len(Gn)) * len(Gn) / 2, | # total=(1 + len(Gn)) * len(Gn) / 2, | ||||
# desc='calculating kernels', | |||||
# desc='Computing kernels', | |||||
# file=sys.stdout) | # file=sys.stdout) | ||||
# for i in range(0, len(Gn)): | # for i in range(0, len(Gn)): | ||||
# for j in range(i, len(Gn)): | # for j in range(i, len(Gn)): | ||||
@@ -382,24 +382,24 @@ def _cg_unlabled_do(A_wave1, A_wave2, lmda): | |||||
return np.dot(q_times, x) | return np.dot(q_times, x) | ||||
def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||||
def wrapper_cg_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||||
edge_label, lmda, itr): | edge_label, lmda, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||||
return i, j, _cg_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||||
node_label, edge_kernels, edge_label, lmda) | node_label, edge_kernels, edge_label, lmda) | ||||
def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||||
def _cg_labeled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||||
edge_kernels, edge_label, lmda): | edge_kernels, edge_label, lmda): | ||||
# Frist, compute kernels between all pairs of nodes, method borrowed | |||||
# First, compute kernels between all pairs of nodes using the method borrowed | |||||
# from FCSP. It is faster than directly computing all edge kernels | # from FCSP. It is faster than directly computing all edge kernels | ||||
# when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the | # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the | ||||
# graphs compared, which is the most common case we went through. For very | # graphs compared, which is the most common case we went through. For very | ||||
# sparse graphs, this would be slow. | # sparse graphs, this would be slow. | ||||
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label) | vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label) | ||||
# Compute weight matrix of the direct product graph. | |||||
# Compute the weight matrix of the direct product graph. | |||||
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs, | w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs, | ||||
edge_kernels, edge_label) | edge_kernels, edge_label) | ||||
# use uniform distribution if there is no prior knowledge. | # use uniform distribution if there is no prior knowledge. | ||||
@@ -415,12 +415,12 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||||
############################################################################### | ############################################################################### | ||||
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | ||||
node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): | node_label, edge_label, eweight, n_jobs, chunksize, verbose=True): | ||||
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method. | |||||
"""Compute walk graph kernels up to n between 2 graphs using Fixed-Point method. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G1, G2 : NetworkX graph | G1, G2 : NetworkX graph | ||||
Graphs between which the kernel is calculated. | |||||
Graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -438,17 +438,17 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ | # if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \ | ||||
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1: | # not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1: | ||||
# # this is faster from unlabeled graphs. @todo: why? | # # this is faster from unlabeled graphs. @todo: why? | ||||
# if q == None: | |||||
# if q is None: | |||||
# # don't normalize adjacency matrices if q is a uniform vector. Note | # # don't normalize adjacency matrices if q is a uniform vector. Note | ||||
# # A_wave_list actually contains the transposes of the adjacency matrices. | # # A_wave_list actually contains the transposes of the adjacency matrices. | ||||
# A_wave_list = [ | # A_wave_list = [ | ||||
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in | # nx.adjacency_matrix(G, eweight).todense().transpose() for G in | ||||
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) | # tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout) | ||||
# ] | # ] | ||||
# if p == None: # p is uniform distribution as default. | |||||
# if p is None: # p is uniform distribution as default. | |||||
# pbar = tqdm( | # pbar = tqdm( | ||||
# total=(1 + len(Gn)) * len(Gn) / 2, | # total=(1 + len(Gn)) * len(Gn) / 2, | ||||
# desc='calculating kernels', | |||||
# desc='Computing kernels', | |||||
# file=sys.stdout) | # file=sys.stdout) | ||||
# for i in range(0, len(Gn)): | # for i in range(0, len(Gn)): | ||||
# for j in range(i, len(Gn)): | # for j in range(i, len(Gn)): | ||||
@@ -464,33 +464,33 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels, | |||||
# Kmatrix[j][i] = Kmatrix[i][j] | # Kmatrix[j][i] = Kmatrix[i][j] | ||||
# pbar.update(1) | # pbar.update(1) | ||||
# else: | # else: | ||||
# reindex nodes using consecutive integers for convenience of kernel calculation. | |||||
# reindex nodes using consecutive integers for the convenience of kernel computation. | |||||
Gn = [nx.convert_node_labels_to_integers( | Gn = [nx.convert_node_labels_to_integers( | ||||
g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | g, first_label=0, label_attribute='label_orignal') for g in (tqdm( | ||||
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)] | ||||
if p == None and q == None: # p and q are uniform distributions as default. | |||||
if p is None and q is None: # p and q are uniform distributions as default. | |||||
def init_worker(gn_toshare): | def init_worker(gn_toshare): | ||||
global G_gn | global G_gn | ||||
G_gn = gn_toshare | G_gn = gn_toshare | ||||
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels, | |||||
do_partial = partial(wrapper_fp_labeled_do, ds_attrs, node_kernels, | |||||
node_label, edge_kernels, edge_label, lmda) | node_label, edge_kernels, edge_label, lmda) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | ||||
return Kmatrix | return Kmatrix | ||||
def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||||
def wrapper_fp_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels, | |||||
edge_label, lmda, itr): | edge_label, lmda, itr): | ||||
i = itr[0] | i = itr[0] | ||||
j = itr[1] | j = itr[1] | ||||
return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||||
return i, j, _fp_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels, | |||||
node_label, edge_kernels, edge_label, lmda) | node_label, edge_kernels, edge_label, lmda) | ||||
def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||||
def _fp_labeled_do(g1, g2, ds_attrs, node_kernels, node_label, | |||||
edge_kernels, edge_label, lmda): | edge_kernels, edge_label, lmda): | ||||
# Frist, compute kernels between all pairs of nodes, method borrowed | |||||
# First, compute kernels between all pairs of nodes using the method borrowed | |||||
# from FCSP. It is faster than directly computing all edge kernels | # from FCSP. It is faster than directly computing all edge kernels | ||||
# when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the | # when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the | ||||
# graphs compared, which is the most common case we went through. For very | # graphs compared, which is the most common case we went through. For very | ||||
@@ -519,13 +519,13 @@ def func_fp(x, p_times, lmda, w_times): | |||||
############################################################################### | ############################################################################### | ||||
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunksize, verbose=True): | def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunksize, verbose=True): | ||||
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using | |||||
"""Compute walk graph kernels up to n between 2 unlabeled graphs using | |||||
spectral decomposition method. Labels will be ignored. | spectral decomposition method. Labels will be ignored. | ||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G1, G2 : NetworkX graph | G1, G2 : NetworkX graph | ||||
Graphs between which the kernel is calculated. | |||||
Graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -538,7 +538,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk | |||||
""" | """ | ||||
Kmatrix = np.zeros((len(Gn), len(Gn))) | Kmatrix = np.zeros((len(Gn), len(Gn))) | ||||
if q == None: | |||||
if q is None: | |||||
# precompute the spectral decomposition of each graph. | # precompute the spectral decomposition of each graph. | ||||
P_list = [] | P_list = [] | ||||
D_list = [] | D_list = [] | ||||
@@ -552,7 +552,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk | |||||
P_list.append(ev) | P_list.append(ev) | ||||
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? | # P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs? | ||||
if p == None: # p is uniform distribution as default. | |||||
if p is None: # p is uniform distribution as default. | |||||
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn] | q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn] | ||||
# q_T_list = [q.T for q in q_list] | # q_T_list = [q.T for q in q_list] | ||||
def init_worker(q_T_toshare, P_toshare, D_toshare): | def init_worker(q_T_toshare, P_toshare, D_toshare): | ||||
@@ -568,7 +568,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk | |||||
# pbar = tqdm( | # pbar = tqdm( | ||||
# total=(1 + len(Gn)) * len(Gn) / 2, | # total=(1 + len(Gn)) * len(Gn) / 2, | ||||
# desc='calculating kernels', | |||||
# desc='Computing kernels', | |||||
# file=sys.stdout) | # file=sys.stdout) | ||||
# for i in range(0, len(Gn)): | # for i in range(0, len(Gn)): | ||||
# for j in range(i, len(Gn)): | # for j in range(i, len(Gn)): | ||||
@@ -605,12 +605,12 @@ def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel): | |||||
############################################################################### | ############################################################################### | ||||
def _randomwalkkernel_kron(G1, G2, node_label, edge_label): | def _randomwalkkernel_kron(G1, G2, node_label, edge_label): | ||||
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. | |||||
"""Compute walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G1, G2 : NetworkX graph | G1, G2 : NetworkX graph | ||||
Graphs between which the kernel is calculated. | |||||
Graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -692,8 +692,8 @@ def computeVK(g1, g2, ds_attrs, node_kernels, node_label): | |||||
def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label): | def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label): | ||||
'''Compute weight matrix of the direct product graph. | |||||
''' | |||||
"""Compute the weight matrix of the direct product graph. | |||||
""" | |||||
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) | w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2) | ||||
w_times = np.zeros((w_dim, w_dim)) | w_times = np.zeros((w_dim, w_dim)) | ||||
if vk_dict: # node labeled | if vk_dict: # node labeled | ||||
@@ -47,7 +47,7 @@ class ShortestPath(GraphKernel): | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
@@ -102,7 +102,7 @@ class ShortestPath(GraphKernel): | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
@@ -145,7 +145,7 @@ class ShortestPath(GraphKernel): | |||||
itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
len_itr = len(g_list) | len_itr = len(g_list) | ||||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
return kernel_list | return kernel_list | ||||
@@ -29,15 +29,15 @@ def spkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate shortest-path kernels between graphs. | |||||
"""Compute shortest-path kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as label. The default node label is atom. | Node attribute used as label. The default node label is atom. | ||||
@@ -179,7 +179,7 @@ def spkernel(*args, | |||||
# do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) | # do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels) | ||||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | # itr = combinations_with_replacement(range(0, len(Gn)), 2) | ||||
# for i, j, kernel in tqdm( | # for i, j, kernel in tqdm( | ||||
# pool.map(do_partial, itr), desc='calculating kernels', | |||||
# pool.map(do_partial, itr), desc='Computing kernels', | |||||
# file=sys.stdout): | # file=sys.stdout): | ||||
# Kmatrix[i][j] = kernel | # Kmatrix[i][j] = kernel | ||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
@@ -202,7 +202,7 @@ def spkernel(*args, | |||||
# # ---- direct running, normally use single CPU core. ---- | # # ---- direct running, normally use single CPU core. ---- | ||||
# from itertools import combinations_with_replacement | # from itertools import combinations_with_replacement | ||||
# itr = combinations_with_replacement(range(0, len(Gn)), 2) | # itr = combinations_with_replacement(range(0, len(Gn)), 2) | ||||
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout): | |||||
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout): | |||||
# kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels) | # kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels) | ||||
# Kmatrix[i][j] = kernel | # Kmatrix[i][j] = kernel | ||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
@@ -18,7 +18,7 @@ from tqdm import tqdm | |||||
# import networkx as nx | # import networkx as nx | ||||
import numpy as np | import numpy as np | ||||
from gklearn.utils.parallel import parallel_gm, parallel_me | from gklearn.utils.parallel import parallel_gm, parallel_me | ||||
from gklearn.utils.utils import get_shortest_paths | |||||
from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels | |||||
from gklearn.kernels import GraphKernel | from gklearn.kernels import GraphKernel | ||||
@@ -57,7 +57,7 @@ class StructuralSP(GraphKernel): | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
if self.__compute_method == 'trie': | if self.__compute_method == 'trie': | ||||
@@ -135,7 +135,7 @@ class StructuralSP(GraphKernel): | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
if self.__compute_method == 'trie': | if self.__compute_method == 'trie': | ||||
@@ -193,7 +193,7 @@ class StructuralSP(GraphKernel): | |||||
itr = range(len(g_list)) | itr = range(len(g_list)) | ||||
len_itr = len(g_list) | len_itr = len(g_list) | ||||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
return kernel_list | return kernel_list | ||||
@@ -273,7 +273,7 @@ class StructuralSP(GraphKernel): | |||||
if len(p1) == len(p2): | if len(p1) == len(p2): | ||||
kernel += 1 | kernel += 1 | ||||
try: | try: | ||||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||||
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average | |||||
except ZeroDivisionError: | except ZeroDivisionError: | ||||
print(spl1, spl2) | print(spl1, spl2) | ||||
print(g1.nodes(data=True)) | print(g1.nodes(data=True)) | ||||
@@ -318,40 +318,7 @@ class StructuralSP(GraphKernel): | |||||
def __get_all_node_kernels(self, g1, g2): | def __get_all_node_kernels(self, g1, g2): | ||||
# compute shortest path matrices, method borrowed from FCSP. | |||||
vk_dict = {} # shortest path matrices dict | |||||
if len(self.__node_labels) > 0: | |||||
# node symb and non-synb labeled | |||||
if len(self.__node_attrs) > 0: | |||||
kn = self.__node_kernels['mix'] | |||||
for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)): | |||||
n1_labels = [n1[1][nl] for nl in self.__node_labels] | |||||
n2_labels = [n2[1][nl] for nl in self.__node_labels] | |||||
n1_attrs = [n1[1][na] for na in self.__node_attrs] | |||||
n2_attrs = [n2[1][na] for na in self.__node_attrs] | |||||
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) | |||||
# node symb labeled | |||||
else: | |||||
kn = self.__node_kernels['symb'] | |||||
for n1 in g1.nodes(data=True): | |||||
for n2 in g2.nodes(data=True): | |||||
n1_labels = [n1[1][nl] for nl in self.__node_labels] | |||||
n2_labels = [n2[1][nl] for nl in self.__node_labels] | |||||
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) | |||||
else: | |||||
# node non-synb labeled | |||||
if len(self.__node_attrs) > 0: | |||||
kn = self.__node_kernels['nsymb'] | |||||
for n1 in g1.nodes(data=True): | |||||
for n2 in g2.nodes(data=True): | |||||
n1_attrs = [n1[1][na] for na in self.__node_attrs] | |||||
n2_attrs = [n2[1][na] for na in self.__node_attrs] | |||||
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) | |||||
# node unlabeled | |||||
else: | |||||
pass | |||||
return vk_dict | |||||
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs) | |||||
def __get_all_edge_kernels(self, g1, g2): | def __get_all_edge_kernels(self, g1, g2): | ||||
@@ -37,15 +37,15 @@ def structuralspkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate mean average structural shortest path kernels between graphs. | |||||
"""Compute mean average structural shortest path kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as label. The default node label is atom. | Node attribute used as label. The default node label is atom. | ||||
@@ -215,7 +215,7 @@ def structuralspkernel(*args, | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr = combinations_with_replacement(range(0, len(Gn)), 2) | itr = combinations_with_replacement(range(0, len(Gn)), 2) | ||||
if verbose: | if verbose: | ||||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
if compute_method == 'trie': | if compute_method == 'trie': | ||||
@@ -241,7 +241,7 @@ def structuralspkernel(*args, | |||||
# combinations_with_replacement(splist, 2), | # combinations_with_replacement(splist, 2), | ||||
# combinations_with_replacement(range(0, len(Gn)), 2)) | # combinations_with_replacement(range(0, len(Gn)), 2)) | ||||
# for i, j, kernel in tqdm( | # for i, j, kernel in tqdm( | ||||
# pool.map(do_partial, itr), desc='calculating kernels', | |||||
# pool.map(do_partial, itr), desc='Computing kernels', | |||||
# file=sys.stdout): | # file=sys.stdout): | ||||
# Kmatrix[i][j] = kernel | # Kmatrix[i][j] = kernel | ||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
@@ -263,7 +263,7 @@ def structuralspkernel(*args, | |||||
# with closing(Pool(n_jobs)) as pool: | # with closing(Pool(n_jobs)) as pool: | ||||
# for i, j, kernel in tqdm( | # for i, j, kernel in tqdm( | ||||
# pool.imap_unordered(do_partial, itr, 1000), | # pool.imap_unordered(do_partial, itr, 1000), | ||||
# desc='calculating kernels', | |||||
# desc='Computing kernels', | |||||
# file=sys.stdout): | # file=sys.stdout): | ||||
# Kmatrix[i][j] = kernel | # Kmatrix[i][j] = kernel | ||||
# Kmatrix[j][i] = kernel | # Kmatrix[j][i] = kernel | ||||
@@ -335,7 +335,7 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label, | |||||
if len(p1) == len(p2): | if len(p1) == len(p2): | ||||
kernel += 1 | kernel += 1 | ||||
try: | try: | ||||
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average | |||||
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average | |||||
except ZeroDivisionError: | except ZeroDivisionError: | ||||
print(spl1, spl2) | print(spl1, spl2) | ||||
print(g1.nodes(data=True)) | print(g1.nodes(data=True)) | ||||
@@ -429,7 +429,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label, | |||||
# # compute graph kernels | # # compute graph kernels | ||||
# traverseBothTrie(trie1[0].root, trie2[0], kernel) | # traverseBothTrie(trie1[0].root, trie2[0], kernel) | ||||
# | # | ||||
# kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average | |||||
# kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average | |||||
# # traverse all paths in graph1. Depth-first search is applied. | # # traverse all paths in graph1. Depth-first search is applied. | ||||
# def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]): | # def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]): | ||||
@@ -485,7 +485,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label, | |||||
else: | else: | ||||
traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict) | traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict) | ||||
kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average | |||||
kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average | |||||
return kernel | return kernel | ||||
@@ -781,9 +781,9 @@ def get_shortest_paths(G, weight, directed): | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G : NetworkX graphs | G : NetworkX graphs | ||||
The graphs whose paths are calculated. | |||||
The graphs whose paths are computed. | |||||
weight : string/None | weight : string/None | ||||
edge attribute used as weight to calculate the shortest path. | |||||
edge attribute used as weight to compute the shortest path. | |||||
directed: boolean | directed: boolean | ||||
Whether graph is directed. | Whether graph is directed. | ||||
@@ -822,9 +822,9 @@ def get_sps_as_trie(G, weight, directed): | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
G : NetworkX graphs | G : NetworkX graphs | ||||
The graphs whose paths are calculated. | |||||
The graphs whose paths are computed. | |||||
weight : string/None | weight : string/None | ||||
edge attribute used as weight to calculate the shortest path. | |||||
edge attribute used as weight to compute the shortest path. | |||||
directed: boolean | directed: boolean | ||||
Whether graph is directed. | Whether graph is directed. | ||||
@@ -39,7 +39,7 @@ class Treelet(GraphKernel): | |||||
def _compute_gm_series(self): | def _compute_gm_series(self): | ||||
self.__add_dummy_labels(self._graphs) | self.__add_dummy_labels(self._graphs) | ||||
# get all canonical keys of all graphs before calculating kernels to save | |||||
# get all canonical keys of all graphs before computing kernels to save | |||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
canonkeys = [] | canonkeys = [] | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
@@ -55,7 +55,7 @@ class Treelet(GraphKernel): | |||||
from itertools import combinations_with_replacement | from itertools import combinations_with_replacement | ||||
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | itr = combinations_with_replacement(range(0, len(self._graphs)), 2) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = itr | iterator = itr | ||||
for i, j in iterator: | for i, j in iterator: | ||||
@@ -69,7 +69,7 @@ class Treelet(GraphKernel): | |||||
def _compute_gm_imap_unordered(self): | def _compute_gm_imap_unordered(self): | ||||
self.__add_dummy_labels(self._graphs) | self.__add_dummy_labels(self._graphs) | ||||
# get all canonical keys of all graphs before calculating kernels to save | |||||
# get all canonical keys of all graphs before computing kernels to save | |||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
pool = Pool(self._n_jobs) | pool = Pool(self._n_jobs) | ||||
itr = zip(self._graphs, range(0, len(self._graphs))) | itr = zip(self._graphs, range(0, len(self._graphs))) | ||||
@@ -105,7 +105,7 @@ class Treelet(GraphKernel): | |||||
def _compute_kernel_list_series(self, g1, g_list): | def _compute_kernel_list_series(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | self.__add_dummy_labels(g_list + [g1]) | ||||
# get all canonical keys of all graphs before calculating kernels to save | |||||
# get all canonical keys of all graphs before computing kernels to save | |||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
canonkeys_1 = self.__get_canonkeys(g1) | canonkeys_1 = self.__get_canonkeys(g1) | ||||
canonkeys_list = [] | canonkeys_list = [] | ||||
@@ -119,7 +119,7 @@ class Treelet(GraphKernel): | |||||
# compute kernel list. | # compute kernel list. | ||||
kernel_list = [None] * len(g_list) | kernel_list = [None] * len(g_list) | ||||
if self._verbose >= 2: | if self._verbose >= 2: | ||||
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout) | |||||
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout) | |||||
else: | else: | ||||
iterator = range(len(g_list)) | iterator = range(len(g_list)) | ||||
for i in iterator: | for i in iterator: | ||||
@@ -132,7 +132,7 @@ class Treelet(GraphKernel): | |||||
def _compute_kernel_list_imap_unordered(self, g1, g_list): | def _compute_kernel_list_imap_unordered(self, g1, g_list): | ||||
self.__add_dummy_labels(g_list + [g1]) | self.__add_dummy_labels(g_list + [g1]) | ||||
# get all canonical keys of all graphs before calculating kernels to save | |||||
# get all canonical keys of all graphs before computing kernels to save | |||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
canonkeys_1 = self.__get_canonkeys(g1) | canonkeys_1 = self.__get_canonkeys(g1) | ||||
canonkeys_list = [[] for _ in range(len(g_list))] | canonkeys_list = [[] for _ in range(len(g_list))] | ||||
@@ -167,7 +167,7 @@ class Treelet(GraphKernel): | |||||
len_itr = len(g_list) | len_itr = len(g_list) | ||||
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, | ||||
init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', | ||||
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose) | |||||
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose) | |||||
return kernel_list | return kernel_list | ||||
@@ -185,7 +185,7 @@ class Treelet(GraphKernel): | |||||
def __kernel_do(self, canonkey1, canonkey2): | def __kernel_do(self, canonkey1, canonkey2): | ||||
"""Calculate treelet graph kernel between 2 graphs. | |||||
"""Compute treelet graph kernel between 2 graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -29,15 +29,15 @@ def treeletkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate treelet graph kernels between graphs. | |||||
"""Compute treelet graph kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
sub_kernel : function | sub_kernel : function | ||||
The sub-kernel between 2 real number vectors. Each vector counts the | The sub-kernel between 2 real number vectors. Each vector counts the | ||||
@@ -89,7 +89,7 @@ def treeletkernel(*args, | |||||
# ---- use pool.imap_unordered to parallel and track progress. ---- | # ---- use pool.imap_unordered to parallel and track progress. ---- | ||||
if parallel == 'imap_unordered': | if parallel == 'imap_unordered': | ||||
# get all canonical keys of all graphs before calculating kernels to save | |||||
# get all canonical keys of all graphs before computing kernels to save | |||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
pool = Pool(n_jobs) | pool = Pool(n_jobs) | ||||
itr = zip(Gn, range(0, len(Gn))) | itr = zip(Gn, range(0, len(Gn))) | ||||
@@ -120,8 +120,8 @@ def treeletkernel(*args, | |||||
glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | ||||
# ---- do not use parallelization. ---- | # ---- do not use parallelization. ---- | ||||
elif parallel == None: | |||||
# get all canonical keys of all graphs before calculating kernels to save | |||||
elif parallel is None: | |||||
# get all canonical keys of all graphs before computing kernels to save | |||||
# time, but this may cost a lot of memory for large dataset. | # time, but this may cost a lot of memory for large dataset. | ||||
canonkeys = [] | canonkeys = [] | ||||
for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): | for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn): | ||||
@@ -148,7 +148,7 @@ def treeletkernel(*args, | |||||
def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): | def _treeletkernel_do(canonkey1, canonkey2, sub_kernel): | ||||
"""Calculate treelet graph kernel between 2 graphs. | |||||
"""Compute treelet graph kernel between 2 graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -210,7 +210,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed): | |||||
# n-star patterns | # n-star patterns | ||||
patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] | patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3] | ||||
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] | |||||
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] # @todo: check self loop. | |||||
patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] | patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5] | ||||
# n-star patterns | # n-star patterns | ||||
canonkey['6'] = len(patterns['3star']) | canonkey['6'] = len(patterns['3star']) | ||||
@@ -34,15 +34,15 @@ def untilhpathkernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate path graph kernels up to depth/hight h between graphs. | |||||
"""Compute path graph kernels up to depth/height h between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as label. The default node label is atom. | Node attribute used as label. The default node label is atom. | ||||
@@ -91,7 +91,7 @@ def untilhpathkernel(*args, | |||||
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled', | ||||
'edge_attr_dim', 'is_directed'], | 'edge_attr_dim', 'is_directed'], | ||||
node_label=node_label, edge_label=edge_label) | node_label=node_label, edge_label=edge_label) | ||||
if k_func != None: | |||||
if k_func is not None: | |||||
if not ds_attrs['node_labeled']: | if not ds_attrs['node_labeled']: | ||||
for G in Gn: | for G in Gn: | ||||
nx.set_node_attributes(G, '0', 'atom') | nx.set_node_attributes(G, '0', 'atom') | ||||
@@ -103,7 +103,7 @@ def untilhpathkernel(*args, | |||||
if parallel == 'imap_unordered': | if parallel == 'imap_unordered': | ||||
# ---- use pool.imap_unordered to parallel and track progress. ---- | # ---- use pool.imap_unordered to parallel and track progress. ---- | ||||
# get all paths of all graphs before calculating kernels to save time, | |||||
# get all paths of all graphs before computing kernels to save time, | |||||
# but this may cost a lot of memory for large datasets. | # but this may cost a lot of memory for large datasets. | ||||
pool = Pool(n_jobs) | pool = Pool(n_jobs) | ||||
itr = zip(Gn, range(0, len(Gn))) | itr = zip(Gn, range(0, len(Gn))) | ||||
@@ -113,10 +113,10 @@ def untilhpathkernel(*args, | |||||
else: | else: | ||||
chunksize = 100 | chunksize = 100 | ||||
all_paths = [[] for _ in range(len(Gn))] | all_paths = [[] for _ in range(len(Gn))] | ||||
if compute_method == 'trie' and k_func != None: | |||||
if compute_method == 'trie' and k_func is not None: | |||||
getps_partial = partial(wrapper_find_all_path_as_trie, depth, | getps_partial = partial(wrapper_find_all_path_as_trie, depth, | ||||
ds_attrs, node_label, edge_label) | ds_attrs, node_label, edge_label) | ||||
elif compute_method != 'trie' and k_func != None: | |||||
elif compute_method != 'trie' and k_func is not None: | |||||
getps_partial = partial(wrapper_find_all_paths_until_length, depth, | getps_partial = partial(wrapper_find_all_paths_until_length, depth, | ||||
ds_attrs, node_label, edge_label, True) | ds_attrs, node_label, edge_label, True) | ||||
else: | else: | ||||
@@ -133,9 +133,9 @@ def untilhpathkernel(*args, | |||||
pool.join() | pool.join() | ||||
# for g in Gn: | # for g in Gn: | ||||
# if compute_method == 'trie' and k_func != None: | |||||
# if compute_method == 'trie' and k_func is not None: | |||||
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | # find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label) | ||||
# elif compute_method != 'trie' and k_func != None: | |||||
# elif compute_method != 'trie' and k_func is not None: | |||||
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) | # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label) | ||||
# else: | # else: | ||||
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False) | # find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False) | ||||
@@ -155,14 +155,14 @@ def untilhpathkernel(*args, | |||||
## all_paths[i] = ps | ## all_paths[i] = ps | ||||
## print(time.time() - ttt) | ## print(time.time() - ttt) | ||||
if compute_method == 'trie' and k_func != None: | |||||
if compute_method == 'trie' and k_func is not None: | |||||
def init_worker(trie_toshare): | def init_worker(trie_toshare): | ||||
global G_trie | global G_trie | ||||
G_trie = trie_toshare | G_trie = trie_toshare | ||||
do_partial = partial(wrapper_uhpath_do_trie, k_func) | do_partial = partial(wrapper_uhpath_do_trie, k_func) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | ||||
elif compute_method != 'trie' and k_func != None: | |||||
elif compute_method != 'trie' and k_func is not None: | |||||
def init_worker(plist_toshare): | def init_worker(plist_toshare): | ||||
global G_plist | global G_plist | ||||
G_plist = plist_toshare | G_plist = plist_toshare | ||||
@@ -177,7 +177,7 @@ def untilhpathkernel(*args, | |||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | ||||
elif parallel == None: | |||||
elif parallel is None: | |||||
# from pympler import asizeof | # from pympler import asizeof | ||||
# ---- direct running, normally use single CPU core. ---- | # ---- direct running, normally use single CPU core. ---- | ||||
# print(asizeof.asized(all_paths, detail=1).format()) | # print(asizeof.asized(all_paths, detail=1).format()) | ||||
@@ -195,7 +195,7 @@ def untilhpathkernel(*args, | |||||
# print(sizeof_allpaths) | # print(sizeof_allpaths) | ||||
pbar = tqdm( | pbar = tqdm( | ||||
total=((len(Gn) + 1) * len(Gn) / 2), | total=((len(Gn) + 1) * len(Gn) / 2), | ||||
desc='calculating kernels', | |||||
desc='Computing kernels', | |||||
file=sys.stdout) | file=sys.stdout) | ||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
@@ -217,7 +217,7 @@ def untilhpathkernel(*args, | |||||
# print(sizeof_allpaths) | # print(sizeof_allpaths) | ||||
pbar = tqdm( | pbar = tqdm( | ||||
total=((len(Gn) + 1) * len(Gn) / 2), | total=((len(Gn) + 1) * len(Gn) / 2), | ||||
desc='calculating kernels', | |||||
desc='Computing kernels', | |||||
file=sys.stdout) | file=sys.stdout) | ||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
@@ -236,7 +236,7 @@ def untilhpathkernel(*args, | |||||
def _untilhpathkernel_do_trie(trie1, trie2, k_func): | def _untilhpathkernel_do_trie(trie1, trie2, k_func): | ||||
"""Calculate path graph kernels up to depth d between 2 graphs using trie. | |||||
"""Compute path graph kernels up to depth d between 2 graphs using trie. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -351,7 +351,7 @@ def wrapper_uhpath_do_trie(k_func, itr): | |||||
def _untilhpathkernel_do_naive(paths1, paths2, k_func): | def _untilhpathkernel_do_naive(paths1, paths2, k_func): | ||||
"""Calculate path graph kernels up to depth d between 2 graphs naively. | |||||
"""Compute path graph kernels up to depth d between 2 graphs naively. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -400,7 +400,7 @@ def wrapper_uhpath_do_naive(k_func, itr): | |||||
def _untilhpathkernel_do_kernelless(paths1, paths2, k_func): | def _untilhpathkernel_do_kernelless(paths1, paths2, k_func): | ||||
"""Calculate path graph kernels up to depth d between 2 graphs naively. | |||||
"""Compute path graph kernels up to depth d between 2 graphs naively. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
@@ -32,15 +32,15 @@ def weisfeilerlehmankernel(*args, | |||||
n_jobs=None, | n_jobs=None, | ||||
chunksize=None, | chunksize=None, | ||||
verbose=True): | verbose=True): | ||||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
G1, G2 : NetworkX graphs | G1, G2 : NetworkX graphs | ||||
Two graphs between which the kernel is calculated. | |||||
Two graphs between which the kernel is computed. | |||||
node_label : string | node_label : string | ||||
Node attribute used as label. The default node label is atom. | Node attribute used as label. The default node label is atom. | ||||
@@ -115,12 +115,12 @@ def weisfeilerlehmankernel(*args, | |||||
def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose): | def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose): | ||||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -146,7 +146,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz | |||||
# number of occurrence of each label in G | # number of occurrence of each label in G | ||||
all_num_of_each_label.append(dict(Counter(labels_ori))) | all_num_of_each_label.append(dict(Counter(labels_ori))) | ||||
# calculate subtree kernel with the 0th iteration and add it to the final kernel | |||||
# Compute subtree kernel with the 0th iteration and add it to the final kernel | |||||
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) | compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) | ||||
# iterate each height | # iterate each height | ||||
@@ -255,7 +255,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz | |||||
# all_labels_ori.update(labels_comp) | # all_labels_ori.update(labels_comp) | ||||
all_num_of_each_label.append(dict(Counter(labels_comp))) | all_num_of_each_label.append(dict(Counter(labels_comp))) | ||||
# calculate subtree kernel with h iterations and add it to the final kernel | |||||
# Compute subtree kernel with h iterations and add it to the final kernel | |||||
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) | compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False) | ||||
return Kmatrix | return Kmatrix | ||||
@@ -316,7 +316,7 @@ def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, | |||||
do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix) | do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix) | ||||
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker, | ||||
glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose) | ||||
elif parallel == None: | |||||
elif parallel is None: | |||||
for i in range(len(Kmatrix)): | for i in range(len(Kmatrix)): | ||||
for j in range(i, len(Kmatrix)): | for j in range(i, len(Kmatrix)): | ||||
Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i], | Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i], | ||||
@@ -345,12 +345,12 @@ def wrapper_compute_subtree_kernel(Kmatrix, itr): | |||||
def _wl_spkernel_do(Gn, node_label, edge_label, height): | def _wl_spkernel_do(Gn, node_label, edge_label, height): | ||||
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman shortest path kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -413,7 +413,7 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||||
for node in G.nodes(data = True): | for node in G.nodes(data = True): | ||||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | node[1][node_label] = set_compressed[set_multisets[node[0]]] | ||||
# calculate subtree kernel with h iterations and add it to the final kernel | |||||
# Compute subtree kernel with h iterations and add it to the final kernel | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
for e1 in Gn[i].edges(data = True): | for e1 in Gn[i].edges(data = True): | ||||
@@ -427,12 +427,12 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height): | |||||
def _wl_edgekernel_do(Gn, node_label, edge_label, height): | def _wl_edgekernel_do(Gn, node_label, edge_label, height): | ||||
"""Calculate Weisfeiler-Lehman edge kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman edge kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -491,7 +491,7 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||||
for node in G.nodes(data = True): | for node in G.nodes(data = True): | ||||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | node[1][node_label] = set_compressed[set_multisets[node[0]]] | ||||
# calculate subtree kernel with h iterations and add it to the final kernel | |||||
# Compute subtree kernel with h iterations and add it to the final kernel | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
for e1 in Gn[i].edges(data = True): | for e1 in Gn[i].edges(data = True): | ||||
@@ -504,12 +504,12 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height): | |||||
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | ||||
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||||
"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -564,7 +564,7 @@ def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | |||||
for node in G.nodes(data = True): | for node in G.nodes(data = True): | ||||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | node[1][node_label] = set_compressed[set_multisets[node[0]]] | ||||
# calculate kernel with h iterations and add it to the final kernel | |||||
# Compute kernel with h iterations and add it to the final kernel | |||||
Kmatrix += base_kernel(Gn, node_label, edge_label) | Kmatrix += base_kernel(Gn, node_label, edge_label) | ||||
return Kmatrix | return Kmatrix |
@@ -125,12 +125,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def __subtree_kernel_do(self, Gn): | def __subtree_kernel_do(self, Gn): | ||||
"""Calculate Weisfeiler-Lehman kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
Return | Return | ||||
------ | ------ | ||||
@@ -152,7 +152,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
# number of occurrence of each label in G | # number of occurrence of each label in G | ||||
all_num_of_each_label.append(dict(Counter(labels_ori))) | all_num_of_each_label.append(dict(Counter(labels_ori))) | ||||
# calculate subtree kernel with the 0th iteration and add it to the final kernel. | |||||
# Compute subtree kernel with the 0th iteration and add it to the final kernel. | |||||
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | ||||
# iterate each height | # iterate each height | ||||
@@ -198,7 +198,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
# all_labels_ori.update(labels_comp) | # all_labels_ori.update(labels_comp) | ||||
all_num_of_each_label.append(dict(Counter(labels_comp))) | all_num_of_each_label.append(dict(Counter(labels_comp))) | ||||
# calculate subtree kernel with h iterations and add it to the final kernel | |||||
# Compute subtree kernel with h iterations and add it to the final kernel | |||||
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn) | ||||
return gram_matrix | return gram_matrix | ||||
@@ -244,12 +244,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def _wl_spkernel_do(Gn, node_label, edge_label, height): | def _wl_spkernel_do(Gn, node_label, edge_label, height): | ||||
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman shortest path kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -312,7 +312,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
for node in G.nodes(data = True): | for node in G.nodes(data = True): | ||||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | node[1][node_label] = set_compressed[set_multisets[node[0]]] | ||||
# calculate subtree kernel with h iterations and add it to the final kernel | |||||
# Compute subtree kernel with h iterations and add it to the final kernel | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
for e1 in Gn[i].edges(data = True): | for e1 in Gn[i].edges(data = True): | ||||
@@ -326,12 +326,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def _wl_edgekernel_do(Gn, node_label, edge_label, height): | def _wl_edgekernel_do(Gn, node_label, edge_label, height): | ||||
"""Calculate Weisfeiler-Lehman edge kernels between graphs. | |||||
"""Compute Weisfeiler-Lehman edge kernels between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -390,7 +390,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
for node in G.nodes(data = True): | for node in G.nodes(data = True): | ||||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | node[1][node_label] = set_compressed[set_multisets[node[0]]] | ||||
# calculate subtree kernel with h iterations and add it to the final kernel | |||||
# Compute subtree kernel with h iterations and add it to the final kernel | |||||
for i in range(0, len(Gn)): | for i in range(0, len(Gn)): | ||||
for j in range(i, len(Gn)): | for j in range(i, len(Gn)): | ||||
for e1 in Gn[i].edges(data = True): | for e1 in Gn[i].edges(data = True): | ||||
@@ -403,12 +403,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel): | ||||
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||||
"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs. | |||||
Parameters | Parameters | ||||
---------- | ---------- | ||||
Gn : List of NetworkX graph | Gn : List of NetworkX graph | ||||
List of graphs between which the kernels are calculated. | |||||
List of graphs between which the kernels are computed. | |||||
node_label : string | node_label : string | ||||
node attribute used as label. | node attribute used as label. | ||||
edge_label : string | edge_label : string | ||||
@@ -463,7 +463,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge | |||||
for node in G.nodes(data = True): | for node in G.nodes(data = True): | ||||
node[1][node_label] = set_compressed[set_multisets[node[0]]] | node[1][node_label] = set_compressed[set_multisets[node[0]]] | ||||
# calculate kernel with h iterations and add it to the final kernel | |||||
# Compute kernel with h iterations and add it to the final kernel | |||||
gram_matrix += base_kernel(Gn, node_label, edge_label) | gram_matrix += base_kernel(Gn, node_label, edge_label) | ||||
return gram_matrix | return gram_matrix | ||||
@@ -63,4 +63,4 @@ def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None, | |||||
len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | len_itr = int(len(Gn) * (len(Gn) + 1) / 2) | ||||
parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr, | parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr, | ||||
init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs, | init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs, | ||||
chunksize=chunksize, itr_desc='calculating kernels', verbose=verbose) | |||||
chunksize=chunksize, itr_desc='Computing kernels', verbose=verbose) |