
Update comments and fix minor bugs for graph kernels.

v0.2.x
jajupmochi, 4 years ago
commit 320964dd16
17 changed files with 190 additions and 223 deletions
  1. +12 -12  gklearn/kernels/commonWalkKernel.py
  2. +7  -7   gklearn/kernels/common_walk.py
  3. +2  -2   gklearn/kernels/graph_kernel.py
  4. +11 -11  gklearn/kernels/marginalized.py
  5. +16 -16  gklearn/kernels/marginalizedKernel.py
  6. +8  -8   gklearn/kernels/path_up_to_h.py
  7. +43 -43  gklearn/kernels/randomWalkKernel.py
  8. +3  -3   gklearn/kernels/shortest_path.py
  9. +5  -5   gklearn/kernels/spKernel.py
 10. +6  -39  gklearn/kernels/structural_sp.py
 11. +13 -13  gklearn/kernels/structuralspKernel.py
 12. +8  -8   gklearn/kernels/treelet.py
 13. +8  -8   gklearn/kernels/treeletKernel.py
 14. +17 -17  gklearn/kernels/untilHPathKernel.py
 15. +17 -17  gklearn/kernels/weisfeilerLehmanKernel.py
 16. +13 -13  gklearn/kernels/weisfeiler_lehman.py
 17. +1  -1   gklearn/utils/parallel.py

+12 -12  gklearn/kernels/commonWalkKernel.py

@@ -30,15 +30,15 @@ def commonwalkkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate common walk graph kernels between graphs.
"""Compute common walk graph kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.
node_label : string
Node attribute used as symbolic label. The default node label is 'atom'.
edge_label : string
@@ -133,7 +133,7 @@ def commonwalkkernel(*args,
#
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, chunksize),
# desc='calculating kernels',
# desc='computing kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
@@ -145,14 +145,14 @@ def commonwalkkernel(*args,
# # direct product graph method - exponential
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# if compute_method == 'exp':
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
# Kmatrix[i][j] = _commonwalkkernel_exp(Gn[i], Gn[j], node_label,
# edge_label, weight)
# Kmatrix[j][i] = Kmatrix[i][j]
#
# # direct product graph method - geometric
# elif compute_method == 'geo':
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
# Kmatrix[i][j] = _commonwalkkernel_geo(Gn[i], Gn[j], node_label,
# edge_label, weight)
# Kmatrix[j][i] = Kmatrix[i][j]
@@ -161,7 +161,7 @@ def commonwalkkernel(*args,
# # search all paths use brute force.
# elif compute_method == 'brute':
# n = int(n)
# # get all paths of all graphs before calculating kernels to save time, but this may cost a lot of memory for large dataset.
# # get all paths of all graphs before computing kernels to save time, but this may cost a lot of memory for large dataset.
# all_walks = [
# find_all_walks_until_length(Gn[i], n, node_label, edge_label)
# for i in range(0, len(Gn))
@@ -185,13 +185,13 @@ def commonwalkkernel(*args,


def _commonwalkkernel_exp(g1, g2, node_label, edge_label, beta):
"""Calculate walk graph kernels up to n between 2 graphs using exponential
"""Compute walk graph kernels up to n between 2 graphs using exponential
series.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
Node attribute used as label.
edge_label : string
@@ -259,13 +259,13 @@ def wrapper_cw_exp(node_label, edge_label, beta, itr):


def _commonwalkkernel_geo(g1, g2, node_label, edge_label, gamma):
"""Calculate common walk graph kernels up to n between 2 graphs using
"""Compute common walk graph kernels up to n between 2 graphs using
geometric series.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
Node attribute used as label.
edge_label : string
@@ -304,7 +304,7 @@ def _commonwalkkernel_brute(walks1,
node_label='atom',
edge_label='bond_type',
labeled=True):
"""Calculate walk graph kernels up to n between 2 graphs.
"""Compute walk graph kernels up to n between 2 graphs.

Parameters
----------
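
For context on the exponential variant renamed in this file: it sums weighted walk counts on the direct product graph. A minimal sketch, not gklearn's implementation, assuming unlabeled graphs and uniform start/stop weights:

import networkx as nx
from scipy.linalg import expm

def common_walk_exp_sketch(g1, g2, beta=0.1):
    # Walks in the tensor (direct) product graph correspond one-to-one
    # to common walks in g1 and g2.
    A = nx.to_numpy_array(nx.tensor_product(g1, g2))
    # sum_n (beta^n / n!) * A^n = expm(beta * A); summing all entries
    # amounts to uniform start/stop weights up to a constant factor.
    return expm(beta * A).sum()

print(common_walk_exp_sketch(nx.path_graph(4), nx.cycle_graph(5)))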


+7 -7  gklearn/kernels/common_walk.py

@@ -46,7 +46,7 @@ class CommonWalk(GraphKernel):
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
@@ -102,7 +102,7 @@ class CommonWalk(GraphKernel):
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
@@ -148,7 +148,7 @@ class CommonWalk(GraphKernel):
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list
@@ -179,13 +179,13 @@ class CommonWalk(GraphKernel):
def __kernel_do_exp(self, g1, g2, beta):
"""Calculate common walk graph kernel between 2 graphs using exponential
"""Compute common walk graph kernel between 2 graphs using exponential
series.
Parameters
----------
g1, g2 : NetworkX graphs
Graphs between which the kernels are calculated.
Graphs between which the kernels are computed.
beta : integer
Weight.
@@ -231,13 +231,13 @@ class CommonWalk(GraphKernel):
def __kernel_do_geo(self, g1, g2, gamma):
"""Calculate common walk graph kernel between 2 graphs using geometric
"""Compute common walk graph kernel between 2 graphs using geometric
series.
Parameters
----------
g1, g2 : NetworkX graphs
Graphs between which the kernels are calculated.
Graphs between which the kernels are computed.
gamma : integer
Weight.
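
The geometric variant in __kernel_do_geo has an equally compact closed form; a minimal sketch under the same assumptions (unlabeled graphs, uniform weights), convergent only while gamma stays below the reciprocal of the spectral radius:

import networkx as nx
import numpy as np

def common_walk_geo_sketch(g1, g2, gamma=0.05):
    A = nx.to_numpy_array(nx.tensor_product(g1, g2))
    n = A.shape[0]
    # sum_n gamma^n * A^n = (I - gamma * A)^{-1} when the series converges.
    return np.linalg.inv(np.eye(n) - gamma * A).sum()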


+2 -2  gklearn/kernels/graph_kernel.py

@@ -104,7 +104,7 @@ class GraphKernel(object):
if self._parallel == 'imap_unordered':
gram_matrix = self._compute_gm_imap_unordered()
elif self._parallel == None:
elif self._parallel is None:
gram_matrix = self._compute_gm_series()
else:
raise Exception('Parallel mode is not set correctly.')
@@ -130,7 +130,7 @@ class GraphKernel(object):
if self._parallel == 'imap_unordered':
kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
elif self._parallel == None:
elif self._parallel is None:
kernel_list = self._compute_kernel_list_series(g1, g_list)
else:
raise Exception('Parallel mode is not set correctly.')
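
The change from == None to is None here is not purely stylistic: == dispatches to __eq__, which objects such as NumPy arrays overload elementwise, while is always yields a single bool by identity against the None singleton. A small demonstration:

import numpy as np

a = np.zeros(3)
print(a is None)   # False -- identity test, always one bool
print(a == None)   # [False False False] -- elementwise __eq__
# 'if (a == None):' raises "truth value of an array ... is ambiguous",
# which is why PEP 8 requires 'is None' / 'is not None' for singletons.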


+11 -11  gklearn/kernels/marginalized.py

@@ -59,7 +59,7 @@ class Marginalized(GraphKernel):
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
for i, j in iterator:
@@ -119,7 +119,7 @@ class Marginalized(GraphKernel):
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
for i in iterator:
@@ -165,7 +165,7 @@ class Marginalized(GraphKernel):
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list
@@ -184,12 +184,12 @@ class Marginalized(GraphKernel):
def __kernel_do(self, g1, g2):
"""Calculate marginalized graph kernel between 2 graphs.
"""Compute marginalized graph kernel between 2 graphs.
Parameters
----------
g1, g2 : NetworkX graphs
2 graphs between which the kernel is calculated.
2 graphs between which the kernel is computed.
Return
------
@@ -212,12 +212,12 @@ class Marginalized(GraphKernel):
# # matrix to save all the R_inf for all pairs of nodes
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
#
# # calculate R_inf with a simple interative method
# # Compute R_inf with a simple interative method
# for i in range(1, n_iteration):
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
# R_inf_new.fill(r1)
#
# # calculate R_inf for each pair of nodes
# # Compute R_inf for each pair of nodes
# for node1 in g1.nodes(data=True):
# neighbor_n1 = g1[node1[0]]
# # the transition probability distribution in the random walks
@@ -243,7 +243,7 @@ class Marginalized(GraphKernel):
# neighbor2] # ref [1] equation (8)
# R_inf[:] = R_inf_new
#
# # add elements of R_inf up and calculate kernel
# # add elements of R_inf up and compute kernel
# for node1 in g1.nodes(data=True):
# for node2 in g2.nodes(data=True):
# s = p_init_G1 * p_init_G2 * deltakernel(
@@ -288,11 +288,11 @@ class Marginalized(GraphKernel):
deltakernel(tuple(g1.nodes[neighbor1][nl] for nl in self.__node_labels), tuple(g2.nodes[neighbor2][nl] for nl in self.__node_labels)) * \
deltakernel(tuple(neighbor_n1[neighbor1][el] for el in self.__edge_labels), tuple(neighbor_n2[neighbor2][el] for el in self.__edge_labels))
# calculate R_inf with a simple interative method
# Compute R_inf with a simple interative method
for i in range(2, self.__n_iteration + 1):
R_inf_old = R_inf.copy()
# calculate R_inf for each pair of nodes
# Compute R_inf for each pair of nodes
for node1 in g1.nodes():
neighbor_n1 = g1[node1]
# the transition probability distribution in the random walks
@@ -309,7 +309,7 @@ class Marginalized(GraphKernel):
(t_dict[(node1, node2, neighbor1, neighbor2)] * \
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8)
# add elements of R_inf up and calculate kernel
# add elements of R_inf up and compute kernel.
for (n1, n2), value in R_inf.items():
s = p_init_G1 * p_init_G2 * deltakernel(tuple(g1.nodes[n1][nl] for nl in self.__node_labels), tuple(g2.nodes[n2][nl] for nl in self.__node_labels))
kernel += s * value # ref [1] equation (6)
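
The comments renamed in this file follow ref [1]'s fixed-point computation of R_inf. A self-contained sketch of that iteration, assuming a single symbolic node label, uniform transition probabilities, and no isolated nodes (illustrative, not gklearn's exact code):

def deltakernel(x, y):
    return 1 if x == y else 0

def marginalized_sketch(g1, g2, p_quit=0.5, n_iteration=20, node_label='atom'):
    # R_inf(u, v) starts from the probability that both walks stop at once.
    R = {(u, v): p_quit * p_quit for u in g1 for v in g2}
    for _ in range(n_iteration):
        R_old = R.copy()
        for (u, v) in R:  # ref [1] equation (8)
            s = p_quit * p_quit
            t = (1 - p_quit) ** 2 / (len(g1[u]) * len(g2[v]))
            for nu in g1[u]:
                for nv in g2[v]:
                    s += t * deltakernel(g1.nodes[nu].get(node_label),
                                         g2.nodes[nv].get(node_label)) * R_old[(nu, nv)]
            R[(u, v)] = s
    p_init = 1 / (g1.number_of_nodes() * g2.number_of_nodes())
    return sum(p_init * deltakernel(g1.nodes[u].get(node_label),
                                    g2.nodes[v].get(node_label)) * r
               for (u, v), r in R.items())  # ref [1] equation (6)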


+16 -16  gklearn/kernels/marginalizedKernel.py

@@ -39,15 +39,15 @@ def marginalizedkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate marginalized graph kernels between graphs.
"""Compute marginalized graph kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

node_label : string
Node attribute used as symbolic label. The default node label is 'atom'.
@@ -59,7 +59,7 @@ def marginalizedkernel(*args,
The termination probability in the random walks generating step.

n_iteration : integer
Time of iterations to calculate R_inf.
Time of iterations to compute R_inf.

remove_totters : boolean
Whether to remove totterings by method introduced in [2]. The default
@@ -83,11 +83,11 @@ def marginalizedkernel(*args,
Gn,
attr_names=['node_labeled', 'edge_labeled', 'is_directed'],
node_label=node_label, edge_label=edge_label)
if not ds_attrs['node_labeled'] or node_label == None:
if not ds_attrs['node_labeled'] or node_label is None:
node_label = 'atom'
for G in Gn:
nx.set_node_attributes(G, '0', 'atom')
if not ds_attrs['edge_labeled'] or edge_label == None:
if not ds_attrs['edge_labeled'] or edge_label is None:
edge_label = 'bond_type'
for G in Gn:
nx.set_edge_attributes(G, '0', 'bond_type')
@@ -133,7 +133,7 @@ def marginalizedkernel(*args,
# # ---- direct running, normally use single CPU core. ----
## pbar = tqdm(
## total=(1 + len(Gn)) * len(Gn) / 2,
## desc='calculating kernels',
## desc='Computing kernels',
## file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
@@ -152,12 +152,12 @@ def marginalizedkernel(*args,


def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
"""Calculate marginalized graph kernel between 2 graphs.
"""Compute marginalized graph kernel between 2 graphs.

Parameters
----------
G1, G2 : NetworkX graphs
2 graphs between which the kernel is calculated.
2 graphs between which the kernel is computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -165,7 +165,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
p_quit : integer
the termination probability in the random walks generating step.
n_iteration : integer
time of iterations to calculate R_inf.
time of iterations to compute R_inf.

Return
------
@@ -188,12 +188,12 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
# # matrix to save all the R_inf for all pairs of nodes
# R_inf = np.zeros([num_nodes_G1, num_nodes_G2])
#
# # calculate R_inf with a simple interative method
# # Compute R_inf with a simple interative method
# for i in range(1, n_iteration):
# R_inf_new = np.zeros([num_nodes_G1, num_nodes_G2])
# R_inf_new.fill(r1)
#
# # calculate R_inf for each pair of nodes
# # Compute R_inf for each pair of nodes
# for node1 in g1.nodes(data=True):
# neighbor_n1 = g1[node1[0]]
# # the transition probability distribution in the random walks
@@ -219,7 +219,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
# neighbor2] # ref [1] equation (8)
# R_inf[:] = R_inf_new
#
# # add elements of R_inf up and calculate kernel
# # add elements of R_inf up and compute kernel.
# for node1 in g1.nodes(data=True):
# for node2 in g2.nodes(data=True):
# s = p_init_G1 * p_init_G2 * deltakernel(
@@ -267,11 +267,11 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
neighbor_n1[neighbor1][edge_label],
neighbor_n2[neighbor2][edge_label])

# calculate R_inf with a simple interative method
# Compute R_inf with a simple interative method
for i in range(2, n_iteration + 1):
R_inf_old = R_inf.copy()

# calculate R_inf for each pair of nodes
# Compute R_inf for each pair of nodes
for node1 in g1.nodes():
neighbor_n1 = g1[node1]
# the transition probability distribution in the random walks
@@ -288,7 +288,7 @@ def _marginalizedkernel_do(g1, g2, node_label, edge_label, p_quit, n_iteration):
(t_dict[(node1, node2, neighbor1, neighbor2)] * \
R_inf_old[(neighbor1, neighbor2)]) # ref [1] equation (8)

# add elements of R_inf up and calculate kernel
# add elements of R_inf up and compute kernel.
for (n1, n2), value in R_inf.items():
s = p_init_G1 * p_init_G2 * deltakernel(
g1.nodes[n1][node_label], g2.nodes[n2][node_label])


+8 -8  gklearn/kernels/path_up_to_h.py

@@ -24,7 +24,7 @@ from gklearn.kernels import GraphKernel
from gklearn.utils import Trie


class PathUpToH(GraphKernel): # @todo: add function for k_func == None
class PathUpToH(GraphKernel): # @todo: add function for k_func is None
def __init__(self, **kwargs):
GraphKernel.__init__(self)
@@ -43,7 +43,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator_ps = tqdm(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout)
iterator_kernel = tqdm(itr_kernel, desc='calculating kernels', file=sys.stdout)
iterator_kernel = tqdm(itr_kernel, desc='Computing kernels', file=sys.stdout)
else:
iterator_ps = range(0, len(self._graphs))
iterator_kernel = itr_kernel
@@ -69,7 +69,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
# get all paths of all graphs before calculating kernels to save time,
# get all paths of all graphs before computing kernels to save time,
# but this may cost a lot of memory for large datasets.
pool = Pool(self._n_jobs)
itr = zip(self._graphs, range(0, len(self._graphs)))
@@ -123,7 +123,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
if self._verbose >= 2:
iterator_ps = tqdm(g_list, desc='getting paths', file=sys.stdout)
iterator_kernel = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator_kernel = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator_ps = g_list
iterator_kernel = range(len(g_list))
@@ -149,7 +149,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
# get all paths of all graphs before calculating kernels to save time,
# get all paths of all graphs before computing kernels to save time,
# but this may cost a lot of memory for large datasets.
pool = Pool(self._n_jobs)
itr = zip(g_list, range(0, len(g_list)))
@@ -190,7 +190,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
itr = range(len(g_list))
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list
@@ -218,7 +218,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None

def __kernel_do_trie(self, trie1, trie2):
"""Calculate path graph kernels up to depth d between 2 graphs using trie.
"""Compute path graph kernels up to depth d between 2 graphs using trie.
Parameters
----------
@@ -335,7 +335,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func == None
def __kernel_do_naive(self, paths1, paths2):
"""Calculate path graph kernels up to depth d between 2 graphs naively.
"""Compute path graph kernels up to depth d between 2 graphs naively.
Parameters
----------


+43 -43  gklearn/kernels/randomWalkKernel.py

@@ -37,15 +37,15 @@ def randomwalkkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate random walk graph kernels.
"""Compute random walk graph kernels.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

compute_method : string
Method used to compute kernel. The Following choices are
@@ -125,7 +125,7 @@ def randomwalkkernel(*args,
Gn = [g.copy() for g in Gn]

eweight = None
if edge_weight == None:
if edge_weight is None:
if verbose:
print('\n None edge weight specified. Set all weight to 1.\n')
else:
@@ -212,12 +212,12 @@ def randomwalkkernel(*args,

###############################################################################
def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True):
"""Calculate walk graph kernels up to n between 2 graphs using Sylvester method.
"""Compute walk graph kernels up to n between 2 graphs using Sylvester method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
Graphs between which the kernel is computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -230,7 +230,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
if q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_list = [
@@ -245,7 +245,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True
# norm = A_tilde.sum(axis=0)
# norm[norm == 0] = 1
# A_wave_list.append(A_tilde / norm)
if p == None: # p is uniform distribution as default.
if p is None: # p is uniform distribution as default.
def init_worker(Awl_toshare):
global G_Awl
G_Awl = Awl_toshare
@@ -255,7 +255,7 @@ def _sylvester_equation(Gn, lmda, p, q, eweight, n_jobs, chunksize, verbose=True
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# desc='Computing kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
@@ -300,12 +300,12 @@ def _se_do(A_wave1, A_wave2, lmda):
###############################################################################
def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs, chunksize, verbose=True):
"""Calculate walk graph kernels up to n between 2 graphs using conjugate method.
"""Compute walk graph kernels up to n between 2 graphs using conjugate method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
Graphs between which the kernel is computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -321,14 +321,14 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] < 1:
# # this is faster from unlabeled graphs. @todo: why?
# if q == None:
# if q is None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list actually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# if p is None: # p is uniform distribution as default.
# def init_worker(Awl_toshare):
# global G_Awl
# G_Awl = Awl_toshare
@@ -336,23 +336,23 @@ def _conjugate_gradient(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
# parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
# glbv=(A_wave_list,), n_jobs=n_jobs)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
# reindex nodes using consecutive integers for convenience of kernel computation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in (tqdm(
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)]
if p == None and q == None: # p and q are uniform distributions as default.
if p is None and q is None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_cg_labled_do, ds_attrs, node_kernels,
do_partial = partial(wrapper_cg_labeled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# desc='Computing kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
@@ -382,24 +382,24 @@ def _cg_unlabled_do(A_wave1, A_wave2, lmda):
return np.dot(q_times, x)


def wrapper_cg_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
def wrapper_cg_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _cg_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
return i, j, _cg_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
def _cg_labeled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
# Frist, compute kernels between all pairs of nodes, method borrowed
# Frist, compute kernels between all pairs of nodes using the method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
# graphs compared, which is the most case we went though. For very
# sparse graphs, this would be slow.
vk_dict = computeVK(g1, g2, ds_attrs, node_kernels, node_label)
# Compute weight matrix of the direct product graph.
# Compute the weight matrix of the direct product graph.
w_times, w_dim = computeW(g1, g2, vk_dict, ds_attrs,
edge_kernels, edge_label)
# use uniform distribution if there is no prior knowledge.
@@ -415,12 +415,12 @@ def _cg_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
###############################################################################
def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
node_label, edge_label, eweight, n_jobs, chunksize, verbose=True):
"""Calculate walk graph kernels up to n between 2 graphs using Fixed-Point method.
"""Compute walk graph kernels up to n between 2 graphs using Fixed-Point method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
Graphs between which the kernel is computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -438,17 +438,17 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
# if not ds_attrs['node_labeled'] and ds_attrs['node_attr_dim'] < 1 and \
# not ds_attrs['edge_labeled'] and ds_attrs['edge_attr_dim'] > 1:
# # this is faster from unlabeled graphs. @todo: why?
# if q == None:
# if q is None:
# # don't normalize adjacency matrices if q is a uniform vector. Note
# # A_wave_list actually contains the transposes of the adjacency matrices.
# A_wave_list = [
# nx.adjacency_matrix(G, eweight).todense().transpose() for G in
# tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout)
# ]
# if p == None: # p is uniform distribution as default.
# if p is None: # p is uniform distribution as default.
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# desc='Computing kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
@@ -464,33 +464,33 @@ def _fixed_point(Gn, lmda, p, q, ds_attrs, node_kernels, edge_kernels,
# Kmatrix[j][i] = Kmatrix[i][j]
# pbar.update(1)
# else:
# reindex nodes using consecutive integers for convenience of kernel calculation.
# reindex nodes using consecutive integers for the convenience of kernel computation.
Gn = [nx.convert_node_labels_to_integers(
g, first_label=0, label_attribute='label_orignal') for g in (tqdm(
Gn, desc='reindex vertices', file=sys.stdout) if verbose else Gn)]
if p == None and q == None: # p and q are uniform distributions as default.
if p is None and q is None: # p and q are uniform distributions as default.
def init_worker(gn_toshare):
global G_gn
G_gn = gn_toshare
do_partial = partial(wrapper_fp_labled_do, ds_attrs, node_kernels,
do_partial = partial(wrapper_fp_labeled_do, ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(Gn,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
return Kmatrix


def wrapper_fp_labled_do(ds_attrs, node_kernels, node_label, edge_kernels,
def wrapper_fp_labeled_do(ds_attrs, node_kernels, node_label, edge_kernels,
edge_label, lmda, itr):
i = itr[0]
j = itr[1]
return i, j, _fp_labled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
return i, j, _fp_labeled_do(G_gn[i], G_gn[j], ds_attrs, node_kernels,
node_label, edge_kernels, edge_label, lmda)


def _fp_labled_do(g1, g2, ds_attrs, node_kernels, node_label,
def _fp_labeled_do(g1, g2, ds_attrs, node_kernels, node_label,
edge_kernels, edge_label, lmda):
# Frist, compute kernels between all pairs of nodes, method borrowed
# Frist, compute kernels between all pairs of nodes using the method borrowed
# from FCSP. It is faster than directly computing all edge kernels
# when $d_1d_2>2$, where $d_1$ and $d_2$ are vertex degrees of the
# graphs compared, which is the most case we went though. For very
@@ -519,13 +519,13 @@ def func_fp(x, p_times, lmda, w_times):

###############################################################################
def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunksize, verbose=True):
"""Calculate walk graph kernels up to n between 2 unlabeled graphs using
"""Compute walk graph kernels up to n between 2 unlabeled graphs using
spectral decomposition method. Labels will be ignored.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
Graphs between which the kernel is computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -538,7 +538,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk
"""
Kmatrix = np.zeros((len(Gn), len(Gn)))

if q == None:
if q is None:
# precompute the spectral decomposition of each graph.
P_list = []
D_list = []
@@ -552,7 +552,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk
P_list.append(ev)
# P_inv_list = [p.T for p in P_list] # @todo: also works for directed graphs?

if p == None: # p is uniform distribution as default.
if p is None: # p is uniform distribution as default.
q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in Gn]
# q_T_list = [q.T for q in q_list]
def init_worker(q_T_toshare, P_toshare, D_toshare):
@@ -568,7 +568,7 @@ def _spectral_decomposition(Gn, weight, p, q, sub_kernel, eweight, n_jobs, chunk
# pbar = tqdm(
# total=(1 + len(Gn)) * len(Gn) / 2,
# desc='calculating kernels',
# desc='Computing kernels',
# file=sys.stdout)
# for i in range(0, len(Gn)):
# for j in range(i, len(Gn)):
@@ -605,12 +605,12 @@ def _sd_do(q_T1, q_T2, P1, P2, D1, D2, weight, sub_kernel):

###############################################################################
def _randomwalkkernel_kron(G1, G2, node_label, edge_label):
"""Calculate walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.
"""Compute walk graph kernels up to n between 2 graphs using nearest Kronecker product approximation method.

Parameters
----------
G1, G2 : NetworkX graph
Graphs between which the kernel is calculated.
Graphs between which the kernel is computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -692,8 +692,8 @@ def computeVK(g1, g2, ds_attrs, node_kernels, node_label):


def computeW(g1, g2, vk_dict, ds_attrs, edge_kernels, edge_label):
'''Compute weight matrix of the direct product graph.
'''
"""Compute the weight matrix of the direct product graph.
"""
w_dim = nx.number_of_nodes(g1) * nx.number_of_nodes(g2)
w_times = np.zeros((w_dim, w_dim))
if vk_dict: # node labeled
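
All the solvers renamed above (Sylvester, conjugate gradient, fixed-point, spectral decomposition) evaluate the same quantity q^T (I - lambda*W)^{-1} p on the direct product graph, differing only in how the linear algebra is carried out. A minimal unlabeled sketch of the conjugate gradient route, where W reduces to a Kronecker product (the labeled path builds W through computeVK/computeW instead):

import networkx as nx
import numpy as np
from scipy.sparse.linalg import cg

def rw_kernel_cg_sketch(g1, g2, lmda=0.01):
    # Unlabeled case: the direct product graph's weight matrix is the
    # Kronecker product of the two adjacency matrices.
    W = np.kron(nx.to_numpy_array(g1), nx.to_numpy_array(g2))
    n = W.shape[0]
    p = q = np.full(n, 1.0 / n)  # uniform start/stop distributions
    x, info = cg(np.eye(n) - lmda * W, p)  # solve (I - lmda*W) x = p
    assert info == 0, 'CG did not converge'
    return float(q @ x)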


+3 -3  gklearn/kernels/shortest_path.py

@@ -47,7 +47,7 @@ class ShortestPath(GraphKernel):
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
for i, j in iterator:
@@ -102,7 +102,7 @@ class ShortestPath(GraphKernel):
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
for i in iterator:
@@ -145,7 +145,7 @@ class ShortestPath(GraphKernel):
itr = range(len(g_list))
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list


+5 -5  gklearn/kernels/spKernel.py

@@ -29,15 +29,15 @@ def spkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate shortest-path kernels between graphs.
"""Compute shortest-path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

node_label : string
Node attribute used as label. The default node label is atom.
@@ -179,7 +179,7 @@ def spkernel(*args,
# do_partial = partial(spkernel_do, Gn, ds_attrs, node_label, node_kernels)
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# pool.map(do_partial, itr), desc='Computing kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
@@ -202,7 +202,7 @@ def spkernel(*args,
# # ---- direct running, normally use single CPU core. ----
# from itertools import combinations_with_replacement
# itr = combinations_with_replacement(range(0, len(Gn)), 2)
# for i, j in tqdm(itr, desc='calculating kernels', file=sys.stdout):
# for i, j in tqdm(itr, desc='Computing kernels', file=sys.stdout):
# kernel = spkernel_do(Gn[i], Gn[j], ds_attrs, node_label, node_kernels)
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
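
For background on the docstrings touched here: with no node labels the shortest-path kernel degenerates to a delta kernel on shortest-path lengths, which makes the structure easy to see. A minimal sketch:

import networkx as nx

def sp_kernel_sketch(g1, g2):
    # One shortest-path length per ordered node pair, self-pairs excluded.
    l1 = [d for _, dists in nx.shortest_path_length(g1) for d in dists.values() if d > 0]
    l2 = [d for _, dists in nx.shortest_path_length(g2) for d in dists.values() if d > 0]
    return sum(1 for a in l1 for b in l2 if a == b)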


+6 -39  gklearn/kernels/structural_sp.py

@@ -18,7 +18,7 @@ from tqdm import tqdm
# import networkx as nx
import numpy as np
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import get_shortest_paths
from gklearn.utils.utils import get_shortest_paths, compute_vertex_kernels
from gklearn.kernels import GraphKernel


@@ -57,7 +57,7 @@ class StructuralSP(GraphKernel):
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
if self.__compute_method == 'trie':
@@ -135,7 +135,7 @@ class StructuralSP(GraphKernel):
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
if self.__compute_method == 'trie':
@@ -193,7 +193,7 @@ class StructuralSP(GraphKernel):
itr = range(len(g_list))
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list
@@ -273,7 +273,7 @@ class StructuralSP(GraphKernel):
if len(p1) == len(p2):
kernel += 1
try:
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average
except ZeroDivisionError:
print(spl1, spl2)
print(g1.nodes(data=True))
@@ -318,40 +318,7 @@ class StructuralSP(GraphKernel):
def __get_all_node_kernels(self, g1, g2):
# compute shortest path matrices, method borrowed from FCSP.
vk_dict = {} # shortest path matrices dict
if len(self.__node_labels) > 0:
# node symb and non-synb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['mix']
for n1, n2 in product(g1.nodes(data=True), g2.nodes(data=True)):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
# node symb labeled
else:
kn = self.__node_kernels['symb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_labels = [n1[1][nl] for nl in self.__node_labels]
n2_labels = [n2[1][nl] for nl in self.__node_labels]
vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels)
else:
# node non-synb labeled
if len(self.__node_attrs) > 0:
kn = self.__node_kernels['nsymb']
for n1 in g1.nodes(data=True):
for n2 in g2.nodes(data=True):
n1_attrs = [n1[1][na] for na in self.__node_attrs]
n2_attrs = [n2[1][na] for na in self.__node_attrs]
vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs)
# node unlabeled
else:
pass
return vk_dict
return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
def __get_all_edge_kernels(self, g1, g2):
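
The inline block deleted above dispatched on node_kernels['mix' | 'symb' | 'nsymb']; the new compute_vertex_kernels helper presumably keeps that contract. A plausible construction of such a dict, with the kernel choices assumed for illustration:

import numpy as np

def deltakernel(x, y):  # symbolic labels
    return 1 if x == y else 0

def gaussiankernel(x, y, gamma=1.0):  # real-valued attributes
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    return float(np.exp(-gamma * np.sum((x - y) ** 2)))

node_kernels = {
    'symb': deltakernel,
    'nsymb': gaussiankernel,
    'mix': lambda l1, l2, a1, a2: deltakernel(l1, l2) * gaussiankernel(a1, a2),
}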


+13 -13  gklearn/kernels/structuralspKernel.py

@@ -37,15 +37,15 @@ def structuralspkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate mean average structural shortest path kernels between graphs.
"""Compute mean average structural shortest path kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

node_label : string
Node attribute used as label. The default node label is atom.
@@ -215,7 +215,7 @@ def structuralspkernel(*args,
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(Gn)), 2)
if verbose:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
if compute_method == 'trie':
@@ -241,7 +241,7 @@ def structuralspkernel(*args,
# combinations_with_replacement(splist, 2),
# combinations_with_replacement(range(0, len(Gn)), 2))
# for i, j, kernel in tqdm(
# pool.map(do_partial, itr), desc='calculating kernels',
# pool.map(do_partial, itr), desc='Computing kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
@@ -263,7 +263,7 @@ def structuralspkernel(*args,
# with closing(Pool(n_jobs)) as pool:
# for i, j, kernel in tqdm(
# pool.imap_unordered(do_partial, itr, 1000),
# desc='calculating kernels',
# desc='Computing kernels',
# file=sys.stdout):
# Kmatrix[i][j] = kernel
# Kmatrix[j][i] = kernel
@@ -335,7 +335,7 @@ def structuralspkernel_do(g1, g2, spl1, spl2, ds_attrs, node_label, edge_label,
if len(p1) == len(p2):
kernel += 1
try:
kernel = kernel / (len(spl1) * len(spl2)) # calculate mean average
kernel = kernel / (len(spl1) * len(spl2)) # Compute mean average
except ZeroDivisionError:
print(spl1, spl2)
print(g1.nodes(data=True))
@@ -429,7 +429,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label,
# # compute graph kernels
# traverseBothTrie(trie1[0].root, trie2[0], kernel)
#
# kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average
# kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average

# # traverse all paths in graph1. Deep-first search is applied.
# def traverseBothTrie(root, trie2, kernel, vk_dict, ek_dict, pcurrent=[]):
@@ -485,7 +485,7 @@ def ssp_do_trie(g1, g2, trie1, trie2, ds_attrs, node_label, edge_label,
else:
traverseBothTrieu(trie1[0].root, trie2[0], kernel, vk_dict, ek_dict)

kernel = kernel[0] / (trie1[1] * trie2[1]) # calculate mean average
kernel = kernel[0] / (trie1[1] * trie2[1]) # Compute mean average

return kernel

@@ -781,9 +781,9 @@ def get_shortest_paths(G, weight, directed):
Parameters
----------
G : NetworkX graphs
The graphs whose paths are calculated.
The graphs whose paths are computed.
weight : string/None
edge attribute used as weight to calculate the shortest path.
edge attribute used as weight to compute the shortest path.
directed: boolean
Whether graph is directed.

@@ -822,9 +822,9 @@ def get_sps_as_trie(G, weight, directed):
Parameters
----------
G : NetworkX graphs
The graphs whose paths are calculated.
The graphs whose paths are computed.
weight : string/None
edge attribute used as weight to calculate the shortest path.
edge attribute used as weight to compute the shortest path.
directed: boolean
Whether graph is directed.



+8 -8  gklearn/kernels/treelet.py

@@ -39,7 +39,7 @@ class Treelet(GraphKernel):
def _compute_gm_series(self):
self.__add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
canonkeys = []
if self._verbose >= 2:
@@ -55,7 +55,7 @@ class Treelet(GraphKernel):
from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
if self._verbose >= 2:
iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
iterator = tqdm(itr, desc='Computing kernels', file=sys.stdout)
else:
iterator = itr
for i, j in iterator:
@@ -69,7 +69,7 @@ class Treelet(GraphKernel):
def _compute_gm_imap_unordered(self):
self.__add_dummy_labels(self._graphs)
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
pool = Pool(self._n_jobs)
itr = zip(self._graphs, range(0, len(self._graphs)))
@@ -105,7 +105,7 @@ class Treelet(GraphKernel):
def _compute_kernel_list_series(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_list = []
@@ -119,7 +119,7 @@ class Treelet(GraphKernel):
# compute kernel list.
kernel_list = [None] * len(g_list)
if self._verbose >= 2:
iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
iterator = tqdm(range(len(g_list)), desc='Computing kernels', file=sys.stdout)
else:
iterator = range(len(g_list))
for i in iterator:
@@ -132,7 +132,7 @@ class Treelet(GraphKernel):
def _compute_kernel_list_imap_unordered(self, g1, g_list):
self.__add_dummy_labels(g_list + [g1])
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
canonkeys_1 = self.__get_canonkeys(g1)
canonkeys_list = [[] for _ in range(len(g_list))]
@@ -167,7 +167,7 @@ class Treelet(GraphKernel):
len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
return kernel_list
@@ -185,7 +185,7 @@ class Treelet(GraphKernel):
def __kernel_do(self, canonkey1, canonkey2):
"""Calculate treelet graph kernel between 2 graphs.
"""Compute treelet graph kernel between 2 graphs.
Parameters
----------


+8 -8  gklearn/kernels/treeletKernel.py

@@ -29,15 +29,15 @@ def treeletkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate treelet graph kernels between graphs.
"""Compute treelet graph kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

sub_kernel : function
The sub-kernel between 2 real number vectors. Each vector counts the
@@ -89,7 +89,7 @@ def treeletkernel(*args,
# ---- use pool.imap_unordered to parallel and track progress. ----
if parallel == 'imap_unordered':
# get all canonical keys of all graphs before calculating kernels to save
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
pool = Pool(n_jobs)
itr = zip(Gn, range(0, len(Gn)))
@@ -120,8 +120,8 @@ def treeletkernel(*args,
glbv=(canonkeys,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
# ---- do not use parallelization. ----
elif parallel == None:
# get all canonical keys of all graphs before calculating kernels to save
elif parallel is None:
# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
canonkeys = []
for g in (tqdm(Gn, desc='getting canonkeys', file=sys.stdout) if verbose else Gn):
@@ -148,7 +148,7 @@ def treeletkernel(*args,


def _treeletkernel_do(canonkey1, canonkey2, sub_kernel):
"""Calculate treelet graph kernel between 2 graphs.
"""Compute treelet graph kernel between 2 graphs.
Parameters
----------
@@ -210,7 +210,7 @@ def get_canonkeys(G, node_label, edge_label, labeled, is_directed):

# n-star patterns
patterns['3star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 3]
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4]
patterns['4star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 4] # @todo: check self loop.
patterns['5star'] = [[node] + [neighbor for neighbor in G[node]] for node in G.nodes() if G.degree(node) == 5]
# n-star patterns
canonkey['6'] = len(patterns['3star'])
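
As the sub_kernel docstring above says, each graph is summarized by a vector counting its treelet patterns, and the kernel is a sub-kernel between the two count vectors restricted to shared patterns. A sketch of that final step (the canonical-key dicts come from get_canonkeys; the Gaussian sub-kernel is an assumed example):

import numpy as np

def treelet_do_sketch(canonkey1, canonkey2, sub_kernel):
    keys = set(canonkey1) & set(canonkey2)  # treelets present in both graphs
    v1 = np.array([canonkey1[k] for k in keys])
    v2 = np.array([canonkey2[k] for k in keys])
    return sub_kernel(v1, v2)

gaussian = lambda x, y: float(np.exp(-np.sum((x - y) ** 2)))
print(treelet_do_sketch({'6': 2, '7': 1}, {'6': 1}, gaussian))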


+17 -17  gklearn/kernels/untilHPathKernel.py

@@ -34,15 +34,15 @@ def untilhpathkernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate path graph kernels up to depth/hight h between graphs.
"""Compute path graph kernels up to depth/hight h between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

node_label : string
Node attribute used as label. The default node label is atom.
@@ -91,7 +91,7 @@ def untilhpathkernel(*args,
attr_names=['node_labeled', 'node_attr_dim', 'edge_labeled',
'edge_attr_dim', 'is_directed'],
node_label=node_label, edge_label=edge_label)
if k_func != None:
if k_func is not None:
if not ds_attrs['node_labeled']:
for G in Gn:
nx.set_node_attributes(G, '0', 'atom')
@@ -103,7 +103,7 @@ def untilhpathkernel(*args,

if parallel == 'imap_unordered':
# ---- use pool.imap_unordered to parallel and track progress. ----
# get all paths of all graphs before calculating kernels to save time,
# get all paths of all graphs before computing kernels to save time,
# but this may cost a lot of memory for large datasets.
pool = Pool(n_jobs)
itr = zip(Gn, range(0, len(Gn)))
@@ -113,10 +113,10 @@ def untilhpathkernel(*args,
else:
chunksize = 100
all_paths = [[] for _ in range(len(Gn))]
if compute_method == 'trie' and k_func != None:
if compute_method == 'trie' and k_func is not None:
getps_partial = partial(wrapper_find_all_path_as_trie, depth,
ds_attrs, node_label, edge_label)
elif compute_method != 'trie' and k_func != None:
elif compute_method != 'trie' and k_func is not None:
getps_partial = partial(wrapper_find_all_paths_until_length, depth,
ds_attrs, node_label, edge_label, True)
else:
@@ -133,9 +133,9 @@ def untilhpathkernel(*args,
pool.join()
# for g in Gn:
# if compute_method == 'trie' and k_func != None:
# if compute_method == 'trie' and k_func is not None:
# find_all_path_as_trie(g, depth, ds_attrs, node_label, edge_label)
# elif compute_method != 'trie' and k_func != None:
# elif compute_method != 'trie' and k_func is not None:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label)
# else:
# find_all_paths_until_length(g, depth, ds_attrs, node_label, edge_label, False)
@@ -155,14 +155,14 @@ def untilhpathkernel(*args,
## all_paths[i] = ps
## print(time.time() - ttt)
if compute_method == 'trie' and k_func != None:
if compute_method == 'trie' and k_func is not None:
def init_worker(trie_toshare):
global G_trie
G_trie = trie_toshare
do_partial = partial(wrapper_uhpath_do_trie, k_func)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
elif compute_method != 'trie' and k_func != None:
elif compute_method != 'trie' and k_func is not None:
def init_worker(plist_toshare):
global G_plist
G_plist = plist_toshare
@@ -177,7 +177,7 @@ def untilhpathkernel(*args,
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_paths,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
elif parallel == None:
elif parallel is None:
# from pympler import asizeof
# ---- direct running, normally use single CPU core. ----
# print(asizeof.asized(all_paths, detail=1).format())
@@ -195,7 +195,7 @@ def untilhpathkernel(*args,
# print(sizeof_allpaths)
pbar = tqdm(
total=((len(Gn) + 1) * len(Gn) / 2),
desc='calculating kernels',
desc='Computing kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
@@ -217,7 +217,7 @@ def untilhpathkernel(*args,
# print(sizeof_allpaths)
pbar = tqdm(
total=((len(Gn) + 1) * len(Gn) / 2),
desc='calculating kernels',
desc='Computing kernels',
file=sys.stdout)
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
@@ -236,7 +236,7 @@ def untilhpathkernel(*args,


def _untilhpathkernel_do_trie(trie1, trie2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs using trie.
"""Compute path graph kernels up to depth d between 2 graphs using trie.

Parameters
----------
@@ -351,7 +351,7 @@ def wrapper_uhpath_do_trie(k_func, itr):

def _untilhpathkernel_do_naive(paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs naively.
"""Compute path graph kernels up to depth d between 2 graphs naively.

Parameters
----------
@@ -400,7 +400,7 @@ def wrapper_uhpath_do_naive(k_func, itr):


def _untilhpathkernel_do_kernelless(paths1, paths2, k_func):
"""Calculate path graph kernels up to depth d between 2 graphs naively.
"""Compute path graph kernels up to depth d between 2 graphs naively.

Parameters
----------
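
The memory/time tradeoff noted in several comments above (precompute all paths once per graph, pay in memory) is clearest in the naive variant: enumerate the label sequences of all paths up to h edges, then compare the two count vectors. A rough sketch with an assumed MinMax k_func:

from collections import Counter

def paths_up_to_h(g, h, node_label='atom'):
    counts = Counter()
    for u in g.nodes():
        stack = [[u]]
        while stack:
            p = stack.pop()
            counts[tuple(g.nodes[v].get(node_label) for v in p)] += 1
            if len(p) <= h:  # paths of at most h edges
                stack.extend(p + [w] for w in g[p[-1]] if w not in p)
    return counts

def minmax_kernel(c1, c2):
    keys = set(c1) | set(c2)
    return sum(min(c1[k], c2[k]) for k in keys) / sum(max(c1[k], c2[k]) for k in keys)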


+17 -17  gklearn/kernels/weisfeilerLehmanKernel.py

@@ -32,15 +32,15 @@ def weisfeilerlehmankernel(*args,
n_jobs=None,
chunksize=None,
verbose=True):
"""Calculate Weisfeiler-Lehman kernels between graphs.
"""Compute Weisfeiler-Lehman kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
G1, G2 : NetworkX graphs
Two graphs between which the kernel is calculated.
Two graphs between which the kernel is computed.

node_label : string
Node attribute used as label. The default node label is atom.
@@ -115,12 +115,12 @@ def weisfeilerlehmankernel(*args,


def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksize, verbose):
"""Calculate Weisfeiler-Lehman kernels between graphs.
"""Compute Weisfeiler-Lehman kernels between graphs.

Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -146,7 +146,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz
# number of occurence of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori)))

# calculate subtree kernel with the 0th iteration and add it to the final kernel
# Compute subtree kernel with the 0th iteration and add it to the final kernel
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False)

# iterate each height
@@ -255,7 +255,7 @@ def _wl_kernel_do(Gn, node_label, edge_label, height, parallel, n_jobs, chunksiz
# all_labels_ori.update(labels_comp)
all_num_of_each_label.append(dict(Counter(labels_comp)))

# calculate subtree kernel with h iterations and add it to the final kernel
# Compute subtree kernel with h iterations and add it to the final kernel
compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs, chunksize, False)

return Kmatrix
@@ -316,7 +316,7 @@ def compute_kernel_matrix(Kmatrix, all_num_of_each_label, Gn, parallel, n_jobs,
do_partial = partial(wrapper_compute_subtree_kernel, Kmatrix)
parallel_gm(do_partial, Kmatrix, Gn, init_worker=init_worker,
glbv=(all_num_of_each_label,), n_jobs=n_jobs, chunksize=chunksize, verbose=verbose)
elif parallel == None:
elif parallel is None:
for i in range(len(Kmatrix)):
for j in range(i, len(Kmatrix)):
Kmatrix[i][j] = compute_subtree_kernel(all_num_of_each_label[i],
@@ -345,12 +345,12 @@ def wrapper_compute_subtree_kernel(Kmatrix, itr):

def _wl_spkernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs.
"""Compute Weisfeiler-Lehman shortest path kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -413,7 +413,7 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height):
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate subtree kernel with h iterations and add it to the final kernel
# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
@@ -427,12 +427,12 @@ def _wl_spkernel_do(Gn, node_label, edge_label, height):


def _wl_edgekernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman edge kernels between graphs.
"""Compute Weisfeiler-Lehman edge kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -491,7 +491,7 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height):
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate subtree kernel with h iterations and add it to the final kernel
# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
@@ -504,12 +504,12 @@ def _wl_edgekernel_do(Gn, node_label, edge_label, height):


def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -564,7 +564,7 @@ def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate kernel with h iterations and add it to the final kernel
# Compute kernel with h iterations and add it to the final kernel
Kmatrix += base_kernel(Gn, node_label, edge_label)
return Kmatrix
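
The repeated "compute subtree kernel ... add it to the final kernel" comments describe the standard WL accumulation: at each height, compress every node's neighborhood multiset into a new label, count label occurrences, and add a base-kernel term. A compact sketch using a dot product of count vectors as the base term:

from collections import Counter

def wl_subtree_sketch(g1, g2, height=2, node_label='atom'):
    labels = {id(g): {v: str(g.nodes[v].get(node_label, '0')) for v in g}
              for g in (g1, g2)}
    kernel = 0
    for _ in range(height + 1):
        c1 = Counter(labels[id(g1)].values())
        c2 = Counter(labels[id(g2)].values())
        kernel += sum(c1[k] * c2[k] for k in set(c1) & set(c2))
        for g in (g1, g2):  # one WL iteration: compress neighbor multisets
            labels[id(g)] = {v: labels[id(g)][v] + '|' +
                                ','.join(sorted(labels[id(g)][w] for w in g[v]))
                             for v in g}
    return kernel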

+13 -13  gklearn/kernels/weisfeiler_lehman.py

@@ -125,12 +125,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def __subtree_kernel_do(self, Gn):
"""Calculate Weisfeiler-Lehman kernels between graphs.
"""Compute Weisfeiler-Lehman kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
Return
------
@@ -152,7 +152,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
# number of occurence of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori)))
# calculate subtree kernel with the 0th iteration and add it to the final kernel.
# Compute subtree kernel with the 0th iteration and add it to the final kernel.
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
# iterate each height
@@ -198,7 +198,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
# all_labels_ori.update(labels_comp)
all_num_of_each_label.append(dict(Counter(labels_comp)))
# calculate subtree kernel with h iterations and add it to the final kernel
# Compute subtree kernel with h iterations and add it to the final kernel
self.__compute_gram_matrix(gram_matrix, all_num_of_each_label, Gn)
return gram_matrix
@@ -244,12 +244,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wl_spkernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman shortest path kernels between graphs.
"""Compute Weisfeiler-Lehman shortest path kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -312,7 +312,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate subtree kernel with h iterations and add it to the final kernel
# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
@@ -326,12 +326,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wl_edgekernel_do(Gn, node_label, edge_label, height):
"""Calculate Weisfeiler-Lehman edge kernels between graphs.
"""Compute Weisfeiler-Lehman edge kernels between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -390,7 +390,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate subtree kernel with h iterations and add it to the final kernel
# Compute subtree kernel with h iterations and add it to the final kernel
for i in range(0, len(Gn)):
for j in range(i, len(Gn)):
for e1 in Gn[i].edges(data = True):
@@ -403,12 +403,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
"""Calculate Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
Parameters
----------
Gn : List of NetworkX graph
List of graphs between which the kernels are calculated.
List of graphs between which the kernels are computed.
node_label : string
node attribute used as label.
edge_label : string
@@ -463,7 +463,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
for node in G.nodes(data = True):
node[1][node_label] = set_compressed[set_multisets[node[0]]]
# calculate kernel with h iterations and add it to the final kernel
# Compute kernel with h iterations and add it to the final kernel
gram_matrix += base_kernel(Gn, node_label, edge_label)
return gram_matrix


+1 -1  gklearn/utils/parallel.py

@@ -63,4 +63,4 @@ def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None,
len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
parallel_me(func, func_assign, Kmatrix, itr, len_itr=len_itr,
init_worker=init_worker, glbv=glbv, method=method, n_jobs=n_jobs,
chunksize=chunksize, itr_desc='calculating kernels', verbose=verbose)
chunksize=chunksize, itr_desc='Computing kernels', verbose=verbose)
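
parallel_gm and parallel_me wrap the imap_unordered pattern used by every kernel above; in miniature (names and the toy kernel invented for illustration):

from itertools import combinations_with_replacement
from multiprocessing import Pool

def _init_worker(gn):
    global G_gn  # share the graph list with workers, read-only
    G_gn = gn

def _do_pair(ij):
    i, j = ij
    return i, j, float(len(G_gn[i]) == len(G_gn[j]))  # toy kernel

def fill_gram(Gn, Kmatrix, n_jobs=2, chunksize=100):
    itr = combinations_with_replacement(range(len(Gn)), 2)
    with Pool(n_jobs, initializer=_init_worker, initargs=(Gn,)) as pool:
        for i, j, k in pool.imap_unordered(_do_pair, itr, chunksize):
            Kmatrix[i][j] = Kmatrix[j][i] = k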
