
[Major Feature] Graph kernel classes can now compute the kernel matrix between two different lists of graphs via fit/transform methods, following the same scheme as the scikit-learn library!
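Below is a minimal, self-contained sketch of the new fit/transform scheme built on the GraphKernel base class from this commit. The ToyKernel subclass and its min-node-count similarity are hypothetical, written only to exercise the API: pairwise_kernel() is the hook called by _compute_kernel_matrix_series() and diagonals(), and _compute_gm_series() stands in for the series Gram computation that the concrete kernel classes in this commit implement.

import networkx as nx
import numpy as np
from gklearn.kernels.graph_kernel import GraphKernel

class ToyKernel(GraphKernel):
    """Hypothetical demo kernel; any concrete kernel class works the same way."""

    def pairwise_kernel(self, x, y):
        # Toy similarity for illustration only: compare node counts.
        return float(min(x.number_of_nodes(), y.number_of_nodes()))

    def _compute_gm_series(self):
        # Series Gram computation used by fit_transform() via compute_kernel_matrix().
        n = len(self._graphs)
        gm = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):
                gm[i, j] = gm[j, i] = self.pairwise_kernel(self._graphs[i], self._graphs[j])
        return gm

X = [nx.path_graph(n) for n in (2, 3, 4)]  # fitted graphs
Y = [nx.cycle_graph(n) for n in (3, 5)]    # a second, different list of graphs

kernel = ToyKernel(parallel=None, normalize=True, verbose=0)
gram = kernel.fit_transform(X)   # (3, 3) normalized Gram matrix of X
cross = kernel.transform(Y)      # (2, 3) kernel matrix between Y and the fitted X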

Branch: v0.2.x
jajupmochi · 4 years ago
commit bb36d0f507
12 changed files with 825 additions and 254 deletions
 1. +5   -5    gklearn/kernels/common_walk.py
 2. +13  -13   gklearn/kernels/conjugate_gradient.py
 3. +13  -13   gklearn/kernels/fixed_point.py
 4. +379 -34   gklearn/kernels/graph_kernel.py
 5. +14  -14   gklearn/kernels/marginalized.py
 6. +14  -14   gklearn/kernels/path_up_to_h.py
 7. +14  -14   gklearn/kernels/shortest_path.py
 8. +19  -19   gklearn/kernels/spectral_decomposition.py
 9. +14  -14   gklearn/kernels/structural_sp.py
10. +18  -18   gklearn/kernels/sylvester_equation.py
11. +313 -87   gklearn/kernels/treelet.py
12. +9   -9    gklearn/kernels/weisfeiler_lehman.py

+5 -5   gklearn/kernels/common_walk.py

@@ -47,7 +47,7 @@ class CommonWalk(GraphKernel):
 		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
-							length=len_itr, verbose=(self._verbose >= 2))
+							length=len_itr, verbose=(self.verbose >= 2))
 
 		# direct product graph method - exponential
 		if self._compute_method == 'exp':
@@ -86,7 +86,7 @@ class CommonWalk(GraphKernel):
 			do_fun = self._wrapper_kernel_do_geo
 
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=_init_worker_gm,
-					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+					glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
 
@@ -100,9 +100,9 @@ class CommonWalk(GraphKernel):
 
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			iterator = get_iters(range(len(g_list)), desc='Computing kernels',
-								file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+								file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 		else:
 			iterator = range(len(g_list))
 
@@ -148,7 +148,7 @@ class CommonWalk(GraphKernel):
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 					init_worker=_init_worker_list, glbv=(g1, g_list), method='imap_unordered',
-					n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+					n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
 

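The `self._verbose` → `self.verbose` and `self._n_jobs` → `self.n_jobs` renames repeated across the kernel classes below follow scikit-learn's estimator convention: constructor parameters must be stored under their own public names so that BaseEstimator can rediscover them by introspecting `__init__`. A standalone sketch of that convention (plain BaseEstimator; note that this commit still stubs out get_params/set_params on GraphKernel itself):

from sklearn.base import BaseEstimator, clone

class Demo(BaseEstimator):
    def __init__(self, verbose=2, n_jobs=None):
        self.verbose = verbose  # public attribute: visible to get_params()/clone()
        self.n_jobs = n_jobs    # storing this as self._n_jobs would hide it

est = Demo(verbose=0)
print(est.get_params())    # {'n_jobs': None, 'verbose': 0}
print(clone(est).verbose)  # 0 -- clone() round-trips through get_params()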

+13 -13   gklearn/kernels/conjugate_gradient.py

@@ -35,7 +35,7 @@ class ConjugateGradient(RandomWalkMeta):
 
 
 	def _compute_gm_series(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 
 		lmda = self._weight
@@ -44,7 +44,7 @@ class ConjugateGradient(RandomWalkMeta):
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
 
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -52,7 +52,7 @@ class ConjugateGradient(RandomWalkMeta):
 			from itertools import combinations_with_replacement
 			itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 			len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
-			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 			for i, j in iterator:
 				kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
@@ -66,7 +66,7 @@ class ConjugateGradient(RandomWalkMeta):
 
 
 	def _compute_gm_imap_unordered(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 
 		# Compute Gram matrix.
@@ -74,7 +74,7 @@ class ConjugateGradient(RandomWalkMeta):
 
 		# @todo: parallel this.
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -86,7 +86,7 @@ class ConjugateGradient(RandomWalkMeta):
 			do_fun = self._wrapper_kernel_do
 
 			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-						glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+						glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		else: # @todo
 			pass
@@ -95,7 +95,7 @@ class ConjugateGradient(RandomWalkMeta):
 
 
 	def _compute_kernel_list_series(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 
 		lmda = self._weight
@@ -105,11 +105,11 @@ class ConjugateGradient(RandomWalkMeta):
 
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
-			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 			for i in iterator:
 				kernel = self._kernel_do(g1, g_list[i], lmda)
@@ -122,7 +122,7 @@ class ConjugateGradient(RandomWalkMeta):
 
 
 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 
 		# compute kernel list.
@@ -131,7 +131,7 @@ class ConjugateGradient(RandomWalkMeta):
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
 		# @todo: parallel this.
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -149,7 +149,7 @@ class ConjugateGradient(RandomWalkMeta):
 			len_itr = len(g_list)
 			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 						init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-						n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+						n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		else: # @todo
 			pass
@@ -162,7 +162,7 @@ class ConjugateGradient(RandomWalkMeta):
 
 
 	def _compute_single_kernel_series(self, g1, g2):
-		self._check_edge_weight([g1] + [g2], self._verbose)
+		self._check_edge_weight([g1] + [g2], self.verbose)
 		self._check_graphs([g1] + [g2])
 
 		lmda = self._weight


+13 -13   gklearn/kernels/fixed_point.py

@@ -35,7 +35,7 @@ class FixedPoint(RandomWalkMeta):
 
 
 	def _compute_gm_series(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 
 		lmda = self._weight
@@ -44,7 +44,7 @@ class FixedPoint(RandomWalkMeta):
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
 
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout,verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout,verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -52,7 +52,7 @@ class FixedPoint(RandomWalkMeta):
 			from itertools import combinations_with_replacement
 			itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 			len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
-			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 			for i, j in iterator:
 				kernel = self._kernel_do(self._graphs[i], self._graphs[j], lmda)
@@ -66,7 +66,7 @@ class FixedPoint(RandomWalkMeta):
 
 
 	def _compute_gm_imap_unordered(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
 
 		# Compute Gram matrix.
@@ -74,7 +74,7 @@ class FixedPoint(RandomWalkMeta):
 
 		# @todo: parallel this.
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
-		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -86,7 +86,7 @@ class FixedPoint(RandomWalkMeta):
 			do_fun = self._wrapper_kernel_do
 
 			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-						glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+						glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		else: # @todo
 			pass
@@ -95,7 +95,7 @@ class FixedPoint(RandomWalkMeta):
 
 
 	def _compute_kernel_list_series(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 
 		lmda = self._weight
@@ -105,12 +105,12 @@ class FixedPoint(RandomWalkMeta):
 
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
 
-			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 			for i in iterator:
 				kernel = self._kernel_do(g1, g_list[i], lmda)
@@ -123,7 +123,7 @@ class FixedPoint(RandomWalkMeta):
 
 
 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
 
 		# compute kernel list.
@@ -132,7 +132,7 @@ class FixedPoint(RandomWalkMeta):
 		# Reindex nodes using consecutive integers for the convenience of kernel computation.
 		g1 = nx.convert_node_labels_to_integers(g1, first_label=0, label_attribute='label_orignal')
 		# @todo: parallel this.
-		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='Reindex vertices', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [nx.convert_node_labels_to_integers(g, first_label=0, label_attribute='label_orignal') for g in iterator]
 
 		if self._p is None and self._q is None: # p and q are uniform distributions as default.
@@ -150,7 +150,7 @@ class FixedPoint(RandomWalkMeta):
 			len_itr = len(g_list)
 			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 						init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-						n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+						n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		else: # @todo
 			pass
@@ -163,7 +163,7 @@ class FixedPoint(RandomWalkMeta):
 
 
 	def _compute_single_kernel_series(self, g1, g2):
-		self._check_edge_weight([g1] + [g2], self._verbose)
+		self._check_edge_weight([g1] + [g2], self.verbose)
 		self._check_graphs([g1] + [g2])
 
 		lmda = self._weight


+379 -34   gklearn/kernels/graph_kernel.py

@@ -9,27 +9,372 @@ import numpy as np
 import networkx as nx
 import multiprocessing
 import time
+# from abc import ABC, abstractmethod
+from sklearn.base import BaseEstimator # , TransformerMixin
+from sklearn.utils.validation import check_is_fitted # check_X_y, check_array,
+from sklearn.exceptions import NotFittedError
 from gklearn.utils import normalize_gram_matrix
 
 
-class GraphKernel(object):
-
-	def __init__(self):
-		self._graphs = None
-		self._parallel = ''
-		self._n_jobs = 0
-		self._verbose = None
-		self._normalize = True
-		self._run_time = 0
-		self._gram_matrix = None
-		self._gram_matrix_unnorm = None
+class GraphKernel(BaseEstimator): #, ABC):
+	"""The basic graph kernel class.
+
+	Attributes
+	----------
+	_graphs : list
+		Stores the input graphs on fit input data.
+		Default format of the list objects is `NetworkX` graphs.
+		**We don't guarantee that the input graphs remain unchanged during the
+		computation.**
+
+	References
+	----------
+	https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
+	"""
+
+	def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2):
+		"""`__init__` for `GraphKernel` object."""
+		# @todo: the default settings of the parameters are different from those in the self.compute method.
+		# self._graphs = None
+		self.parallel = parallel
+		self.n_jobs = n_jobs
+		self.chunksize = chunksize
+		self.normalize = normalize
+		self.verbose = verbose
+		# self._run_time = 0
+		# self._gram_matrix = None
+		# self._gram_matrix_unnorm = None
+
+
+	##########################################################################
+	# The following is the 1st paradigm to compute kernel matrix, which is
+	# compatible with `scikit-learn`.
+	# -------------------------------------------------------------------
+	# Special thanks to the "GraKeL" library for providing an excellent template!
+	##########################################################################
+
+
+	def fit(self, X, y=None):
+		"""Fit a graph dataset for a transformer.
+
+		Parameters
+		----------
+		X : iterable
+			DESCRIPTION.
+
+		y : None, optional
+			There is no need of a target in a transformer, yet the `scikit-learn`
+			pipeline API requires this parameter.
+
+		Returns
+		-------
+		object
+			Returns self.
+
+		"""
+		# self._is_tranformed = False
+
+		# Clear any prior attributes stored on the estimator, # @todo: unless warm_start is used;
+		self.clear_attributes()
+
+		# X = check_array(X, accept_sparse=True)
+
+		# Validate parameters for the transformer.
+		self.validate_parameters()
+
+		# Validate the input.
+		self._graphs = self.validate_input(X)
+
+		# self._X = X
+		# self._kernel = self._get_kernel_instance()
+
+		# Return the transformer.
+		return self
+
+
+	def transform(self, X):
+		"""Compute the graph kernel matrix between given and fitted data.
+
+		Parameters
+		----------
+		X : TYPE
+			DESCRIPTION.
+
+		Raises
+		------
+		ValueError
+			DESCRIPTION.
+
+		Returns
+		-------
+		None.
+
+		"""
+		# Check if method "fit" had been called.
+		check_is_fitted(self, '_graphs')
+
+		# Validate the input.
+		Y = self.validate_input(X)
+
+		# Transform: compute the graph kernel matrix.
+		kernel_matrix = self.compute_kernel_matrix(Y)
+		self._Y = Y
+
+		# Self transform must appear before the diagonal call on normilization.
+		self._is_transformed = True
+		if self.normalize:
+			X_diag, Y_diag = self.diagonals()
+			kernel_matrix /= np.sqrt(np.outer(Y_diag, X_diag))
+
+		return kernel_matrix
+
+
+
+	def fit_transform(self, X):
+		"""Fit and transform: compute Gram matrix on the same data.
+
+		Parameters
+		----------
+		X : list of graphs
+			Input graphs.
+
+		Returns
+		-------
+		gram_matrix : numpy array, shape = [len(X), len(X)]
+			The Gram matrix of X.
+
+		"""
+		self.fit(X)
+
+		# Transform: compute Gram matrix.
+		gram_matrix = self.compute_kernel_matrix()
+
+		# Normalize.
+		self._X_diag = np.diagonal(gram_matrix).copy()
+		if self.normalize:
+			gram_matrix /= np.sqrt(np.outer(self._X_diag, self._X_diag))
+
+		return gram_matrix
+
+
+	def get_params(self):
+		pass
+
+
+	def set_params(self):
+		pass
+
+
+	def clear_attributes(self):
+		if hasattr(self, '_X_diag'):
+			delattr(self, '_X_diag')
+		if hasattr(self, '_graphs'):
+			delattr(self, '_graphs')
+		if hasattr(self, '_Y'):
+			delattr(self, '_Y')
+		if hasattr(self, '_run_time'):
+			delattr(self, '_run_time')
+
+
+	def validate_parameters(self):
+		"""Validate all parameters for the transformer.
+
+		Returns
+		-------
+		None.
+
+		"""
+		if self.parallel is not None and self.parallel != 'imap_unordered':
+			raise ValueError('Parallel mode is not set correctly.')
+
+		if self.parallel == 'imap_unordered' and self.n_jobs is None:
+			self.n_jobs = multiprocessing.cpu_count()
+
+
+	def validate_input(self, X):
+		"""Validate the given input and raise errors if it is invalid.
+
+		Parameters
+		----------
+		X : list
+			The input to check. Should be a list of graph.
+
+		Raises
+		------
+		ValueError
+			Raise if the input is not correct.
+
+		Returns
+		-------
+		X : list
+			The input. A list of graph.
+
+		"""
+		if X is None:
+			raise ValueError('Please add graphs before computing.')
+		elif not isinstance(X, list):
+			raise ValueError('Cannot detect graphs.')
+		elif len(X) == 0:
+			raise ValueError('The graph list given is empty. No computation will be performed.')
+
+		return X
+
+
+	def compute_kernel_matrix(self, Y=None):
+		"""Compute the kernel matrix between a given target graphs (Y) and
+		the fitted graphs (X / self._graphs) or the Gram matrix for the fitted
+		graphs (X / self._graphs).
+
+		Parameters
+		----------
+		Y : list of graphs, optional
+			The target graphs. The default is None. If None kernel is computed
+			between X and itself.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		if Y is None:
+			# Compute Gram matrix for self._graphs (X).
+			kernel_matrix = self._compute_gram_matrix()
+			# self._gram_matrix_unnorm = np.copy(self._gram_matrix)
+
+		else:
+			# Compute kernel matrix between Y and self._graphs (X).
+			start_time = time.time()
+
+			if self.parallel == 'imap_unordered':
+				kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y)
+
+			elif self.parallel is None:
+				kernel_matrix = self._compute_kernel_matrix_series(Y)
+
+			self._run_time = time.time() - start_time
+			if self.verbose:
+				print('Kernel matrix of size (%d, %d) built in %s seconds.'
+					% (len(Y), len(self._graphs), self._run_time))
+
+		return kernel_matrix
+
+
+	def _compute_kernel_matrix_series(self, Y):
+		"""Compute the kernel matrix between a given target graphs (Y) and
+		the fitted graphs (X / self._graphs) without parallelization.
+
+		Parameters
+		----------
+		Y : list of graphs, optional
+			The target graphs.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		kernel_matrix = np.zeros((len(Y), len(self._graphs)))
+
+		for i_y, g_y in enumerate(Y):
+			for i_x, g_x in enumerate(self._graphs):
+				kernel_matrix[i_y, i_x] = self.pairwise_kernel(g_y, g_x)
+
+		return kernel_matrix
+
+
+	def _compute_kernel_matrix_imap_unordered(self, Y):
+		"""Compute the kernel matrix between a given target graphs (Y) and
+		the fitted graphs (X / self._graphs) using imap unordered parallelization.
+
+		Parameters
+		----------
+		Y : list of graphs, optional
+			The target graphs.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		raise Exception('Parallelization for kernel matrix is not implemented.')
+
+
+	def diagonals(self):
+		"""Compute the kernel matrix diagonals of the fit/transformed data.
+
+		Returns
+		-------
+		X_diag : numpy array
+			The diagonal of the kernel matrix between the fitted data.
+			This consists of each element calculated with itself.
+
+		Y_diag : numpy array
+			The diagonal of the kernel matrix, of the transform.
+			This consists of each element calculated with itself.
+
+		"""
+		# Check if method "fit" had been called.
+		check_is_fitted(self, ['_graphs'])
+
+		# Check if the diagonals of X exist.
+		try:
+			check_is_fitted(self, ['_X_diag'])
+		except NotFittedError:
+			# Compute diagonals of X.
+			self._X_diag = np.empty(shape=(len(self._graphs),))
+			for i, x in enumerate(self._graphs):
+				self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?
+
+		try:
+			# If transform has happened, return both diagonals.
+			check_is_fitted(self, ['_Y'])
+			self._Y_diag = np.empty(shape=(len(self._Y),))
+			for (i, y) in enumerate(self._Y):
+				self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?
+
+			return self._X_diag, self._Y_diag
+		except NotFittedError:
+			# Else just return both X_diag
+			return self._X_diag
+
+
+	# @abstractmethod
+	def pairwise_kernel(self, x, y):
+		"""Compute pairwise kernel between two graphs.
+
+		Parameters
+		----------
+		x, y : NetworkX Graph.
+			Graphs bewteen which the kernel is computed.
+
+		Returns
+		-------
+		kernel: float
+			The computed kernel.
+
+		# Notes
+		# -----
+		# This method is abstract and must be implemented by a subclass.
+
+		"""
+		raise NotImplementedError('Pairwise kernel computation is not implemented!')
+
+
+	##########################################################################
+	# The following is the 2nd paradigm to compute kernel matrix. It is
+	# simplified and not compatible with `scikit-learn`.
+	##########################################################################
 
 
 	def compute(self, *graphs, **kwargs):
-		self._parallel = kwargs.get('parallel', 'imap_unordered')
-		self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
-		self._normalize = kwargs.get('normalize', True)
-		self._verbose = kwargs.get('verbose', 2)
+		self.parallel = kwargs.get('parallel', 'imap_unordered')
+		self.n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+		self.normalize = kwargs.get('normalize', True)
+		self.verbose = kwargs.get('verbose', 2)
+		self.validate_parameters()
 
 		if len(graphs) == 1:
 			if not isinstance(graphs[0], list):
@@ -40,7 +385,7 @@ class GraphKernel(object):
 			self._graphs = [g.copy() for g in graphs[0]] # @todo: might be very slow.
 			self._gram_matrix = self._compute_gram_matrix()
 			self._gram_matrix_unnorm = np.copy(self._gram_matrix)
-			if self._normalize:
+			if self.normalize:
 				self._gram_matrix = normalize_gram_matrix(self._gram_matrix)
 			return self._gram_matrix, self._run_time
 
@@ -103,15 +448,15 @@
 	def _compute_gram_matrix(self):
 		start_time = time.time()
 
-		if self._parallel == 'imap_unordered':
+		if self.parallel == 'imap_unordered':
 			gram_matrix = self._compute_gm_imap_unordered()
-		elif self._parallel is None:
+		elif self.parallel is None:
 			gram_matrix = self._compute_gm_series()
 		else:
 			raise Exception('Parallel mode is not set correctly.')
 
 		self._run_time = time.time() - start_time
-		if self._verbose:
+		if self.verbose:
 			print('Gram matrix of size %d built in %s seconds.'
 				% (len(self._graphs), self._run_time))
 
@@ -129,15 +474,15 @@
 	def _compute_kernel_list(self, g1, g_list):
 		start_time = time.time()
 
-		if self._parallel == 'imap_unordered':
+		if self.parallel == 'imap_unordered':
 			kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
-		elif self._parallel is None:
+		elif self.parallel is None:
 			kernel_list = self._compute_kernel_list_series(g1, g_list)
 		else:
 			raise Exception('Parallel mode is not set correctly.')
 
 		self._run_time = time.time() - start_time
-		if self._verbose:
+		if self.verbose:
 			print('Graph kernel bewteen a graph and a list of %d graphs built in %s seconds.'
 				% (len(g_list), self._run_time))
 
@@ -158,7 +503,7 @@
 		kernel = self._compute_single_kernel_series(g1, g2)
 
 		self._run_time = time.time() - start_time
-		if self._verbose:
+		if self.verbose:
 			print('Graph kernel bewteen two graphs built in %s seconds.' % (self._run_time))
 
 		return kernel
@@ -185,24 +530,24 @@
 		return self._graphs
 
 
-	@property
-	def parallel(self):
-		return self._parallel
+	# @property
+	# def parallel(self):
+	# 	return self.parallel
 
 
-	@property
-	def n_jobs(self):
-		return self._n_jobs
+	# @property
+	# def n_jobs(self):
+	# 	return self.n_jobs
 
 
-	@property
-	def verbose(self):
-		return self._verbose
+	# @property
+	# def verbose(self):
+	# 	return self.verbose
 
 
-	@property
-	def normalize(self):
-		return self._normalize
+	# @property
+	# def normalize(self):
+	# 	return self.normalize
 
 
 	@property

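For reference, the normalization applied in transform() and fit_transform() above is the standard cosine normalization of a kernel, k'(x, y) = k(x, y) / sqrt(k(x, x) * k(y, y)). A standalone numeric check with hypothetical values, mirroring the np.outer construction in the code:

import numpy as np

K = np.array([[2.0, 1.0],
              [1.0, 8.0]])       # unnormalized Gram matrix
d = np.diagonal(K).copy()        # self-kernels k(x, x), as in fit_transform()
K_norm = K / np.sqrt(np.outer(d, d))

print(K_norm)  # unit diagonal; off-diagonal 1 / sqrt(2 * 8) = 0.25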

+14 -14   gklearn/kernels/marginalized.py

@@ -46,7 +46,7 @@ class Marginalized(GraphKernel):
 		self._add_dummy_labels(self._graphs)
 
 		if self._remove_totters:
-			iterator = get_iters(self._graphs, desc='removing tottering', file=sys.stdout, verbose=(self._verbose >= 2))
+			iterator = get_iters(self._graphs, desc='removing tottering', file=sys.stdout, verbose=(self.verbose >= 2))
 			# @todo: this may not work.
 			self._graphs = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]
 
@@ -57,7 +57,7 @@ class Marginalized(GraphKernel):
 		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
-							length=len_itr, verbose=(self._verbose >= 2))
+							length=len_itr, verbose=(self.verbose >= 2))
 		for i, j in iterator:
 			kernel = self._kernel_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
@@ -70,16 +70,16 @@ class Marginalized(GraphKernel):
 		self._add_dummy_labels(self._graphs)
 
 		if self._remove_totters:
-			pool = Pool(self._n_jobs)
+			pool = Pool(self.n_jobs)
 			itr = range(0, len(self._graphs))
-			if len(self._graphs) < 100 * self._n_jobs:
-				chunksize = int(len(self._graphs) / self._n_jobs) + 1
+			if len(self._graphs) < 100 * self.n_jobs:
+				chunksize = int(len(self._graphs) / self.n_jobs) + 1
 			else:
 				chunksize = 100
 			remove_fun = self._wrapper_untotter
 			iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize),
 								desc='removing tottering', file=sys.stdout,
-								length=len(self._graphs), verbose=(self._verbose >= 2))
+								length=len(self._graphs), verbose=(self.verbose >= 2))
 			for i, g in iterator:
 				self._graphs[i] = g
 			pool.close()
@@ -93,7 +93,7 @@ class Marginalized(GraphKernel):
 			G_gn = gn_toshare
 		do_fun = self._wrapper_kernel_do
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+					glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
 
@@ -103,13 +103,13 @@ class Marginalized(GraphKernel):
 
 		if self._remove_totters:
 			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
-			iterator = get_iters(g_list, desc='removing tottering', file=sys.stdout, verbose=(self._verbose >= 2))
+			iterator = get_iters(g_list, desc='removing tottering', file=sys.stdout, verbose=(self.verbose >= 2))
 			# @todo: this may not work.
 			g_list = [untotterTransformation(G, self._node_labels, self._edge_labels) for G in iterator]
 
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
-		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 		for i in iterator:
 			kernel = self._kernel_do(g1, g_list[i])
 			kernel_list[i] = kernel
@@ -122,16 +122,16 @@ class Marginalized(GraphKernel):
 
 		if self._remove_totters:
 			g1 = untotterTransformation(g1, self._node_labels, self._edge_labels) # @todo: this may not work.
-			pool = Pool(self._n_jobs)
+			pool = Pool(self.n_jobs)
 			itr = range(0, len(g_list))
-			if len(g_list) < 100 * self._n_jobs:
-				chunksize = int(len(g_list) / self._n_jobs) + 1
+			if len(g_list) < 100 * self.n_jobs:
+				chunksize = int(len(g_list) / self.n_jobs) + 1
 			else:
 				chunksize = 100
 			remove_fun = self._wrapper_untotter
 			iterator = get_iters(pool.imap_unordered(remove_fun, itr, chunksize),
 								desc='removing tottering', file=sys.stdout,
-								length=len(g_list), verbose=(self._verbose >= 2))
+								length=len(g_list), verbose=(self.verbose >= 2))
 			for i, g in iterator:
 				g_list[i] = g
 			pool.close()
@@ -151,7 +151,7 @@ class Marginalized(GraphKernel):
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
 					init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
-					n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+					n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
 


+14 -14   gklearn/kernels/path_up_to_h.py

@@ -41,10 +41,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 
 		from itertools import combinations_with_replacement
 		itr_kernel = combinations_with_replacement(range(0, len(self._graphs)), 2)
-		iterator_ps = get_iters(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout, length=len(self._graphs), verbose=(self._verbose >= 2))
+		iterator_ps = get_iters(range(0, len(self._graphs)), desc='getting paths', file=sys.stdout, length=len(self._graphs), verbose=(self.verbose >= 2))
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator_kernel = get_iters(itr_kernel, desc='Computing kernels',
-									file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+									file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
 
@@ -69,10 +69,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 
 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
-		pool = Pool(self._n_jobs)
+		pool = Pool(self.n_jobs)
 		itr = zip(self._graphs, range(0, len(self._graphs)))
-		if len(self._graphs) < 100 * self._n_jobs:
-			chunksize = int(len(self._graphs) / self._n_jobs) + 1
+		if len(self._graphs) < 100 * self.n_jobs:
+			chunksize = int(len(self._graphs) / self.n_jobs) + 1
 		else:
 			chunksize = 100
 		all_paths = [[] for _ in range(len(self._graphs))]
@@ -84,7 +84,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize),
 							desc='getting paths', file=sys.stdout,
-							length=len(self._graphs), verbose=(self._verbose >= 2))
+							length=len(self._graphs), verbose=(self.verbose >= 2))
 		for i, ps in iterator:
 			all_paths[i] = ps
 		pool.close()
@@ -109,7 +109,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 			G_plist = plist_toshare
 		do_fun = self._wrapper_kernel_do_kernelless # @todo: what is this?
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-					glbv=(all_paths,), n_jobs=self._n_jobs, verbose=self._verbose)
+					glbv=(all_paths,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
 
@@ -117,8 +117,8 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 	def _compute_kernel_list_series(self, g1, g_list):
 		self._add_dummy_labels(g_list + [g1])
 
-		iterator_ps = get_iters(g_list, desc='getting paths', file=sys.stdout, verbose=(self._verbose >= 2))
-		iterator_kernel = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+		iterator_ps = get_iters(g_list, desc='getting paths', file=sys.stdout, verbose=(self.verbose >= 2))
+		iterator_kernel = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 		kernel_list = [None] * len(g_list)
 
@@ -143,10 +143,10 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 
 		# get all paths of all graphs before computing kernels to save time,
 		# but this may cost a lot of memory for large datasets.
-		pool = Pool(self._n_jobs)
+		pool = Pool(self.n_jobs)
 		itr = zip(g_list, range(0, len(g_list)))
-		if len(g_list) < 100 * self._n_jobs:
-			chunksize = int(len(g_list) / self._n_jobs) + 1
+		if len(g_list) < 100 * self.n_jobs:
+			chunksize = int(len(g_list) / self.n_jobs) + 1
 		else:
 			chunksize = 100
 		paths_g_list = [[] for _ in range(len(g_list))]
@@ -161,7 +161,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		get_ps_fun = partial(self._wrapper_find_all_paths_until_length, False)
 		iterator = get_iters(pool.imap_unordered(get_ps_fun, itr, chunksize),
 							desc='getting paths', file=sys.stdout,
-							length=len(g_list), verbose=(self._verbose >= 2))
+							length=len(g_list), verbose=(self.verbose >= 2))
 		for i, ps in iterator:
 			paths_g_list[i] = ps
 		pool.close()
@@ -180,7 +180,7 @@ class PathUpToH(GraphKernel): # @todo: add function for k_func is None
 		itr = range(len(g_list))
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
-					init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+					init_worker=init_worker, glbv=(paths_g1, paths_g_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
 


+14 -14   gklearn/kernels/shortest_path.py

@@ -38,7 +38,7 @@ class ShortestPath(GraphKernel):
 	def _compute_gm_series(self):
 		self._all_graphs_have_edges(self._graphs)
 		# get shortest path graph of each graph.
-		iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self.verbose >= 2))
 		self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
 
 		# compute Gram matrix.
@@ -48,7 +48,7 @@ class ShortestPath(GraphKernel):
 		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 		len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
 		iterator = get_iters(itr, desc='Computing kernels',
-							length=len_itr, file=sys.stdout,verbose=(self._verbose >= 2))
+							length=len_itr, file=sys.stdout,verbose=(self.verbose >= 2))
 		for i, j in iterator:
 			kernel = self._sp_do(self._graphs[i], self._graphs[j])
 			gram_matrix[i][j] = kernel
@@ -60,16 +60,16 @@ class ShortestPath(GraphKernel):
 	def _compute_gm_imap_unordered(self):
 		self._all_graphs_have_edges(self._graphs)
 		# get shortest path graph of each graph.
-		pool = Pool(self._n_jobs)
+		pool = Pool(self.n_jobs)
 		get_sp_graphs_fun = self._wrapper_get_sp_graphs
 		itr = zip(self._graphs, range(0, len(self._graphs)))
-		if len(self._graphs) < 100 * self._n_jobs:
-			chunksize = int(len(self._graphs) / self._n_jobs) + 1
+		if len(self._graphs) < 100 * self.n_jobs:
+			chunksize = int(len(self._graphs) / self.n_jobs) + 1
 		else:
 			chunksize = 100
 		iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize),
 							desc='getting sp graphs', file=sys.stdout,
-							length=len(self._graphs), verbose=(self._verbose >= 2))
+							length=len(self._graphs), verbose=(self.verbose >= 2))
 		for i, g in iterator:
 			self._graphs[i] = g
 		pool.close()
@@ -83,7 +83,7 @@ class ShortestPath(GraphKernel):
 			G_gs = gs_toshare
 		do_fun = self._wrapper_sp_do
 		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-					glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+					glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		return gram_matrix
 
@@ -92,12 +92,12 @@ class ShortestPath(GraphKernel):
 		self._all_graphs_have_edges([g1] + g_list)
 		# get shortest path graphs of g1 and each graph in g_list.
 		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
-		iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, verbose=(self.verbose >= 2))
 		g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator]
 
 		# compute kernel list.
 		kernel_list = [None] * len(g_list)
-		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+		iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 		for i in iterator:
 			kernel = self._sp_do(g1, g_list[i])
 			kernel_list[i] = kernel
@@ -109,16 +109,16 @@ class ShortestPath(GraphKernel):
 		self._all_graphs_have_edges([g1] + g_list)
 		# get shortest path graphs of g1 and each graph in g_list.
 		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
-		pool = Pool(self._n_jobs)
+		pool = Pool(self.n_jobs)
 		get_sp_graphs_fun = self._wrapper_get_sp_graphs
 		itr = zip(g_list, range(0, len(g_list)))
-		if len(g_list) < 100 * self._n_jobs:
-			chunksize = int(len(g_list) / self._n_jobs) + 1
+		if len(g_list) < 100 * self.n_jobs:
+			chunksize = int(len(g_list) / self.n_jobs) + 1
 		else:
 			chunksize = 100
 		iterator = get_iters(pool.imap_unordered(get_sp_graphs_fun, itr, chunksize),
 							desc='getting sp graphs', file=sys.stdout,
-							length=len(g_list), verbose=(self._verbose >= 2))
+							length=len(g_list), verbose=(self.verbose >= 2))
 		for i, g in iterator:
 			g_list[i] = g
 		pool.close()
@@ -137,7 +137,7 @@ class ShortestPath(GraphKernel):
 		itr = range(len(g_list))
 		len_itr = len(g_list)
 		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
-					init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+					init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		return kernel_list
 


+19 -19   gklearn/kernels/spectral_decomposition.py

@@ -28,9 +28,9 @@ class SpectralDecomposition(RandomWalkMeta):
 
 
 	def _compute_gm_series(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			import warnings
 			warnings.warn('All labels are ignored. Only works for undirected graphs.')
 
@@ -41,7 +41,7 @@ class SpectralDecomposition(RandomWalkMeta):
 			# precompute the spectral decomposition of each graph.
 			P_list = []
 			D_list = []
-			iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2))
+			iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self.verbose >= 2))
 			for G in iterator:
 				# don't normalize adjacency matrices if q is a uniform vector. Note
 				# A actually is the transpose of the adjacency matrix.
@@ -58,7 +58,7 @@ class SpectralDecomposition(RandomWalkMeta):
 			from itertools import combinations_with_replacement
 			itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
 			len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
-			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
+			iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
 
 			for i, j in iterator:
 				kernel = self._kernel_do(q_T_list[i], q_T_list[j], P_list[i], P_list[j], D_list[i], D_list[j], self._weight, self._sub_kernel)
@@ -74,9 +74,9 @@ class SpectralDecomposition(RandomWalkMeta):
 
 
 	def _compute_gm_imap_unordered(self):
-		self._check_edge_weight(self._graphs, self._verbose)
+		self._check_edge_weight(self._graphs, self.verbose)
 		self._check_graphs(self._graphs)
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			import warnings
 			warnings.warn('All labels are ignored. Only works for undirected graphs.')
 
@@ -87,7 +87,7 @@ class SpectralDecomposition(RandomWalkMeta):
 		# precompute the spectral decomposition of each graph.
 		P_list = []
 		D_list = []
-		iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(self._graphs, desc='spectral decompose', file=sys.stdout, verbose=(self.verbose >= 2))
 		for G in iterator:
 			# don't normalize adjacency matrices if q is a uniform vector. Note
 			# A actually is the transpose of the adjacency matrix.
@@ -107,7 +107,7 @@ class SpectralDecomposition(RandomWalkMeta):
 
 			do_fun = self._wrapper_kernel_do
 			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
-						glbv=(q_T_list, P_list, D_list), n_jobs=self._n_jobs, verbose=self._verbose)
+						glbv=(q_T_list, P_list, D_list), n_jobs=self.n_jobs, verbose=self.verbose)
 
 		else: # @todo
 			pass
@@ -118,9 +118,9 @@ class SpectralDecomposition(RandomWalkMeta):
 
 
 	def _compute_kernel_list_series(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			import warnings
 			warnings.warn('All labels are ignored. Only works for undirected graphs.')
 
@@ -133,7 +133,7 @@ class SpectralDecomposition(RandomWalkMeta):
 		D1, P1 = np.linalg.eig(A1)
 		P_list = []
 		D_list = []
-		iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout, verbose=(self._verbose >= 2))
+		iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout, verbose=(self.verbose >= 2))
 		for G in iterator:
 			# don't normalize adjacency matrices if q is a uniform vector. Note
 			# A actually is the transpose of the adjacency matrix.
@@ -145,7 +145,7 @@ class SpectralDecomposition(RandomWalkMeta):
 		if self._p is None: # p is uniform distribution as default.
 			q_T1 = 1 / nx.number_of_nodes(g1)
 			q_T_list = [np.full((1, nx.number_of_nodes(G)), 1 / nx.number_of_nodes(G)) for G in g_list]
-			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
+			iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
 
 			for i in iterator:
 				kernel = self._kernel_do(q_T1, q_T_list[i], P1, P_list[i], D1, D_list[i], self._weight, self._sub_kernel)
@@ -160,9 +160,9 @@ class SpectralDecomposition(RandomWalkMeta):
 
 
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
-		self._check_edge_weight(g_list + [g1], self._verbose)
+		self._check_edge_weight(g_list + [g1], self.verbose)
 		self._check_graphs(g_list + [g1])
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			import warnings
 			warnings.warn('All labels are ignored. Only works for undirected graphs.')
 
@@ -175,8 +175,8 @@ class SpectralDecomposition(RandomWalkMeta):
 		D1, P1 = np.linalg.eig(A1)
 		P_list = []
 		D_list = []
-		if self._verbose >= 2:
-			iterator = tqdm(g_list, desc='spectral decompose', file=sys.stdout)
+		if self.verbose >= 2:
+			iterator = get_iters(g_list, desc='spectral decompose', file=sys.stdout)
 		else:
 			iterator = g_list
 		for G in iterator:
@@ -207,7 +207,7 @@ class SpectralDecomposition(RandomWalkMeta):
 			itr = range(len(g_list))
 			len_itr = len(g_list)
 			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
-						init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+						init_worker=init_worker, glbv=(q_T1, P1, D1, q_T_list, P_list, D_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
 
 		else: # @todo
 			pass
@@ -222,9 +222,9 @@ class SpectralDecomposition(RandomWalkMeta):
 
 
 	def _compute_single_kernel_series(self, g1, g2):
-		self._check_edge_weight([g1] + [g2], self._verbose)
+		self._check_edge_weight([g1] + [g2], self.verbose)
 		self._check_graphs([g1] + [g2])
-		if self._verbose >= 2:
+		if self.verbose >= 2:
 			import warnings
 			warnings.warn('All labels are ignored. Only works for undirected graphs.')
 


+ 14
- 14
gklearn/kernels/structural_sp.py View File

@@ -41,7 +41,7 @@ class StructuralSP(GraphKernel):
def _compute_gm_series(self): def _compute_gm_series(self):
# get shortest paths of each graph in the graphs. # get shortest paths of each graph in the graphs.
splist = [] splist = []
iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self._verbose >= 2))
iterator = get_iters(self._graphs, desc='getting sp graphs', file=sys.stdout, verbose=(self.verbose >= 2))
if self._compute_method == 'trie': if self._compute_method == 'trie':
for g in iterator: for g in iterator:
splist.append(self._get_sps_as_trie(g)) splist.append(self._get_sps_as_trie(g))
@@ -56,7 +56,7 @@ class StructuralSP(GraphKernel):
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
length=len_itr, verbose=(self._verbose >= 2))
length=len_itr, verbose=(self.verbose >= 2))
if self._compute_method == 'trie': if self._compute_method == 'trie':
for i, j in iterator: for i, j in iterator:
kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j]) kernel = self._ssp_do_trie(self._graphs[i], self._graphs[j], splist[i], splist[j])
@@ -76,10 +76,10 @@ class StructuralSP(GraphKernel):
def _compute_gm_imap_unordered(self): def _compute_gm_imap_unordered(self):
# get shortest paths of each graph in the graphs. # get shortest paths of each graph in the graphs.
splist = [None] * len(self._graphs) splist = [None] * len(self._graphs)
pool = Pool(self._n_jobs)
pool = Pool(self.n_jobs)
itr = zip(self._graphs, range(0, len(self._graphs))) itr = zip(self._graphs, range(0, len(self._graphs)))
if len(self._graphs) < 100 * self._n_jobs:
chunksize = int(len(self._graphs) / self._n_jobs) + 1
if len(self._graphs) < 100 * self.n_jobs:
chunksize = int(len(self._graphs) / self.n_jobs) + 1
else: else:
chunksize = 100 chunksize = 100
# get shortest path graphs of self._graphs # get shortest path graphs of self._graphs
@@ -89,7 +89,7 @@ class StructuralSP(GraphKernel):
get_sps_fun = self._wrapper_get_sps_naive get_sps_fun = self._wrapper_get_sps_naive
iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize), iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize),
desc='getting shortest paths', file=sys.stdout, desc='getting shortest paths', file=sys.stdout,
length=len(self._graphs), verbose=(self._verbose >= 2))
length=len(self._graphs), verbose=(self.verbose >= 2))
for i, sp in iterator: for i, sp in iterator:
splist[i] = sp splist[i] = sp
pool.close() pool.close()
@@ -107,7 +107,7 @@ class StructuralSP(GraphKernel):
else: else:
do_fun = self._wrapper_ssp_do_naive do_fun = self._wrapper_ssp_do_naive
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(splist, self._graphs), n_jobs=self._n_jobs, verbose=self._verbose)
glbv=(splist, self._graphs), n_jobs=self.n_jobs, verbose=self.verbose)


return gram_matrix return gram_matrix
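[Editor's note] The chunksize heuristic in the hunk above recurs in every imap_unordered code path of this commit. Extracted as a standalone helper it reads as follows; this is a sketch for clarity only, gklearn keeps the logic inline.

def choose_chunksize(n_items, n_jobs):
    # Small workloads: one roughly equal chunk per worker.
    # Large workloads: cap chunks at 100 items (the likely intent being to
    # keep progress reporting responsive and per-task memory bounded).
    if n_items < 100 * n_jobs:
        return n_items // n_jobs + 1
    return 100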


@@ -117,7 +117,7 @@ class StructuralSP(GraphKernel):
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = [] splist = []
iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout, iterator = get_iters(g_list, desc='getting sp graphs', file=sys.stdout,
verbose=(self._verbose >= 2))
verbose=(self.verbose >= 2))
if self._compute_method == 'trie': if self._compute_method == 'trie':
for g in iterator: for g in iterator:
splist.append(self._get_sps_as_trie(g)) splist.append(self._get_sps_as_trie(g))
@@ -128,7 +128,7 @@ class StructuralSP(GraphKernel):
# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
iterator = get_iters(range(len(g_list)), desc='Computing kernels', iterator = get_iters(range(len(g_list)), desc='Computing kernels',
file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
if self._compute_method == 'trie': if self._compute_method == 'trie':
for i in iterator: for i in iterator:
kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i]) kernel = self._ssp_do_trie(g1, g_list[i], sp1, splist[i])
@@ -145,10 +145,10 @@ class StructuralSP(GraphKernel):
# get shortest paths of g1 and each graph in g_list. # get shortest paths of g1 and each graph in g_list.
sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed']) sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
splist = [None] * len(g_list) splist = [None] * len(g_list)
pool = Pool(self._n_jobs)
pool = Pool(self.n_jobs)
itr = zip(g_list, range(0, len(g_list))) itr = zip(g_list, range(0, len(g_list)))
if len(g_list) < 100 * self._n_jobs:
chunksize = int(len(g_list) / self._n_jobs) + 1
if len(g_list) < 100 * self.n_jobs:
chunksize = int(len(g_list) / self.n_jobs) + 1
else: else:
chunksize = 100 chunksize = 100
# get shortest path graphs of g_list # get shortest path graphs of g_list
@@ -158,7 +158,7 @@ class StructuralSP(GraphKernel):
get_sps_fun = self._wrapper_get_sps_naive get_sps_fun = self._wrapper_get_sps_naive
iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize), iterator = get_iters(pool.imap_unordered(get_sps_fun, itr, chunksize),
desc='getting shortest paths', file=sys.stdout, desc='getting shortest paths', file=sys.stdout,
length=len(g_list), verbose=(self._verbose >= 2))
length=len(g_list), verbose=(self.verbose >= 2))
for i, sp in iterator: for i, sp in iterator:
splist[i] = sp splist[i] = sp
pool.close() pool.close()
@@ -182,7 +182,7 @@ class StructuralSP(GraphKernel):
itr = range(len(g_list)) itr = range(len(g_list))
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered', n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)


return kernel_list return kernel_list




+ 18
- 18
gklearn/kernels/sylvester_equation.py View File

@@ -27,9 +27,9 @@ class SylvesterEquation(RandomWalkMeta):




def _compute_gm_series(self): def _compute_gm_series(self):
self._check_edge_weight(self._graphs, self._verbose)
self._check_edge_weight(self._graphs, self.verbose)
self._check_graphs(self._graphs) self._check_graphs(self._graphs)
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('All labels are ignored.') warnings.warn('All labels are ignored.')


@@ -41,7 +41,7 @@ class SylvesterEquation(RandomWalkMeta):
if self._q is None: if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2))
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]
# # normalized adjacency matrices # # normalized adjacency matrices
# A_wave_list = [] # A_wave_list = []
@@ -55,7 +55,7 @@ class SylvesterEquation(RandomWalkMeta):
from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self._verbose >= 2))
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))


for i, j in iterator: for i, j in iterator:
kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda) kernel = self._kernel_do(A_wave_list[i], A_wave_list[j], lmda)
@@ -71,9 +71,9 @@ class SylvesterEquation(RandomWalkMeta):




def _compute_gm_imap_unordered(self): def _compute_gm_imap_unordered(self):
self._check_edge_weight(self._graphs, self._verbose)
self._check_edge_weight(self._graphs, self.verbose)
self._check_graphs(self._graphs) self._check_graphs(self._graphs)
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('All labels are ignored.') warnings.warn('All labels are ignored.')


@@ -83,7 +83,7 @@ class SylvesterEquation(RandomWalkMeta):
if self._q is None: if self._q is None:
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
iterator = get_iters(self._graphs, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2))
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?


if self._p is None: # p is uniform distribution as default. if self._p is None: # p is uniform distribution as default.
@@ -94,7 +94,7 @@ class SylvesterEquation(RandomWalkMeta):
do_fun = self._wrapper_kernel_do do_fun = self._wrapper_kernel_do


parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose)
glbv=(A_wave_list,), n_jobs=self.n_jobs, verbose=self.verbose)


else: # @todo else: # @todo
pass pass
@@ -105,9 +105,9 @@ class SylvesterEquation(RandomWalkMeta):




def _compute_kernel_list_series(self, g1, g_list): def _compute_kernel_list_series(self, g1, g_list):
self._check_edge_weight(g_list + [g1], self._verbose)
self._check_edge_weight(g_list + [g1], self.verbose)
self._check_graphs(g_list + [g1]) self._check_graphs(g_list + [g1])
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('All labels are ignored.') warnings.warn('All labels are ignored.')


@@ -120,11 +120,11 @@ class SylvesterEquation(RandomWalkMeta):
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2))
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator]


if self._p is None: # p is uniform distribution as default. if self._p is None: # p is uniform distribution as default.
iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))


for i in iterator: for i in iterator:
kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda) kernel = self._kernel_do(A_wave_1, A_wave_list[i], lmda)
@@ -139,9 +139,9 @@ class SylvesterEquation(RandomWalkMeta):




def _compute_kernel_list_imap_unordered(self, g1, g_list): def _compute_kernel_list_imap_unordered(self, g1, g_list):
self._check_edge_weight(g_list + [g1], self._verbose)
self._check_edge_weight(g_list + [g1], self.verbose)
self._check_graphs(g_list + [g1]) self._check_graphs(g_list + [g1])
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('All labels are ignored.') warnings.warn('All labels are ignored.')


@@ -152,7 +152,7 @@ class SylvesterEquation(RandomWalkMeta):
# don't normalize adjacency matrices if q is a uniform vector. Note # don't normalize adjacency matrices if q is a uniform vector. Note
# A_wave_list actually contains the transposes of the adjacency matrices. # A_wave_list actually contains the transposes of the adjacency matrices.
A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose() A_wave_1 = nx.adjacency_matrix(g1, self._edge_weight).todense().transpose()
iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self._verbose >= 2))
iterator = get_iters(g_list, desc='compute adjacency matrices', file=sys.stdout, verbose=(self.verbose >= 2))
A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel? A_wave_list = [nx.adjacency_matrix(G, self._edge_weight).todense().transpose() for G in iterator] # @todo: parallel?


if self._p is None: # p is uniform distribution as default. if self._p is None: # p is uniform distribution as default.
@@ -169,7 +169,7 @@ class SylvesterEquation(RandomWalkMeta):
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered', init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)


else: # @todo else: # @todo
pass pass
@@ -184,9 +184,9 @@ class SylvesterEquation(RandomWalkMeta):




def _compute_single_kernel_series(self, g1, g2): def _compute_single_kernel_series(self, g1, g2):
self._check_edge_weight([g1] + [g2], self._verbose)
self._check_edge_weight([g1] + [g2], self.verbose)
self._check_graphs([g1] + [g2]) self._check_graphs([g1] + [g2])
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('All labels are ignored.') warnings.warn('All labels are ignored.')
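[Editor's note] The hunks in this file are attribute renames, but for context: SylvesterEquation computes, in essence, the geometric random-walk kernel, solving a Sylvester equation rather than inverting the direct-product matrix. A naive dense reference for the same quantity, with uniform start/stop distributions, is sketched below; it is illustrative only, cubic in the product-graph size, and not gklearn's implementation.

import networkx as nx
import numpy as np

def naive_geometric_rw_kernel(g1, g2, lmda):
    # k = q^T (I - lmda * W_x)^(-1) p, where W_x is the Kronecker product
    # of the adjacency matrices and p, q are uniform distributions.
    A1 = nx.to_numpy_array(g1)
    A2 = nx.to_numpy_array(g2)
    W_x = np.kron(A1, A2)
    n = W_x.shape[0]
    p = np.full(n, 1.0 / n)
    q = np.full(n, 1.0 / n)
    return q @ np.linalg.solve(np.eye(n) - lmda * W_x, p)

print(naive_geometric_rw_kernel(nx.path_graph(3), nx.cycle_graph(4), 0.1))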




+ 313
- 87
gklearn/kernels/treelet.py View File

@@ -18,6 +18,8 @@ import numpy as np
import networkx as nx import networkx as nx
from collections import Counter from collections import Counter
from itertools import chain from itertools import chain
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError
from gklearn.utils import SpecialLabel from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs from gklearn.utils.utils import find_all_paths, get_mlti_dim_node_attrs
@@ -26,14 +28,211 @@ from gklearn.kernels import GraphKernel


class Treelet(GraphKernel): class Treelet(GraphKernel):


def __init__(self, **kwargs):
GraphKernel.__init__(self)
self._node_labels = kwargs.get('node_labels', [])
self._edge_labels = kwargs.get('edge_labels', [])
self._sub_kernel = kwargs.get('sub_kernel', None)
self._ds_infos = kwargs.get('ds_infos', {})
if self._sub_kernel is None:
raise Exception('Sub kernel not set.')
def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2, precompute_canonkeys=True, save_canonkeys=False, **kwargs):
"""Initialise a treelet kernel.
"""
super().__init__(parallel=parallel, n_jobs=n_jobs, chunksize=chunksize, normalize=normalize, verbose=verbose)
self.node_labels = kwargs.get('node_labels', [])
self.edge_labels = kwargs.get('edge_labels', [])
self.sub_kernel = kwargs.get('sub_kernel', None)
self.ds_infos = kwargs.get('ds_infos', {})
self.precompute_canonkeys = precompute_canonkeys
self.save_canonkeys = save_canonkeys


##########################################################################
# The following is the 1st paradigm to compute kernel matrix, which is
# compatible with `scikit-learn`.
# -------------------------------------------------------------------
# Special thanks to the "GraKeL" library for providing an excellent template!
##########################################################################


def clear_attributes(self):
super().clear_attributes()
if hasattr(self, '_canonkeys'):
delattr(self, '_canonkeys')
if hasattr(self, '_Y_canonkeys'):
delattr(self, '_Y_canonkeys')
if hasattr(self, '_dummy_labels_considered'):
delattr(self, '_dummy_labels_considered')


def validate_parameters(self):
"""Validate all parameters for the transformer.

Returns
-------
None.

"""
super().validate_parameters()
if self.sub_kernel is None:
raise ValueError('Sub-kernel not set.')


def _compute_kernel_matrix_series(self, Y):
"""Compute the kernel matrix between a given target graphs (Y) and
the fitted graphs (X / self._graphs) without parallelization.

Parameters
----------
Y : list of graphs
The target graphs.

Returns
-------
kernel_matrix : numpy array, shape = [n_targets, n_inputs]
The computed kernel matrix.

"""

# self._add_dummy_labels will modify the input in place.
self._add_dummy_labels() # For self._graphs
# Y = [g.copy() for g in Y] # @todo: ?
self._add_dummy_labels(Y)

# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large datasets.

# Canonical keys for self._graphs.
try:
check_is_fitted(self, ['_canonkeys'])
canonkeys_list1 = self._canonkeys
except NotFittedError:
canonkeys_list1 = []
iterator = get_iters(self._graphs, desc='getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator:
canonkeys_list1.append(self._get_canonkeys(g))

if self.save_canonkeys:
self._canonkeys = canonkeys_list1

# Canonical keys for Y.
canonkeys_list2 = []
iterator = get_iters(Y, desc='getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator:
canonkeys_list2.append(self._get_canonkeys(g))

if self.save_canonkeys:
self._Y_canonkeys = canonkeys_list2

# compute kernel matrix.
kernel_matrix = np.zeros((len(Y), len(canonkeys_list1)))

from itertools import product
itr = product(range(len(Y)), range(len(canonkeys_list1)))
len_itr = int(len(Y) * len(canonkeys_list1))
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
length=len_itr, verbose=(self.verbose >= 2))
for i_y, i_x in iterator:
kernel = self._kernel_do(canonkeys_list2[i_y], canonkeys_list1[i_x])
kernel_matrix[i_y][i_x] = kernel

return kernel_matrix
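[Editor's note] A sketch of how this first paradigm is meant to be used from the outside, following the fit/transform convention the docstrings refer to. The exact public signatures and the required ds_infos entry are assumptions inferred from the hunks above, and gaussian_sub_kernel is a hypothetical user-supplied sub-kernel, not part of gklearn.

import networkx as nx
import numpy as np
from gklearn.kernels import Treelet

def gaussian_sub_kernel(x, y, gamma=1.0):
    # Hypothetical sub-kernel on treelet count vectors.
    return np.exp(-gamma * np.sum((x - y) ** 2))

graphs_X = [nx.path_graph(4), nx.cycle_graph(5)]  # graphs to fit on
graphs_Y = [nx.star_graph(3)]                     # target graphs

kernel = Treelet(sub_kernel=gaussian_sub_kernel,
                 ds_infos={'directed': False},    # assumed to be required
                 parallel=None, verbose=0)
kernel.fit(graphs_X)            # stores the inputs as self._graphs
K = kernel.transform(graphs_Y)  # shape [n_targets, n_inputs], as documented above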


def _compute_kernel_matrix_imap_unordered(self, Y):
"""Compute the kernel matrix between a given target graphs (Y) and
the fitted graphs (X / self._graphs) using imap unordered parallelization.

Parameters
----------
Y : list of graphs
The target graphs.

Returns
-------
kernel_matrix : numpy array, shape = [n_targets, n_inputs]
The computed kernel matrix.

"""
raise NotImplementedError('Parallelization for kernel matrix is not implemented.')


def pairwise_kernel(self, x, y, are_keys=False):
"""Compute pairwise kernel between two graphs.

Parameters
----------
x, y : NetworkX Graph.
Graphs between which the kernel is computed.

are_keys : boolean, optional
If `True`, `x` and `y` are canonical keys; otherwise they are graphs.
The default is False.

Returns
-------
kernel: float
The computed kernel.

"""
if are_keys:
# x, y are canonical keys.
kernel = self._kernel_do(x, y)

else:
# x, y are graphs.
kernel = self._compute_single_kernel_series(x, y)

return kernel
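[Editor's note] Continuing the sketch above, pairwise_kernel evaluates a single pair either from raw graphs or from precomputed canonical keys; _get_canonkeys is a private helper and is used here only to illustrate are_keys=True.

g1, g2 = graphs_X[0], graphs_X[1]
k_graphs = kernel.pairwise_kernel(g1, g2)                 # from graphs
ck1, ck2 = kernel._get_canonkeys(g1), kernel._get_canonkeys(g2)
k_keys = kernel.pairwise_kernel(ck1, ck2, are_keys=True)  # from canonical keys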


def diagonals(self):
"""Compute the kernel matrix diagonals of the fit/transformed data.

Returns
-------
X_diag : numpy array
The diagonal of the kernel matrix between the fitted data.
This consists of each element calculated with itself.

Y_diag : numpy array
The diagonal of the kernel matrix between the transformed data.
This consists of each element calculated with itself.

"""
# Check if method "fit" had been called.
check_is_fitted(self, ['_graphs'])

# Check if the diagonals of X exist.
try:
check_is_fitted(self, ['_X_diag'])
except NotFittedError:
# Compute diagonals of X.
self._X_diag = np.empty(shape=(len(self._graphs),))
try:
check_is_fitted(self, ['_canonkeys'])
for i, x in enumerate(self._canonkeys):
self._X_diag[i] = self.pairwise_kernel(x, x, are_keys=True) # @todo: parallel?
except NotFittedError:
for i, x in enumerate(self._graphs):
self._X_diag[i] = self.pairwise_kernel(x, x, are_keys=False) # @todo: parallel?

try:
# If transform has happened, return both diagonals.
check_is_fitted(self, ['_Y'])
self._Y_diag = np.empty(shape=(len(self._Y),))
try:
check_is_fitted(self, ['_Y_canonkeys'])
for (i, y) in enumerate(self._Y_canonkeys):
self._Y_diag[i] = self.pairwise_kernel(y, y, are_keys=True) # @todo: parallel?
except NotFittedError:
for (i, y) in enumerate(self._Y):
self._Y_diag[i] = self.pairwise_kernel(y, y, are_keys=False) # @todo: parallel?

return self._X_diag, self._Y_diag

except NotFittedError:
# Else just return X_diag.
return self._X_diag
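[Editor's note] These diagonals exist to support the usual cosine-style normalization, presumably what the normalize flag in the constructor triggers. Continuing the sketch, with K = kernel.transform(graphs_Y) as before:

X_diag, Y_diag = kernel.diagonals()
# Entry (i, j) becomes K[i, j] / sqrt(k(y_i, y_i) * k(x_j, x_j)).
K_norm = K / np.sqrt(np.outer(Y_diag, X_diag))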


##########################################################################
# The following is the 2nd paradigm to compute kernel matrix. It is
# simplified and not compatible with `scikit-learn`.
##########################################################################




def _compute_gm_series(self): def _compute_gm_series(self):
@@ -43,10 +242,13 @@ class Treelet(GraphKernel):
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys = [] canonkeys = []
iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout, iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout,
verbose=(self._verbose >= 2))
verbose=(self.verbose >= 2))
for g in iterator: for g in iterator:
canonkeys.append(self._get_canonkeys(g)) canonkeys.append(self._get_canonkeys(g))


if self.save_canonkeys:
self._canonkeys = canonkeys

# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))


@@ -54,7 +256,7 @@ class Treelet(GraphKernel):
itr = combinations_with_replacement(range(0, len(self._graphs)), 2) itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
length=len_itr, verbose=(self._verbose >= 2))
length=len_itr, verbose=(self.verbose >= 2))
for i, j in iterator: for i, j in iterator:
kernel = self._kernel_do(canonkeys[i], canonkeys[j]) kernel = self._kernel_do(canonkeys[i], canonkeys[j])
gram_matrix[i][j] = kernel gram_matrix[i][j] = kernel
@@ -68,22 +270,25 @@ class Treelet(GraphKernel):


# get all canonical keys of all graphs before computing kernels to save # get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
pool = Pool(self._n_jobs)
pool = Pool(self.n_jobs)
itr = zip(self._graphs, range(0, len(self._graphs))) itr = zip(self._graphs, range(0, len(self._graphs)))
if len(self._graphs) < 100 * self._n_jobs:
chunksize = int(len(self._graphs) / self._n_jobs) + 1
if len(self._graphs) < 100 * self.n_jobs:
chunksize = int(len(self._graphs) / self.n_jobs) + 1
else: else:
chunksize = 100 chunksize = 100
canonkeys = [[] for _ in range(len(self._graphs))] canonkeys = [[] for _ in range(len(self._graphs))]
get_fun = self._wrapper_get_canonkeys get_fun = self._wrapper_get_canonkeys
iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize),
desc='getting canonkeys', file=sys.stdout, desc='getting canonkeys', file=sys.stdout,
length=len(self._graphs), verbose=(self._verbose >= 2))
length=len(self._graphs), verbose=(self.verbose >= 2))
for i, ck in iterator: for i, ck in iterator:
canonkeys[i] = ck canonkeys[i] = ck
pool.close() pool.close()
pool.join() pool.join()


if self.save_canonkeys:
self._canonkeys = canonkeys

# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))


@@ -92,7 +297,7 @@ class Treelet(GraphKernel):
G_canonkeys = canonkeys_toshare G_canonkeys = canonkeys_toshare
do_fun = self._wrapper_kernel_do do_fun = self._wrapper_kernel_do
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(canonkeys,), n_jobs=self._n_jobs, verbose=self._verbose)
glbv=(canonkeys,), n_jobs=self.n_jobs, verbose=self.verbose)


return gram_matrix return gram_matrix


@@ -104,13 +309,13 @@ class Treelet(GraphKernel):
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys_1 = self._get_canonkeys(g1) canonkeys_1 = self._get_canonkeys(g1)
canonkeys_list = [] canonkeys_list = []
iterator = get_iters(g_list, desc='getting canonkeys', file=sys.stdout, verbose=(self._verbose >= 2))
iterator = get_iters(g_list, desc='getting canonkeys', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator: for g in iterator:
canonkeys_list.append(self._get_canonkeys(g)) canonkeys_list.append(self._get_canonkeys(g))


# compute kernel list. # compute kernel list.
kernel_list = [None] * len(g_list) kernel_list = [None] * len(g_list)
iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self._verbose >= 2))
iterator = get_iters(range(len(g_list)), desc='Computing kernels', file=sys.stdout, length=len(g_list), verbose=(self.verbose >= 2))
for i in iterator: for i in iterator:
kernel = self._kernel_do(canonkeys_1, canonkeys_list[i]) kernel = self._kernel_do(canonkeys_1, canonkeys_list[i])
kernel_list[i] = kernel kernel_list[i] = kernel
@@ -125,16 +330,16 @@ class Treelet(GraphKernel):
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys_1 = self._get_canonkeys(g1) canonkeys_1 = self._get_canonkeys(g1)
canonkeys_list = [[] for _ in range(len(g_list))] canonkeys_list = [[] for _ in range(len(g_list))]
pool = Pool(self._n_jobs)
pool = Pool(self.n_jobs)
itr = zip(g_list, range(0, len(g_list))) itr = zip(g_list, range(0, len(g_list)))
if len(g_list) < 100 * self._n_jobs:
chunksize = int(len(g_list) / self._n_jobs) + 1
if len(g_list) < 100 * self.n_jobs:
chunksize = int(len(g_list) / self.n_jobs) + 1
else: else:
chunksize = 100 chunksize = 100
get_fun = self._wrapper_get_canonkeys get_fun = self._wrapper_get_canonkeys
iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize), iterator = get_iters(pool.imap_unordered(get_fun, itr, chunksize),
desc='getting canonkeys', file=sys.stdout, desc='getting canonkeys', file=sys.stdout,
length=len(g_list), verbose=(self._verbose >= 2))
length=len(g_list), verbose=(self.verbose >= 2))
for i, ck in iterator: for i, ck in iterator:
canonkeys_list[i] = ck canonkeys_list[i] = ck
pool.close() pool.close()
@@ -154,7 +359,7 @@ class Treelet(GraphKernel):
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered', init_worker=init_worker, glbv=(canonkeys_1, canonkeys_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)


return kernel_list return kernel_list


@@ -187,7 +392,7 @@ class Treelet(GraphKernel):
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])
kernel = self._sub_kernel(vector1, vector2)
kernel = self.sub_kernel(vector1, vector2)
return kernel return kernel




@@ -223,7 +428,7 @@ class Treelet(GraphKernel):
patterns['0'] = list(G.nodes()) patterns['0'] = list(G.nodes())
canonkey['0'] = nx.number_of_nodes(G) canonkey['0'] = nx.number_of_nodes(G)
for i in range(1, 6): # for i in range(1, 6): for i in range(1, 6): # for i in range(1, 6):
patterns[str(i)] = find_all_paths(G, i, self._ds_infos['directed'])
patterns[str(i)] = find_all_paths(G, i, self.ds_infos['directed'])
canonkey[str(i)] = len(patterns[str(i)]) canonkey[str(i)] = len(patterns[str(i)])


# n-star patterns # n-star patterns
@@ -317,11 +522,11 @@ class Treelet(GraphKernel):
### pattern obtained in the structural analysis section above, which is a ### pattern obtained in the structural analysis section above, which is a
### string corresponding to a unique treelet. A dictionary is built to keep ### string corresponding to a unique treelet. A dictionary is built to keep
### track of the amount of every treelet. ### track of the amount of every treelet.
if len(self._node_labels) > 0 or len(self._edge_labels) > 0:
if len(self.node_labels) > 0 or len(self.edge_labels) > 0:
canonkey_l = {} # canonical key, a dictionary which keeps track of amount of every treelet. canonkey_l = {} # canonical key, a dictionary which keeps track of amount of every treelet.


# linear patterns # linear patterns
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self._node_labels))
canonkey_t = Counter(get_mlti_dim_node_attrs(G, self.node_labels))
for key in canonkey_t: for key in canonkey_t:
canonkey_l[('0', key)] = canonkey_t[key] canonkey_l[('0', key)] = canonkey_t[key]


@@ -330,9 +535,9 @@ class Treelet(GraphKernel):
for pattern in patterns[str(i)]: for pattern in patterns[str(i)]:
canonlist = [] canonlist = []
for idx, node in enumerate(pattern[:-1]): for idx, node in enumerate(pattern[:-1]):
canonlist.append(tuple(G.nodes[node][nl] for nl in self._node_labels))
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self._edge_labels))
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self._node_labels))
canonlist.append(tuple(G.nodes[node][nl] for nl in self.node_labels))
canonlist.append(tuple(G[node][pattern[idx+1]][el] for el in self.edge_labels))
canonlist.append(tuple(G.nodes[pattern[-1]][nl] for nl in self.node_labels))
canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1] canonkey_t = canonlist if canonlist < canonlist[::-1] else canonlist[::-1]
treelet.append(tuple([str(i)] + canonkey_t)) treelet.append(tuple([str(i)] + canonkey_t))
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))
@@ -343,13 +548,13 @@ class Treelet(GraphKernel):
for pattern in patterns[str(i) + 'star']: for pattern in patterns[str(i) + 'star']:
canonlist = [] canonlist = []
for leaf in pattern[1:]: for leaf in pattern[1:]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels)
canonlist.append(tuple((nlabels, elabels))) canonlist.append(tuple((nlabels, elabels)))
canonlist.sort() canonlist.sort()
canonlist = list(chain.from_iterable(canonlist)) canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['d' if i == 5 else str(i * 2)] + canonkey_t = tuple(['d' if i == 5 else str(i * 2)] +
[tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
[tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)]
+ canonlist) + canonlist)
treelet.append(canonkey_t) treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))
@@ -359,17 +564,17 @@ class Treelet(GraphKernel):
for pattern in patterns['7']: for pattern in patterns['7']:
canonlist = [] canonlist = []
for leaf in pattern[1:3]: for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels)
canonlist.append(tuple((nlabels, elabels))) canonlist.append(tuple((nlabels, elabels)))
canonlist.sort() canonlist.sort()
canonlist = list(chain.from_iterable(canonlist)) canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['7'] canonkey_t = tuple(['7']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)])
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] + canonlist
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.edge_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.edge_labels)])
treelet.append(canonkey_t) treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))


@@ -378,38 +583,38 @@ class Treelet(GraphKernel):
for pattern in patterns['11']: for pattern in patterns['11']:
canonlist = [] canonlist = []
for leaf in pattern[1:4]: for leaf in pattern[1:4]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels)
canonlist.append(tuple((nlabels, elabels))) canonlist.append(tuple((nlabels, elabels)))
canonlist.sort() canonlist.sort()
canonlist = list(chain.from_iterable(canonlist)) canonlist = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['b'] canonkey_t = tuple(['b']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)])
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] + canonlist
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[4]][pattern[0]][el] for el in self.edge_labels)]
+ [tuple(G.nodes[pattern[5]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[5]][pattern[4]][el] for el in self.edge_labels)])
treelet.append(canonkey_t) treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))


# pattern 10 # pattern 10
treelet = [] treelet = []
for pattern in patterns['10']: for pattern in patterns['10']:
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
tuple(G[pattern[5]][pattern[4]][el] for el in self._edge_labels)]
canonkey4 = [tuple(G.nodes[pattern[5]][nl] for nl in self.node_labels),
tuple(G[pattern[5]][pattern[4]][el] for el in self.edge_labels)]
canonlist = [] canonlist = []
for leaf in pattern[1:3]: for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels)
canonlist.append(tuple((nlabels, elabels))) canonlist.append(tuple((nlabels, elabels)))
canonlist.sort() canonlist.sort()
canonkey0 = list(chain.from_iterable(canonlist)) canonkey0 = list(chain.from_iterable(canonlist))
canonkey_t = tuple(['a'] canonkey_t = tuple(['a']
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)]
+ [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[4]][pattern[3]][el] for el in self.edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.edge_labels)]
+ canonkey4 + canonkey0) + canonkey4 + canonkey0)
treelet.append(canonkey_t) treelet.append(canonkey_t)
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))
@@ -419,15 +624,15 @@ class Treelet(GraphKernel):
for pattern in patterns['12']: for pattern in patterns['12']:
canonlist0 = [] canonlist0 = []
for leaf in pattern[1:3]: for leaf in pattern[1:3]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self._edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels)
elabels = tuple(G[leaf][pattern[0]][el] for el in self.edge_labels)
canonlist0.append(tuple((nlabels, elabels))) canonlist0.append(tuple((nlabels, elabels)))
canonlist0.sort() canonlist0.sort()
canonlist0 = list(chain.from_iterable(canonlist0)) canonlist0 = list(chain.from_iterable(canonlist0))
canonlist3 = [] canonlist3 = []
for leaf in pattern[4:6]: for leaf in pattern[4:6]:
nlabels = tuple(G.nodes[leaf][nl] for nl in self._node_labels)
elabels = tuple(G[leaf][pattern[3]][el] for el in self._edge_labels)
nlabels = tuple(G.nodes[leaf][nl] for nl in self.node_labels)
elabels = tuple(G[leaf][pattern[3]][el] for el in self.edge_labels)
canonlist3.append(tuple((nlabels, elabels))) canonlist3.append(tuple((nlabels, elabels)))
canonlist3.sort() canonlist3.sort()
canonlist3 = list(chain.from_iterable(canonlist3)) canonlist3 = list(chain.from_iterable(canonlist3))
@@ -435,14 +640,14 @@ class Treelet(GraphKernel):
# 2 possible keys can be generated from 2 nodes with extended label 3, # 2 possible keys can be generated from 2 nodes with extended label 3,
# select the one with lower lexicographic order. # select the one with lower lexicographic order.
canonkey_t1 = tuple(['c'] canonkey_t1 = tuple(['c']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)] + canonlist0
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)] + canonlist0
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[3]][pattern[0]][el] for el in self.edge_labels)]
+ canonlist3) + canonlist3)
canonkey_t2 = tuple(['c'] canonkey_t2 = tuple(['c']
+ [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels)] + canonlist3
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self._edge_labels)]
+ [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels)] + canonlist3
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)]
+ [tuple(G[pattern[0]][pattern[3]][el] for el in self.edge_labels)]
+ canonlist0) + canonlist0)
treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2) treelet.append(canonkey_t1 if canonkey_t1 < canonkey_t2 else canonkey_t2)
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))
@@ -450,24 +655,24 @@ class Treelet(GraphKernel):
# pattern 9 # pattern 9
treelet = [] treelet = []
for pattern in patterns['9']: for pattern in patterns['9']:
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self._node_labels),
tuple(G[pattern[4]][pattern[2]][el] for el in self._edge_labels)]
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self._node_labels),
tuple(G[pattern[5]][pattern[3]][el] for el in self._edge_labels)]
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self._node_labels),
tuple(G[pattern[2]][pattern[0]][el] for el in self._edge_labels)]
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self._node_labels),
tuple(G[pattern[3]][pattern[0]][el] for el in self._edge_labels)]
canonkey2 = [tuple(G.nodes[pattern[4]][nl] for nl in self.node_labels),
tuple(G[pattern[4]][pattern[2]][el] for el in self.edge_labels)]
canonkey3 = [tuple(G.nodes[pattern[5]][nl] for nl in self.node_labels),
tuple(G[pattern[5]][pattern[3]][el] for el in self.edge_labels)]
prekey2 = [tuple(G.nodes[pattern[2]][nl] for nl in self.node_labels),
tuple(G[pattern[2]][pattern[0]][el] for el in self.edge_labels)]
prekey3 = [tuple(G.nodes[pattern[3]][nl] for nl in self.node_labels),
tuple(G[pattern[3]][pattern[0]][el] for el in self.edge_labels)]
if prekey2 + canonkey2 < prekey3 + canonkey3: if prekey2 + canonkey2 < prekey3 + canonkey3:
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.edge_labels)] \
+ prekey2 + prekey3 + canonkey2 + canonkey3 + prekey2 + prekey3 + canonkey2 + canonkey3
else: else:
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self._node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self._edge_labels)] \
canonkey_t = [tuple(G.nodes[pattern[1]][nl] for nl in self.node_labels)] \
+ [tuple(G[pattern[1]][pattern[0]][el] for el in self.edge_labels)] \
+ prekey3 + prekey2 + canonkey3 + canonkey2 + prekey3 + prekey2 + canonkey3 + canonkey2
treelet.append(tuple(['9'] treelet.append(tuple(['9']
+ [tuple(G.nodes[pattern[0]][nl] for nl in self._node_labels)]
+ [tuple(G.nodes[pattern[0]][nl] for nl in self.node_labels)]
+ canonkey_t)) + canonkey_t))
canonkey_l.update(Counter(treelet)) canonkey_l.update(Counter(treelet))


@@ -482,12 +687,33 @@ class Treelet(GraphKernel):
return i, self._get_canonkeys(g) return i, self._get_canonkeys(g)




def _add_dummy_labels(self, Gn):
if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self._node_labels = [SpecialLabel.DUMMY]
if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self._edge_labels = [SpecialLabel.DUMMY]
def _add_dummy_labels(self, Gn=None):
def _add_dummy(Gn):
if len(self.node_labels) == 0 or (len(self.node_labels) == 1 and self.node_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.node_labels = [SpecialLabel.DUMMY]
if len(self.edge_labels) == 0 or (len(self.edge_labels) == 1 and self.edge_labels[0] == SpecialLabel.DUMMY):
for i in range(len(Gn)):
nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
self.edge_labels = [SpecialLabel.DUMMY]

if Gn is None or Gn is self._graphs:
# Add dummy labels for self._graphs (copying the graphs first is left as a @todo).
try:
check_is_fitted(self, ['_dummy_labels_considered'])
if not self._dummy_labels_considered:
Gn = self._graphs # @todo: ?[g.copy() for g in self._graphs]
_add_dummy(Gn)
self._graphs = Gn
self._dummy_labels_considered = True
except NotFittedError:
Gn = self._graphs # @todo: ?[g.copy() for g in self._graphs]
_add_dummy(Gn)
self._graphs = Gn
self._dummy_labels_considered = True

else:
# Add dummy labels for the input.
_add_dummy(Gn)
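[Editor's note] What the dummy-labelling above does to a fully unlabelled graph, in isolation: SpecialLabel.DUMMY becomes the attribute name and '0' the constant value, so the labelled treelet machinery can run unchanged. A minimal sketch:

import networkx as nx
from gklearn.utils import SpecialLabel

g = nx.path_graph(3)  # no node or edge labels at all
nx.set_node_attributes(g, '0', SpecialLabel.DUMMY)  # every node labelled '0'
nx.set_edge_attributes(g, '0', SpecialLabel.DUMMY)  # every edge labelled '0'
print(g.nodes(data=True))  # each node now carries the dummy attribute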


+ 9
- 9
gklearn/kernels/weisfeiler_lehman.py View File

@@ -33,7 +33,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.




def _compute_gm_series(self): def _compute_gm_series(self):
# if self._verbose >= 2:
# if self.verbose >= 2:
# import warnings # import warnings
# warnings.warn('A part of the computation is parallelized.') # warnings.warn('A part of the computation is parallelized.')


@@ -74,17 +74,17 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
G_gn = gn_toshare G_gn = gn_toshare
do_fun = self._wrapper_pairwise do_fun = self._wrapper_pairwise
parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
return gram_matrix return gram_matrix
else: else:
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('This base kernel is not parallelized. The serial computation is used instead.') warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
return self._compute_gm_series() return self._compute_gm_series()




def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better. def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better.
# if self._verbose >= 2:
# if self.verbose >= 2:
# import warnings # import warnings
# warnings.warn('A part of the computation is parallelized.') # warnings.warn('A part of the computation is parallelized.')


@@ -126,10 +126,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
len_itr = len(g_list) len_itr = len(g_list)
parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr, parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered', init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
return kernel_list return kernel_list
else: else:
if self._verbose >= 2:
if self.verbose >= 2:
import warnings import warnings
warnings.warn('This base kernel is not parallelized. The serial computation is used instead.') warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
return self._compute_kernel_list_series(g1, g_list) return self._compute_kernel_list_series(g1, g_list)
@@ -332,15 +332,15 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
def _compute_gram_itr(self, gram_matrix, all_num_of_each_label): def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
"""Compute Gram matrix using the base kernel. """Compute Gram matrix using the base kernel.
""" """
# if self._parallel == 'imap_unordered':
# if self.parallel == 'imap_unordered':
# # compute kernels. # # compute kernels.
# def init_worker(alllabels_toshare): # def init_worker(alllabels_toshare):
# global G_alllabels # global G_alllabels
# G_alllabels = alllabels_toshare # G_alllabels = alllabels_toshare
# do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix) # do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
# parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker, # parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
# glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
# elif self._parallel is None:
# glbv=(all_num_of_each_label,), n_jobs=self.n_jobs, verbose=self.verbose)
# elif self.parallel is None:
for i in range(len(gram_matrix)): for i in range(len(gram_matrix)):
for j in range(i, len(gram_matrix)): for j in range(i, len(gram_matrix)):
gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i], gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
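[Editor's note] For context on what _compute_subtree_kernel consumes: all_num_of_each_label holds, per graph, a dictionary from compressed WL label to occurrence count, and the subtree base kernel is in essence a dot product of those label histograms. A sketch of that idea, not gklearn's exact implementation:

def subtree_kernel_from_label_counts(counts1, counts2):
    # counts: dict mapping a compressed WL label -> number of occurrences
    shared = set(counts1) & set(counts2)
    return sum(counts1[label] * counts2[label] for label in shared)

print(subtree_kernel_from_label_counts({'a': 2, 'b': 1}, {'a': 3, 'c': 4}))  # 2*3 = 6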

