diff --git a/gklearn/kernels/__init__.py b/gklearn/kernels/__init__.py
index 9cba06d..8c83f70 100644
--- a/gklearn/kernels/__init__.py
+++ b/gklearn/kernels/__init__.py
@@ -10,6 +10,8 @@ __date__ = "November 2018"
 from gklearn.kernels.graph_kernel import GraphKernel
 from gklearn.kernels.common_walk import CommonWalk
 from gklearn.kernels.marginalized import Marginalized
+from gklearn.kernels.random_walk import RandomWalk
+from gklearn.kernels.sylvester_equation import SylvesterEquation
 from gklearn.kernels.shortest_path import ShortestPath
 from gklearn.kernels.structural_sp import StructuralSP
 from gklearn.kernels.path_up_to_h import PathUpToH
diff --git a/gklearn/kernels/common_walk.py b/gklearn/kernels/common_walk.py
index b892b5c..f6ee71d 100644
--- a/gklearn/kernels/common_walk.py
+++ b/gklearn/kernels/common_walk.py
@@ -268,7 +268,7 @@ class CommonWalk(GraphKernel):
     def __check_graphs(self, Gn):
         for g in Gn:
             if nx.number_of_nodes(g) == 1:
-                raise Exception('Graphs must contain more than 1 node to construct adjacency matrices.')
+                raise Exception('Graphs must contain more than one node to construct adjacency matrices.')
 
 
     def __add_dummy_labels(self, Gn):
diff --git a/gklearn/kernels/random_walk.py b/gklearn/kernels/random_walk.py
new file mode 100644
index 0000000..f2d0961
--- /dev/null
+++ b/gklearn/kernels/random_walk.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 16:55:17 2020
+
+@author: ljia
+
+@references:
+
+    [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010. 
+"""
+
+import sys
+from tqdm import tqdm
+import numpy as np
+import networkx as nx
+from gklearn.utils import SpecialLabel
+from gklearn.utils.parallel import parallel_gm, parallel_me
+from gklearn.utils.utils import direct_product_graph
+from gklearn.kernels import GraphKernel
+
+
+class RandomWalk(GraphKernel):
+    """Abstract base for random-walk graph kernels [Vishwanathan et al., 2010]."""
+
+
+    def __init__(self, **kwargs):
+        GraphKernel.__init__(self)
+        self._compute_method = kwargs.get('compute_method', None)
+        self._weight = kwargs.get('weight', 1)
+        self._p = kwargs.get('p', None)
+        self._q = kwargs.get('q', None)
+        self._edge_weight = kwargs.get('edge_weight', None)
+        self._ds_infos = kwargs.get('ds_infos', {})
+
+        self._compute_method = self._compute_method.lower() if isinstance(self._compute_method, str) else self._compute_method  # was self.__compute_method: name-mangled attribute never assigned, and .lower() crashed on the None default
+
+
+    def _compute_gm_series(self):
+        pass
+
+
+    def _compute_gm_imap_unordered(self):
+        pass
+
+
+    def _compute_kernel_list_series(self, g1, g_list):
+        pass
+
+
+    def _compute_kernel_list_imap_unordered(self, g1, g_list):
+        pass
+
+
+    def _compute_single_kernel_series(self, g1, g2):
+        pass
+
+
+    def _check_graphs(self, Gn):
+        # remove graphs with no edges, as no walk can be found in their structures,
+        # so the weight matrix between such a graph and itself might be zero.
+        for g in Gn:
+            if nx.number_of_edges(g) == 0:
+                raise Exception('Graphs must contain edges to construct weight matrices.')
+
+
+    def _check_edge_weight(self, G0, verbose=2):  # verbose default added: subclass callers pass a single argument
+        eweight = None
+        if self._edge_weight is None:
+            if verbose >= 2:
+                print('\n None edge weight is specified. Set all weight to 1.\n')
+        else:
+            try:
+                some_weight = list(nx.get_edge_attributes(G0, self._edge_weight).values())[0]
+                if isinstance(some_weight, float) or isinstance(some_weight, int):
+                    eweight = self._edge_weight
+                else:
+                    if verbose >= 2:
+                        print('\n Edge weight with name %s is not float or integer. Set all weight to 1.\n' % self._edge_weight)
+            except Exception:  # was a bare except: would also swallow KeyboardInterrupt/SystemExit
+                if verbose >= 2:
+                    print('\n Edge weight with name "%s" is not found in the edge attributes. 
Set all weight to 1.\n' % self._edge_weight)
+
+        self._edge_weight = eweight
+
+
+    def _add_dummy_labels(self, Gn):
+        if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):  # was self.__node_labels: mangles to _RandomWalk__node_labels, never defined here; NOTE(review): confirm base class uses _node_labels
+            for i in range(len(Gn)):
+                nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+            self._node_labels = [SpecialLabel.DUMMY]
+        if len(self._edge_labels) == 0 or (len(self._edge_labels) == 1 and self._edge_labels[0] == SpecialLabel.DUMMY):  # was self.__edge_labels: same name-mangling defect
+            for i in range(len(Gn)):
+                nx.set_edge_attributes(Gn[i], '0', SpecialLabel.DUMMY)
+            self._edge_labels = [SpecialLabel.DUMMY]
\ No newline at end of file
diff --git a/gklearn/kernels/sylvester_equation.py b/gklearn/kernels/sylvester_equation.py
new file mode 100644
index 0000000..3879b59
--- /dev/null
+++ b/gklearn/kernels/sylvester_equation.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Aug 19 17:24:46 2020
+
+@author: ljia
+
+@references:
+
+    [1] S Vichy N Vishwanathan, Nicol N Schraudolph, Risi Kondor, and Karsten M Borgwardt. Graph kernels. Journal of Machine Learning Research, 11(Apr):1201–1242, 2010.
+"""
+
+import sys
+from tqdm import tqdm
+import numpy as np
+import networkx as nx
+from control import dlyap
+from gklearn.utils.parallel import parallel_gm, parallel_me
+from gklearn.kernels import RandomWalk
+
+
+class SylvesterEquation(RandomWalk):
+    """Random-walk kernel computed by solving a discrete Sylvester equation."""
+
+
+    def __init__(self, **kwargs):
+        RandomWalk.__init__(self, **kwargs)
+
+
+    def _compute_gm_series(self):
+        self._check_edge_weight(self._graphs, self._verbose)  # was called without the required verbose argument
+        self._check_graphs(self._graphs)
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
+
+        lmda = self._weight
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        if self._q is None:
+            # don't normalize adjacency matrices if q is a uniform vector. Note
+            # A_wave_list actually contains the transposes of the adjacency matrices. 
+            if self._verbose >= 2:
+                iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout)
+            else:
+                iterator = self._graphs
+            A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator]  # fix: 2nd positional arg of adjacency_matrix is nodelist, not weight
+            # # normalized adjacency matrices
+            # A_wave_list = []
+            # for G in tqdm(Gn, desc='compute adjacency matrices', file=sys.stdout):
+            #     A_tilde = nx.adjacency_matrix(G, eweight).todense().transpose()
+            #     norm = A_tilde.sum(axis=0)
+            #     norm[norm == 0] = 1
+            #     A_wave_list.append(A_tilde / norm)
+
+            if self._p is None:  # p is uniform distribution as default.
+                from itertools import combinations_with_replacement
+                itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
+                if self._verbose >= 2:
+                    iterator = tqdm(itr, desc='calculating kernels', file=sys.stdout)
+                else:
+                    iterator = itr
+
+                for i, j in iterator:
+                    kernel = self.__kernel_do(A_wave_list[i], A_wave_list[j], lmda)
+                    gram_matrix[i][j] = kernel
+                    gram_matrix[j][i] = kernel
+
+            else: # @todo
+                pass
+        else: # @todo
+            pass
+
+        return gram_matrix
+
+
+    def _compute_gm_imap_unordered(self):
+        self._check_edge_weight(self._graphs, self._verbose)  # was called without the required verbose argument
+        self._check_graphs(self._graphs)
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
+
+        # compute Gram matrix.
+        gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+        if self._q is None:
+            # don't normalize adjacency matrices if q is a uniform vector. Note
+            # A_wave_list actually contains the transposes of the adjacency matrices.
+            if self._verbose >= 2:
+                iterator = tqdm(self._graphs, desc='compute adjacency matrices', file=sys.stdout)
+            else:
+                iterator = self._graphs
+            A_wave_list = [nx.adjacency_matrix(G, weight=self._edge_weight).todense().transpose() for G in iterator]  # @todo: parallel? (fix: weight must be passed by keyword)
+
+            if self._p is None:  # p is uniform distribution as default. 
+                def init_worker(A_wave_list_toshare):
+                    global G_A_wave_list
+                    G_A_wave_list = A_wave_list_toshare
+
+                do_fun = self._wrapper_kernel_do
+
+                parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                            glbv=(A_wave_list,), n_jobs=self._n_jobs, verbose=self._verbose)
+
+            else: # @todo
+                pass
+        else: # @todo
+            pass
+
+        return gram_matrix
+
+
+    def _compute_kernel_list_series(self, g1, g_list):
+        self._check_edge_weight(g_list + [g1], self._verbose)  # was called without the required verbose argument
+        self._check_graphs(g_list + [g1])
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
+
+        lmda = self._weight
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+
+        if self._q is None:
+            # don't normalize adjacency matrices if q is a uniform vector. Note
+            # A_wave_list actually contains the transposes of the adjacency matrices.
+            A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
+            if self._verbose >= 2:
+                iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
+            else:
+                iterator = range(len(g_list))
+            A_wave_list = [nx.adjacency_matrix(g_list[i], weight=self._edge_weight).todense().transpose() for i in iterator]  # fix: iterator yields indices, not graphs — original called adjacency_matrix on an int
+
+            if self._p is None:  # p is uniform distribution as default.
+                if self._verbose >= 2:
+                    iterator = tqdm(range(len(g_list)), desc='calculating kernels', file=sys.stdout)
+                else:
+                    iterator = range(len(g_list))
+
+                for i in iterator:
+                    kernel = self.__kernel_do(A_wave_1, A_wave_list[i], lmda)
+                    kernel_list[i] = kernel
+
+            else: # @todo
+                pass
+        else: # @todo
+            pass
+
+        return kernel_list
+
+
+    def _compute_kernel_list_imap_unordered(self, g1, g_list):
+        self._check_edge_weight(g_list + [g1], self._verbose)  # was called without the required verbose argument
+        self._check_graphs(g_list + [g1])
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
+
+        # compute kernel list.
+        kernel_list = [None] * len(g_list)
+
+        if self._q is None:
+            # don't normalize adjacency matrices if q is a uniform vector. 
Note
+            # A_wave_list actually contains the transposes of the adjacency matrices.
+            A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
+            if self._verbose >= 2:
+                iterator = tqdm(range(len(g_list)), desc='compute adjacency matrices', file=sys.stdout)
+            else:
+                iterator = range(len(g_list))
+            A_wave_list = [nx.adjacency_matrix(g_list[i], weight=self._edge_weight).todense().transpose() for i in iterator]  # @todo: parallel? (fix: iterator yields indices, not graphs)
+
+            if self._p is None:  # p is uniform distribution as default.
+                def init_worker(A_wave_1_toshare, A_wave_list_toshare):
+                    global G_A_wave_1, G_A_wave_list
+                    G_A_wave_1 = A_wave_1_toshare
+                    G_A_wave_list = A_wave_list_toshare
+
+                do_fun = self._wrapper_kernel_list_do
+
+                def func_assign(result, var_to_assign):
+                    var_to_assign[result[0]] = result[1]
+                itr = range(len(g_list))
+                len_itr = len(g_list)
+                parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                    init_worker=init_worker, glbv=(A_wave_1, A_wave_list), method='imap_unordered',
+                    n_jobs=self._n_jobs, itr_desc='calculating kernels', verbose=self._verbose)
+
+            else: # @todo
+                pass
+        else: # @todo
+            pass
+
+        return kernel_list
+
+
+    def _wrapper_kernel_list_do(self, itr):
+        return itr, self.__kernel_do(G_A_wave_1, G_A_wave_list[itr], self._weight)  # was self._kernel_do, which is never defined anywhere in the class
+
+
+    def _compute_single_kernel_series(self, g1, g2):
+        self._check_edge_weight([g1] + [g2], self._verbose)  # was called without the required verbose argument
+        self._check_graphs([g1] + [g2])
+        if self._verbose >= 2:
+            import warnings
+            warnings.warn('All labels are ignored.')
+
+        lmda = self._weight
+
+        if self._q is None:
+            # don't normalize adjacency matrices if q is a uniform vector. Note
+            # A_wave_list actually contains the transposes of the adjacency matrices.
+            A_wave_1 = nx.adjacency_matrix(g1, weight=self._edge_weight).todense().transpose()
+            A_wave_2 = nx.adjacency_matrix(g2, weight=self._edge_weight).todense().transpose()
+            if self._p is None:  # p is uniform distribution as default. 
+                kernel = self.__kernel_do(A_wave_1, A_wave_2, lmda)
+            else: # @todo
+                raise NotImplementedError('Non-uniform p is not implemented.')  # was pass: `kernel` would be unbound at the return below
+        else: # @todo
+            raise NotImplementedError('Non-uniform q is not implemented.')  # was pass: `kernel` would be unbound at the return below
+
+        return kernel
+
+
+    def __kernel_do(self, A_wave1, A_wave2, lmda):
+
+        S = lmda * A_wave2
+        T_t = A_wave1
+        # use uniform distribution if there is no prior knowledge.
+        nb_pd = len(A_wave1) * len(A_wave2)
+        p_times_uni = 1 / nb_pd
+        M0 = np.full((len(A_wave2), len(A_wave1)), p_times_uni)
+        X = dlyap(S, T_t, M0)
+        X = np.reshape(X, (-1, 1), order='F')
+        # use uniform distribution if there is no prior knowledge.
+        q_times = np.full((1, nb_pd), p_times_uni)
+        return np.dot(q_times, X)
+
+
+    def _wrapper_kernel_do(self, itr):
+        i = itr[0]
+        j = itr[1]
+        return i, j, self.__kernel_do(G_A_wave_list[i], G_A_wave_list[j], self._weight)
\ No newline at end of file
diff --git a/gklearn/utils/parallel.py b/gklearn/utils/parallel.py
index b5d0579..4c29522 100644
--- a/gklearn/utils/parallel.py
+++ b/gklearn/utils/parallel.py
@@ -54,7 +54,7 @@ def parallel_me(func, func_assign, var_to_assign, itr, len_itr=None, init_worker
 
 def parallel_gm(func, Kmatrix, Gn, init_worker=None, glbv=None,
                 method='imap_unordered', n_jobs=None, chunksize=None,
-                verbose=True):
+                verbose=True): # @todo: Gn seems not necessary.
     from itertools import combinations_with_replacement
     def func_assign(result, var_to_assign):
         var_to_assign[result[0]][result[1]] = result[2]