From 10a276008599bec0c44f74f12dac9f04d0d0463f Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Sun, 13 Dec 2020 12:32:25 +0100
Subject: [PATCH] Enable sp kernel and ssp kernel to NOT use fcsp.

---
 gklearn/kernels/graph_kernel.py     | 107 +++++++++--------
 gklearn/kernels/metadata.py         |   2 +-
 gklearn/kernels/shortest_path.py    | 150 ++++++++++++++-------
 gklearn/kernels/structural_sp.py    | 232 ++++++++++++++++++++--------
 gklearn/tests/test_graph_kernels.py | 133 +++++++++++--------
 5 files changed, 426 insertions(+), 198 deletions(-)

diff --git a/gklearn/kernels/graph_kernel.py b/gklearn/kernels/graph_kernel.py
index d263828..e9a4032 100644
--- a/gklearn/kernels/graph_kernel.py
+++ b/gklearn/kernels/graph_kernel.py
@@ -9,10 +9,11 @@ import numpy as np
 import networkx as nx
 import multiprocessing
 import time
+from gklearn.utils import normalize_gram_matrix


 class GraphKernel(object):
-
+
	def __init__(self):
		self._graphs = None
		self._parallel = ''
@@ -22,14 +23,14 @@ class GraphKernel(object):
		self._run_time = 0
		self._gram_matrix = None
		self._gram_matrix_unnorm = None
-
+
	def compute(self, *graphs, **kwargs):
		self._parallel = kwargs.get('parallel', 'imap_unordered')
		self._n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
		self._normalize = kwargs.get('normalize', True)
		self._verbose = kwargs.get('verbose', 2)
-
+
		if len(graphs) == 1:
			if not isinstance(graphs[0], list):
				raise Exception('Cannot detect graphs.')
@@ -40,9 +41,9 @@ class GraphKernel(object):
			self._gram_matrix = self._compute_gram_matrix()
			self._gram_matrix_unnorm = np.copy(self._gram_matrix)
			if self._normalize:
-				self._gram_matrix = self.normalize_gm(self._gram_matrix)
+				self._gram_matrix = normalize_gram_matrix(self._gram_matrix)
			return self._gram_matrix, self._run_time
-
+
		elif len(graphs) == 2:
			if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
				kernel = self._compute_single_kernel(graphs[0].copy(), graphs[1].copy())
@@ -59,14 +60,14 @@ class GraphKernel(object):
				return kernel_list, self._run_time
			else:
				raise Exception('Cannot detect graphs.')
-
+
		elif len(graphs) == 0 and self._graphs is None:
			raise Exception('Please add graphs before computing.')
-
+
		else:
			raise Exception('Cannot detect graphs.')
-
-
+
+
	def normalize_gm(self, gram_matrix):
		import warnings
		warnings.warn('gklearn.kernels.graph_kernel.normalize_gm will be deprecated, use gklearn.utils.normalize_gram_matrix instead', DeprecationWarning)
@@ -77,8 +78,8 @@ class GraphKernel(object):
				gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
				gram_matrix[j][i] = gram_matrix[i][j]
		return gram_matrix
-
-
+
+
	def compute_distance_matrix(self):
		if self._gram_matrix is None:
			raise Exception('Please compute the Gram matrix before computing distance matrix.')
@@ -97,98 +98,98 @@ class GraphKernel(object):
		dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
		dis_mean = np.mean(np.mean(dis_mat))
		return dis_mat, dis_max, dis_min, dis_mean
-
-
+
+
	def _compute_gram_matrix(self):
		start_time = time.time()
-
+
		if self._parallel == 'imap_unordered':
			gram_matrix = self._compute_gm_imap_unordered()
		elif self._parallel is None:
			gram_matrix = self._compute_gm_series()
		else:
			raise Exception('Parallel mode is not set correctly.')
-
+
		self._run_time = time.time() - start_time
		if self._verbose:
			print('Gram matrix of size %d built in %s seconds.'
				% (len(self._graphs), self._run_time))
-
+
		return gram_matrix
-
-
+
+
	def _compute_gm_series(self):
		pass

	def _compute_gm_imap_unordered(self):
		pass
-
-
+
+
	def _compute_kernel_list(self, g1, g_list):
		start_time = time.time()
-
+
		if self._parallel == 'imap_unordered':
			kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
		elif self._parallel is None:
			kernel_list = self._compute_kernel_list_series(g1, g_list)
		else:
			raise Exception('Parallel mode is not set correctly.')
-
+
		self._run_time = time.time() - start_time
		if self._verbose:
			print('Graph kernel between a graph and a list of %d graphs built in %s seconds.'
				% (len(g_list), self._run_time))
-
+
		return kernel_list
-
+
	def _compute_kernel_list_series(self, g1, g_list):
		pass
-
+
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		pass
-
-
+
+
	def _compute_single_kernel(self, g1, g2):
		start_time = time.time()
-
+
		kernel = self._compute_single_kernel_series(g1, g2)
-
+
		self._run_time = time.time() - start_time
		if self._verbose:
			print('Graph kernel between two graphs built in %s seconds.' % (self._run_time))
-
+
		return kernel
-
-
+
+
	def _compute_single_kernel_series(self, g1, g2):
		pass
-
-
+
+
	def is_graph(self, graph):
		if isinstance(graph, nx.Graph):
			return True
		if isinstance(graph, nx.DiGraph):
-			return True
+			return True
		if isinstance(graph, nx.MultiGraph):
-			return True
+			return True
		if isinstance(graph, nx.MultiDiGraph):
-			return True
+			return True
		return False
-
-
+
+
	@property
	def graphs(self):
		return self._graphs
-
-
+
+
	@property
	def parallel(self):
		return self._parallel
-
-
+
+
	@property
	def n_jobs(self):
		return self._n_jobs
@@ -197,30 +198,30 @@ class GraphKernel(object):
	@property
	def verbose(self):
		return self._verbose
-
-
+
+
	@property
	def normalize(self):
		return self._normalize
-
-
+
+
	@property
	def run_time(self):
		return self._run_time
-
-
+
+
	@property
	def gram_matrix(self):
		return self._gram_matrix
-
+
	@gram_matrix.setter
	def gram_matrix(self, value):
		self._gram_matrix = value
-
-
+
+
	@property
	def gram_matrix_unnorm(self):
-		return self._gram_matrix_unnorm
+		return self._gram_matrix_unnorm

	@gram_matrix_unnorm.setter
	def gram_matrix_unnorm(self, value):
diff --git a/gklearn/kernels/metadata.py b/gklearn/kernels/metadata.py
index d00d5d7..188fc56 100644
--- a/gklearn/kernels/metadata.py
+++ b/gklearn/kernels/metadata.py
@@ -12,7 +12,7 @@ GRAPH_KERNELS = {
	'common walk': '',
	'marginalized': '',
	'sylvester equation': '',
-	'fixed_point': '',
+	'fixed point': '',
	'conjugate gradient': '',
	'spectral decomposition': '',
	### based on paths.
diff --git a/gklearn/kernels/shortest_path.py b/gklearn/kernels/shortest_path.py
index 794095e..3a29423 100644
--- a/gklearn/kernels/shortest_path.py
+++ b/gklearn/kernels/shortest_path.py
@@ -5,9 +5,9 @@ Created on Tue Apr 7 15:24:58 2020

 @author: ljia

-@references:
-
-	[1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. InData
+@references:
+
+	[1] Borgwardt KM, Kriegel HP. Shortest-path kernels on graphs. In Data
	Mining, Fifth IEEE International Conference on 2005 Nov 27 (pp. 8-pp). IEEE.
""" @@ -23,13 +23,14 @@ from gklearn.kernels import GraphKernel class ShortestPath(GraphKernel): - + def __init__(self, **kwargs): GraphKernel.__init__(self) self._node_labels = kwargs.get('node_labels', []) self._node_attrs = kwargs.get('node_attrs', []) self._edge_weight = kwargs.get('edge_weight', None) self._node_kernels = kwargs.get('node_kernels', None) + self._fcsp = kwargs.get('fcsp', True) self._ds_infos = kwargs.get('ds_infos', {}) @@ -40,10 +41,10 @@ class ShortestPath(GraphKernel): else: iterator = self._graphs self._graphs = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] - + # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) - + from itertools import combinations_with_replacement itr = combinations_with_replacement(range(0, len(self._graphs)), 2) if self._verbose >= 2: @@ -54,10 +55,10 @@ class ShortestPath(GraphKernel): kernel = self._sp_do(self._graphs[i], self._graphs[j]) gram_matrix[i][j] = kernel gram_matrix[j][i] = kernel - + return gram_matrix - - + + def _compute_gm_imap_unordered(self): # get shortest path graph of each graph. pool = Pool(self._n_jobs) @@ -76,20 +77,20 @@ class ShortestPath(GraphKernel): self._graphs[i] = g pool.close() pool.join() - + # compute Gram matrix. gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) - + def init_worker(gs_toshare): global G_gs G_gs = gs_toshare do_fun = self._wrapper_sp_do - parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, + parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker, glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose) - + return gram_matrix - - + + def _compute_kernel_list_series(self, g1, g_list): # get shortest path graphs of g1 and each graph in g_list. g1 = getSPGraph(g1, edge_weight=self._edge_weight) @@ -98,7 +99,7 @@ class ShortestPath(GraphKernel): else: iterator = g_list g_list = [getSPGraph(g, edge_weight=self._edge_weight) for g in iterator] - + # compute kernel list. kernel_list = [None] * len(g_list) if self._verbose >= 2: @@ -108,10 +109,10 @@ class ShortestPath(GraphKernel): for i in iterator: kernel = self._sp_do(g1, g_list[i]) kernel_list[i] = kernel - + return kernel_list - - + + def _compute_kernel_list_imap_unordered(self, g1, g_list): # get shortest path graphs of g1 and each graph in g_list. g1 = getSPGraph(g1, edge_weight=self._edge_weight) @@ -131,49 +132,57 @@ class ShortestPath(GraphKernel): g_list[i] = g pool.close() pool.join() - + # compute Gram matrix. 
		kernel_list = [None] * len(g_list)

		def init_worker(g1_toshare, gl_toshare):
			global G_g1, G_gl
-			G_g1 = g1_toshare
-			G_gl = gl_toshare
+			G_g1 = g1_toshare
+			G_gl = gl_toshare
		do_fun = self._wrapper_kernel_list_do
-		def func_assign(result, var_to_assign):
+		def func_assign(result, var_to_assign):
			var_to_assign[result[0]] = result[1]
		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
			init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
-
+
		return kernel_list
-
-
+
+
	def _wrapper_kernel_list_do(self, itr):
		return itr, self._sp_do(G_g1, G_gl[itr])
-
-
+
+
	def _compute_single_kernel_series(self, g1, g2):
		g1 = getSPGraph(g1, edge_weight=self._edge_weight)
		g2 = getSPGraph(g2, edge_weight=self._edge_weight)
		kernel = self._sp_do(g1, g2)
-		return kernel
-
-
+		return kernel
+
+
	def _wrapper_get_sp_graphs(self, itr_item):
		g = itr_item[0]
		i = itr_item[1]
		return i, getSPGraph(g, edge_weight=self._edge_weight)
-
-
+
+
	def _sp_do(self, g1, g2):
-
+
+		if self._fcsp: # @todo: this check could be moved outside _sp_do().
+			return self._sp_do_fcsp(g1, g2)
+		else:
+			return self._sp_do_naive(g1, g2)
+
+
+	def _sp_do_fcsp(self, g1, g2):
+
		kernel = 0
-
+
		# compute shortest path matrices first, method borrowed from FCSP.
		vk_dict = {}  # shortest path matrices dict
-		if len(self._node_labels) > 0:
+		if len(self._node_labels) > 0: # @todo: this check could be moved outside _sp_do().
			# node symb and non-symb labeled
			if len(self._node_attrs) > 0:
				kn = self._node_kernels['mix']
@@ -208,7 +217,7 @@ class ShortestPath(GraphKernel):
				if e1[2]['cost'] == e2[2]['cost']:
					kernel += 1
			return kernel
-
+
		# compute graph kernels
		if self._ds_infos['directed']:
			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
@@ -225,7 +234,7 @@ class ShortestPath(GraphKernel):
					kn1 = nk11 * nk22
					kn2 = nk12 * nk21
					kernel += kn1 + kn2
-
+
		# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
		# # compute vertex kernels
		# try:
@@ -238,7 +247,7 @@ class ShortestPath(GraphKernel):
		# 			vk_mat[i1][i2] = kn(
		# 				n1[1][node_label], n2[1][node_label],
		# 				[n1[1]['attributes']], [n2[1]['attributes']])
-
+
		# range1 = range(0, len(edge_w_g[i]))
		# range2 = range(0, len(edge_w_g[j]))
		# for i1 in range1:
@@ -254,10 +263,67 @@ class ShortestPath(GraphKernel):
		# 				kn1 = vk_mat[x1][x2] * vk_mat[y1][y2]
		# 				kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
		# 				kernel += kn1 + kn2
-
+
		return kernel
-
-
+
+
+	def _sp_do_naive(self, g1, g2):
+
+		kernel = 0
+
+		# Define the function to compute kernels between vertices in each condition.
+		if len(self._node_labels) > 0:
+			# node symb and non-symb labeled
+			if len(self._node_attrs) > 0:
+				def compute_vk(n1, n2):
+					kn = self._node_kernels['mix']
+					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
+					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
+					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
+					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
+					return kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
+			# node symb labeled
+			else:
+				def compute_vk(n1, n2):
+					kn = self._node_kernels['symb']
+					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
+					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
+					return kn(n1_labels, n2_labels)
+		else:
+			# node non-symb labeled
+			if len(self._node_attrs) > 0:
+				def compute_vk(n1, n2):
+					kn = self._node_kernels['nsymb']
+					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
+					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
+					return kn(n1_attrs, n2_attrs)
+			# node unlabeled
+			else:
+				for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
+					if e1[2]['cost'] == e2[2]['cost']:
+						kernel += 1
+				return kernel
+
+		# compute graph kernels
+		if self._ds_infos['directed']:
+			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
+				if e1[2]['cost'] == e2[2]['cost']:
+					nk11, nk22 = compute_vk(e1[0], e2[0]), compute_vk(e1[1], e2[1])
+					kn1 = nk11 * nk22
+					kernel += kn1
+		else:
+			for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
+				if e1[2]['cost'] == e2[2]['cost']:
+					# each edge walk is counted twice, starting from both its extreme nodes.
+					nk11, nk12, nk21, nk22 = compute_vk(e1[0], e2[0]), compute_vk(
+						e1[0], e2[1]), compute_vk(e1[1], e2[0]), compute_vk(e1[1], e2[1])
+					kn1 = nk11 * nk22
+					kn2 = nk12 * nk21
+					kernel += kn1 + kn2
+
+		return kernel
+
+
	def _wrapper_sp_do(self, itr):
		i = itr[0]
		j = itr[1]
diff --git a/gklearn/kernels/structural_sp.py b/gklearn/kernels/structural_sp.py
index 19322a7..1464807 100644
--- a/gklearn/kernels/structural_sp.py
+++ b/gklearn/kernels/structural_sp.py
@@ -5,9 +5,9 @@ Created on Mon Mar 30 11:59:57 2020

 @author: ljia

-@references:
+@references:

-	[1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For
+	[1] Suard F, Rakotomamonjy A, Bensrhair A. Kernel on Bag of Paths For
	Measuring Similarity of Shapes. In ESANN 2007 Apr 25 (pp. 355-360).
 """
 import sys
@@ -23,7 +23,7 @@ from gklearn.kernels import GraphKernel

 class StructuralSP(GraphKernel):
-
+
	def __init__(self, **kwargs):
		GraphKernel.__init__(self)
		self._node_labels = kwargs.get('node_labels', [])
@@ -34,6 +34,7 @@ class StructuralSP(GraphKernel):
		self._node_kernels = kwargs.get('node_kernels', None)
		self._edge_kernels = kwargs.get('edge_kernels', None)
		self._compute_method = kwargs.get('compute_method', 'naive')
+		self._fcsp = kwargs.get('fcsp', True)
		self._ds_infos = kwargs.get('ds_infos', {})
@@ -50,10 +51,10 @@ class StructuralSP(GraphKernel):
		else:
			for g in iterator:
				splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
-
+
		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
-
+
		from itertools import combinations_with_replacement
		itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
		if self._verbose >= 2:
@@ -72,10 +73,10 @@ class StructuralSP(GraphKernel):
#					print("error here ")
				gram_matrix[i][j] = kernel
				gram_matrix[j][i] = kernel
-
+
		return gram_matrix
-
-
+
+
	def _compute_gm_imap_unordered(self):
		# get shortest paths of each graph in the graphs.
		splist = [None] * len(self._graphs)
@@ -87,9 +88,9 @@ class StructuralSP(GraphKernel):
			chunksize = 100
		# get shortest path graphs of self._graphs
		if self._compute_method == 'trie':
-			get_sps_fun = self._wrapper_get_sps_trie
+			get_sps_fun = self._wrapper_get_sps_trie
		else:
-			get_sps_fun = self._wrapper_get_sps_naive
+			get_sps_fun = self._wrapper_get_sps_naive
		if self.verbose >= 2:
			iterator = tqdm(pool.imap_unordered(get_sps_fun, itr, chunksize),
							desc='getting shortest paths', file=sys.stdout)
@@ -99,24 +100,24 @@ class StructuralSP(GraphKernel):
			splist[i] = sp
		pool.close()
		pool.join()
-
+
		# compute Gram matrix.
		gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

		def init_worker(spl_toshare, gs_toshare):
			global G_spl, G_gs
			G_spl = spl_toshare
-			G_gs = gs_toshare
-		if self._compute_method == 'trie':
+			G_gs = gs_toshare
+		if self._compute_method == 'trie':
			do_fun = self._wrapper_ssp_do_trie
-		else:
-			do_fun = self._wrapper_ssp_do_naive
-		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+		else:
+			do_fun = self._wrapper_ssp_do_naive
+		parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
					glbv=(splist, self._graphs), n_jobs=self._n_jobs, verbose=self._verbose)
-
+
		return gram_matrix
-
-
+
+
	def _compute_kernel_list_series(self, g1, g_list):
		# get shortest paths of g1 and each graph in g_list.
		sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
@@ -131,7 +132,7 @@ class StructuralSP(GraphKernel):
		else:
			for g in iterator:
				splist.append(get_shortest_paths(g, self._edge_weight, self._ds_infos['directed']))
-
+
		# compute kernel list.
		kernel_list = [None] * len(g_list)
		if self._verbose >= 2:
@@ -146,10 +147,10 @@ class StructuralSP(GraphKernel):
		for i in iterator:
			kernel = self._ssp_do_naive(g1, g_list[i], sp1, splist[i])
			kernel_list[i] = kernel
-
+
		return kernel_list
-
-
+
+
	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		# get shortest paths of g1 and each graph in g_list.
		sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
@@ -162,9 +163,9 @@ class StructuralSP(GraphKernel):
			chunksize = 100
		# get shortest path graphs of g_list
		if self._compute_method == 'trie':
-			get_sps_fun = self._wrapper_get_sps_trie
+			get_sps_fun = self._wrapper_get_sps_trie
		else:
-			get_sps_fun = self._wrapper_get_sps_naive
+			get_sps_fun = self._wrapper_get_sps_naive
		if self.verbose >= 2:
			iterator = tqdm(pool.imap_unordered(get_sps_fun, itr, chunksize),
							desc='getting shortest paths', file=sys.stdout)
@@ -174,7 +175,7 @@ class StructuralSP(GraphKernel):
			splist[i] = sp
		pool.close()
		pool.join()
-
+
		# compute Gram matrix.
		kernel_list = [None] * len(g_list)
@@ -182,27 +183,27 @@ class StructuralSP(GraphKernel):
			global G_sp1, G_spl, G_g1, G_gl
			G_sp1 = sp1_toshare
			G_spl = spl_toshare
-			G_g1 = g1_toshare
-			G_gl = gl_toshare
-		if self._compute_method == 'trie':
+			G_g1 = g1_toshare
+			G_gl = gl_toshare
+		if self._compute_method == 'trie':
			do_fun = self._wrapper_ssp_do_trie
-		else:
+		else:
			do_fun = self._wrapper_kernel_list_do
-		def func_assign(result, var_to_assign):
+		def func_assign(result, var_to_assign):
			var_to_assign[result[0]] = result[1]
		itr = range(len(g_list))
		len_itr = len(g_list)
		parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
			init_worker=init_worker, glbv=(sp1, splist, g1, g_list), method='imap_unordered',
			n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
-
+
		return kernel_list
-
-
+
+
	def _wrapper_kernel_list_do(self, itr):
		return itr, self._ssp_do_naive(G_g1, G_gl[itr], G_sp1, G_spl[itr])
-
-
+
+
	def _compute_single_kernel_series(self, g1, g2):
		sp1 = get_shortest_paths(g1, self._edge_weight, self._ds_infos['directed'])
		sp2 = get_shortest_paths(g2, self._edge_weight, self._ds_infos['directed'])
@@ -210,26 +211,33 @@ class StructuralSP(GraphKernel):
			kernel = self._ssp_do_trie(g1, g2, sp1, sp2)
		else:
			kernel = self._ssp_do_naive(g1, g2, sp1, sp2)
-		return kernel
-
-
+		return kernel
+
+
	def _wrapper_get_sps_naive(self, itr_item):
		g = itr_item[0]
		i = itr_item[1]
		return i, get_shortest_paths(g, self._edge_weight, self._ds_infos['directed'])
-
-
+
+
	def _ssp_do_naive(self, g1, g2, spl1, spl2):
-
+		if self._fcsp: # @todo: this check could be moved outside _ssp_do_naive().
+			return self._sp_do_naive_fcsp(g1, g2, spl1, spl2)
+		else:
+			return self._sp_do_naive_naive(g1, g2, spl1, spl2)
+
+
+	def _sp_do_naive_fcsp(self, g1, g2, spl1, spl2):
+
		kernel = 0
-
+
		# First, compute shortest path matrices, method borrowed from FCSP.
		vk_dict = self._get_all_node_kernels(g1, g2)
		# Then, compute kernels between all pairs of edges, which is an idea of
		# extension of FCSP. It suits sparse graphs, which is the most common
		# case we encounter. For dense graphs, this would be slow.
		ek_dict = self._get_all_edge_kernels(g1, g2)
-
+
		# compute graph kernels
		if vk_dict:
			if ek_dict:
@@ -279,7 +287,7 @@ class StructuralSP(GraphKernel):
				print(g1.nodes(data=True))
				print(g1.edges(data=True))
				raise Exception
-
+
		# # ---- exact implementation of the Fast Computation of Shortest Path Kernel (FCSP), reference [2], sadly it is slower than the current implementation
		# # compute vertex kernel matrix
		# try:
@@ -292,7 +300,7 @@ class StructuralSP(GraphKernel):
		# 			vk_mat[i1][i2] = kn(
		# 				n1[1][node_label], n2[1][node_label],
		# 				[n1[1]['attributes']], [n2[1]['attributes']])
-
+
		# range1 = range(0, len(edge_w_g[i]))
		# range2 = range(0, len(edge_w_g[j]))
		# for i1 in range1:
@@ -309,18 +317,136 @@ class StructuralSP(GraphKernel):
		# 				kn2 = vk_mat[x1][y2] * vk_mat[y1][x2]
		# 				Kmatrix += kn1 + kn2
		return kernel
-
-
+
+
+	def _sp_do_naive_naive(self, g1, g2, spl1, spl2):
+
+		kernel = 0
+
+		# Define the function to compute kernels between vertices in each condition.
+		if len(self._node_labels) > 0:
+			# node symb and non-symb labeled
+			if len(self._node_attrs) > 0:
+				def compute_vk(n1, n2):
+					kn = self._node_kernels['mix']
+					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
+					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
+					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
+					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
+					return kn(n1_labels, n2_labels, n1_attrs, n2_attrs)
+			# node symb labeled
+			else:
+				def compute_vk(n1, n2):
+					kn = self._node_kernels['symb']
+					n1_labels = [g1.nodes[n1][nl] for nl in self._node_labels]
+					n2_labels = [g2.nodes[n2][nl] for nl in self._node_labels]
+					return kn(n1_labels, n2_labels)
+		else:
+			# node non-symb labeled
+			if len(self._node_attrs) > 0:
+				def compute_vk(n1, n2):
+					kn = self._node_kernels['nsymb']
+					n1_attrs = [g1.nodes[n1][na] for na in self._node_attrs]
+					n2_attrs = [g2.nodes[n2][na] for na in self._node_attrs]
+					return kn(n1_attrs, n2_attrs)
+#			# node unlabeled
+#			else:
+#				for e1, e2 in product(g1.edges(data=True), g2.edges(data=True)):
+#					if e1[2]['cost'] == e2[2]['cost']:
+#						kernel += 1
+#				return kernel
+
+		# Define the function to compute kernels between edges in each condition.
+		if len(self._edge_labels) > 0:
+			# edge symb and non-symb labeled
+			if len(self._edge_attrs) > 0:
+				def compute_ek(e1, e2):
+					ke = self._edge_kernels['mix']
+					e1_labels = [g1.edges[e1][el] for el in self._edge_labels]
+					e2_labels = [g2.edges[e2][el] for el in self._edge_labels]
+					e1_attrs = [g1.edges[e1][ea] for ea in self._edge_attrs]
+					e2_attrs = [g2.edges[e2][ea] for ea in self._edge_attrs]
+					return ke(e1_labels, e2_labels, e1_attrs, e2_attrs)
+			# edge symb labeled
+			else:
+				def compute_ek(e1, e2):
+					ke = self._edge_kernels['symb']
+					e1_labels = [g1.edges[e1][el] for el in self._edge_labels]
+					e2_labels = [g2.edges[e2][el] for el in self._edge_labels]
+					return ke(e1_labels, e2_labels)
+		else:
+			# edge non-symb labeled
+			if len(self._edge_attrs) > 0:
+				def compute_ek(e1, e2):
+					ke = self._edge_kernels['nsymb']
+					e1_attrs = [g1.edges[e1][ea] for ea in self._edge_attrs]
+					e2_attrs = [g2.edges[e2][ea] for ea in self._edge_attrs]
+					return ke(e1_attrs, e2_attrs)
+
+		# compute graph kernels
+		if len(self._node_labels) > 0 or len(self._node_attrs) > 0:
+			if len(self._edge_labels) > 0 or len(self._edge_attrs) > 0:
+				for p1, p2 in product(spl1, spl2):
+					if len(p1) == len(p2):
+						kpath = compute_vk(p1[0], p2[0])
+						if kpath:
+							for idx in range(1, len(p1)):
+								kpath *= compute_vk(p1[idx], p2[idx]) * \
+									compute_ek((p1[idx-1], p1[idx]),
+										(p2[idx-1], p2[idx]))
+								if not kpath:
+									break
+							kernel += kpath  # add up kernels of all paths
+			else:
+				for p1, p2 in product(spl1, spl2):
+					if len(p1) == len(p2):
+						kpath = compute_vk(p1[0], p2[0])
+						if kpath:
+							for idx in range(1, len(p1)):
+								kpath *= compute_vk(p1[idx], p2[idx])
+								if not kpath:
+									break
+							kernel += kpath  # add up kernels of all paths
+		else:
+			if len(self._edge_labels) > 0 or len(self._edge_attrs) > 0:
+				for p1, p2 in product(spl1, spl2):
+					if len(p1) == len(p2):
+						if len(p1) == 0:
+							kernel += 1
+						else:
+							kpath = 1
+							for idx in range(0, len(p1) - 1):
+								kpath *= compute_ek((p1[idx], p1[idx+1]),
+									(p2[idx], p2[idx+1]))
+								if not kpath:
+									break
+							kernel += kpath  # add up kernels of all paths
+			else:
+				for p1, p2 in product(spl1, spl2):
+					if len(p1) == len(p2):
+						kernel += 1
+
+		try:
+			kernel = kernel / (len(spl1) * len(spl2))  # average over all pairs of paths
+		except ZeroDivisionError:
+			print(spl1, spl2)
+			print(g1.nodes(data=True))
+			print(g1.edges(data=True))
+			raise Exception
+
+		return kernel
+
+
	def _wrapper_ssp_do_naive(self, itr):
		i = itr[0]
		j = itr[1]
		return i, j, self._ssp_do_naive(G_gs[i], G_gs[j], G_spl[i], G_spl[j])
-
-
+
+
	def _get_all_node_kernels(self, g1, g2):
		return compute_vertex_kernels(g1, g2, self._node_kernels, node_labels=self._node_labels, node_attrs=self._node_attrs)
-
-
+
+
	def _get_all_edge_kernels(self, g1, g2):
		# compute kernels between all pairs of edges, which is an idea of
		# extension of FCSP. It suits sparse graphs, which is the most common
		# case we encounter. For dense graphs, this would be slow.
@@ -368,5 +494,5 @@ class StructuralSP(GraphKernel):
			# edge unlabeled
			else:
				pass
-
+
		return ek_dict
\ No newline at end of file
diff --git a/gklearn/tests/test_graph_kernels.py b/gklearn/tests/test_graph_kernels.py
index 061b17c..a97635a 100644
--- a/gklearn/tests/test_graph_kernels.py
+++ b/gklearn/tests/test_graph_kernels.py
@@ -3,13 +3,14 @@

 import pytest
 import multiprocessing
+import numpy as np


 def chooseDataset(ds_name):
	"""Choose dataset according to name.
	"""
	from gklearn.utils import Dataset
-
+
	dataset = Dataset()

	# no node labels (and no edge labels).
@@ -46,9 +47,9 @@ def chooseDataset(ds_name):
	elif ds_name == 'Cuneiform':
		dataset.load_predefined_dataset(ds_name)
		dataset.trim_dataset(edge_required=True)
-
+
	dataset.cut_graphs(range(0, 3))
-
+
	return dataset


@@ -57,7 +58,7 @@ def test_list_graph_kernels():
	"""
	from gklearn.kernels import GRAPH_KERNELS, list_of_graph_kernels
	assert list_of_graph_kernels() == [i for i in GRAPH_KERNELS]
-
+

 @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
@@ -68,10 +69,10 @@ def test_CommonWalk(ds_name, parallel, weight, compute_method):
	"""
	from gklearn.kernels import CommonWalk
	import networkx as nx
-
+
	dataset = chooseDataset(ds_name)
	dataset.load_graphs([g for g in dataset.graphs if nx.number_of_nodes(g) > 1])
-
+
	try:
		graph_kernel = CommonWalk(node_labels=dataset.node_labels,
					edge_labels=dataset.edge_labels,
@@ -87,8 +88,8 @@ def test_CommonWalk(ds_name, parallel, weight, compute_method):

	except Exception as exception:
		assert False, exception
-
-
+
+
 @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
 @pytest.mark.parametrize('remove_totters', [False]) #[True, False])
 @pytest.mark.parametrize('parallel', ['imap_unordered', None])
@@ -96,9 +97,9 @@ def test_Marginalized(ds_name, parallel, remove_totters):
	"""Test marginalized kernel.
	"""
	from gklearn.kernels import Marginalized
-
+
	dataset = chooseDataset(ds_name)
-
+
	try:
		graph_kernel = Marginalized(node_labels=dataset.node_labels,
					edge_labels=dataset.edge_labels,
@@ -115,15 +116,15 @@ def test_Marginalized(ds_name, parallel, remove_totters):

	except Exception as exception:
		assert False, exception
-
-
+
+
 @pytest.mark.parametrize('ds_name', ['Acyclic'])
 @pytest.mark.parametrize('parallel', ['imap_unordered', None])
 def test_SylvesterEquation(ds_name, parallel):
	"""Test Sylvester equation kernel.
""" from gklearn.kernels import SylvesterEquation - + dataset = chooseDataset(ds_name) try: @@ -139,11 +140,11 @@ def test_SylvesterEquation(ds_name, parallel): parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) - + except Exception as exception: assert False, exception - - + + @pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS']) @pytest.mark.parametrize('parallel', ['imap_unordered', None]) def test_ConjugateGradient(ds_name, parallel): @@ -152,9 +153,9 @@ def test_ConjugateGradient(ds_name, parallel): from gklearn.kernels import ConjugateGradient from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct import functools - + dataset = chooseDataset(ds_name) - + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} @@ -177,11 +178,11 @@ def test_ConjugateGradient(ds_name, parallel): parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) - + except Exception as exception: assert False, exception - - + + @pytest.mark.parametrize('ds_name', ['Acyclic', 'AIDS']) @pytest.mark.parametrize('parallel', ['imap_unordered', None]) def test_FixedPoint(ds_name, parallel): @@ -190,9 +191,9 @@ def test_FixedPoint(ds_name, parallel): from gklearn.kernels import FixedPoint from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct import functools - + dataset = chooseDataset(ds_name) - + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} @@ -215,11 +216,11 @@ def test_FixedPoint(ds_name, parallel): parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) - + except Exception as exception: assert False, exception - - + + @pytest.mark.parametrize('ds_name', ['Acyclic']) @pytest.mark.parametrize('sub_kernel', ['exp', 'geo']) @pytest.mark.parametrize('parallel', ['imap_unordered', None]) @@ -227,7 +228,7 @@ def test_SpectralDecomposition(ds_name, sub_kernel, parallel): """Test spectral decomposition kernel. 
""" from gklearn.kernels import SpectralDecomposition - + dataset = chooseDataset(ds_name) try: @@ -244,11 +245,11 @@ def test_SpectralDecomposition(ds_name, sub_kernel, parallel): parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) - + except Exception as exception: assert False, exception - - + + # @pytest.mark.parametrize( # 'compute_method,ds_name,sub_kernel', # [ @@ -268,7 +269,7 @@ def test_SpectralDecomposition(ds_name, sub_kernel, parallel): # from gklearn.kernels import RandomWalk # from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct # import functools -# +# # dataset = chooseDataset(ds_name) # mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) @@ -297,7 +298,7 @@ def test_SpectralDecomposition(ds_name, sub_kernel, parallel): # except Exception as exception: # assert False, exception - + @pytest.mark.parametrize('ds_name', ['Alkane', 'Acyclic', 'Letter-med', 'AIDS', 'Fingerprint']) @pytest.mark.parametrize('parallel', ['imap_unordered', None]) def test_ShortestPath(ds_name, parallel): @@ -306,23 +307,38 @@ def test_ShortestPath(ds_name, parallel): from gklearn.kernels import ShortestPath from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct import functools - + dataset = chooseDataset(ds_name) - + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} try: graph_kernel = ShortestPath(node_labels=dataset.node_labels, node_attrs=dataset.node_attrs, ds_infos=dataset.get_dataset_infos(keys=['directed']), + fcsp=True, node_kernels=sub_kernels) - gram_matrix, run_time = graph_kernel.compute(dataset.graphs, + gram_matrix1, run_time = graph_kernel.compute(dataset.graphs, + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], + parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + + graph_kernel = ShortestPath(node_labels=dataset.node_labels, + node_attrs=dataset.node_attrs, + ds_infos=dataset.get_dataset_infos(keys=['directed']), + fcsp=False, + node_kernels=sub_kernels) + gram_matrix2, run_time = graph_kernel.compute(dataset.graphs, parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:], parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1], parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) + assert np.array_equal(gram_matrix1, gram_matrix2) + except Exception as exception: assert False, exception @@ -336,26 +352,44 @@ def test_StructuralSP(ds_name, parallel): from gklearn.kernels import StructuralSP from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct import functools - + dataset = chooseDataset(ds_name) - + mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel) sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel} try: graph_kernel = StructuralSP(node_labels=dataset.node_labels, - edge_labels=dataset.edge_labels, + 
+					edge_labels=dataset.edge_labels,
					node_attrs=dataset.node_attrs,
					edge_attrs=dataset.edge_attrs,
					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					fcsp=True,
					node_kernels=sub_kernels,
					edge_kernels=sub_kernels)
-		gram_matrix, run_time = graph_kernel.compute(dataset.graphs,
+		gram_matrix1, run_time = graph_kernel.compute(dataset.graphs,
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+		kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
+			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
+
+		graph_kernel = StructuralSP(node_labels=dataset.node_labels,
+					edge_labels=dataset.edge_labels,
+					node_attrs=dataset.node_attrs,
+					edge_attrs=dataset.edge_attrs,
+					ds_infos=dataset.get_dataset_infos(keys=['directed']),
+					fcsp=False,
+					node_kernels=sub_kernels,
+					edge_kernels=sub_kernels)
+		gram_matrix2, run_time = graph_kernel.compute(dataset.graphs,
			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
		kernel_list, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1:],
			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
		kernel, run_time = graph_kernel.compute(dataset.graphs[0], dataset.graphs[1],
			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)

+		assert np.array_equal(gram_matrix1, gram_matrix2)
+
	except Exception as exception:
		assert False, exception
@@ -369,9 +403,9 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
	"""Test path kernel up to length $h$.
	"""
	from gklearn.kernels import PathUpToH
-
+
	dataset = chooseDataset(ds_name)
-
+
	try:
		graph_kernel = PathUpToH(node_labels=dataset.node_labels,
					edge_labels=dataset.edge_labels,
@@ -385,8 +419,8 @@ def test_PathUpToH(ds_name, parallel, k_func, compute_method):
			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
	except Exception as exception:
		assert False, exception
-
-
+
+
 @pytest.mark.parametrize('ds_name', ['Alkane', 'AIDS'])
 @pytest.mark.parametrize('parallel', ['imap_unordered', None])
@@ -395,10 +429,10 @@ def test_Treelet(ds_name, parallel):
	"""Test treelet kernel.
	"""
	from gklearn.kernels import Treelet
	from gklearn.utils.kernels import polynomialkernel
	import functools
-
+
	dataset = chooseDataset(ds_name)

-	pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
+	pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
	try:
		graph_kernel = Treelet(node_labels=dataset.node_labels,
					edge_labels=dataset.edge_labels,
@@ -412,8 +446,8 @@ def test_Treelet(ds_name, parallel):
			parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True)
	except Exception as exception:
		assert False, exception
-
-
+
+
 @pytest.mark.parametrize('ds_name', ['Acyclic'])
 #@pytest.mark.parametrize('base_kernel', ['subtree', 'sp', 'edge'])
 # @pytest.mark.parametrize('base_kernel', ['subtree'])
 @pytest.mark.parametrize('parallel', ['imap_unordered', None])
@@ -422,7 +456,7 @@ def test_WLSubtree(ds_name, parallel):
	"""Test Weisfeiler-Lehman subtree kernel.
""" from gklearn.kernels import WLSubtree - + dataset = chooseDataset(ds_name) try: @@ -438,12 +472,13 @@ def test_WLSubtree(ds_name, parallel): parallel=parallel, n_jobs=multiprocessing.cpu_count(), verbose=True) except Exception as exception: assert False, exception - + if __name__ == "__main__": test_list_graph_kernels() # test_spkernel('Alkane', 'imap_unordered') # test_StructuralSP('Fingerprint_edge', 'imap_unordered') + test_StructuralSP('Acyclic', 'imap_unordered') # test_WLSubtree('Acyclic', 'imap_unordered') # test_RandomWalk('Acyclic', 'sylvester', None, 'imap_unordered') # test_RandomWalk('Acyclic', 'conjugate', None, 'imap_unordered')