From e84bc768669e1a66c59ed17f98e929a59c87dbc0 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Sat, 26 Dec 2020 11:19:31 +0100
Subject: [PATCH] Add parallelization to WL subtree kernel.

---
 gklearn/kernels/weisfeiler_lehman.py | 462 ++++++++++++++++++++++-------------
 gklearn/tests/test_graph_kernels.py  |   1 +
 2 files changed, 292 insertions(+), 171 deletions(-)

diff --git a/gklearn/kernels/weisfeiler_lehman.py b/gklearn/kernels/weisfeiler_lehman.py
index 8b36b37..da14ff8 100644
--- a/gklearn/kernels/weisfeiler_lehman.py
+++ b/gklearn/kernels/weisfeiler_lehman.py
@@ -7,22 +7,22 @@ Created on Tue Apr 14 15:16:34 2020
 
 @references:
 
-    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. 
-    Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 
+    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM.
+    Weisfeiler-lehman graph kernels. Journal of Machine Learning Research.
     2011;12(Sep):2539-61.
 """
 
 import numpy as np
 import networkx as nx
 from collections import Counter
-from functools import partial
+# from functools import partial
 from gklearn.utils import SpecialLabel
-from gklearn.utils.parallel import parallel_gm
+from gklearn.utils.parallel import parallel_gm, parallel_me
 from gklearn.kernels import GraphKernel
 
 
-class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge user kernel.
-    
+class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
+
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
         self._node_labels = kwargs.get('node_labels', [])
@@ -33,115 +33,235 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
 
 
     def _compute_gm_series(self):
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('A part of the computation is parallelized.')
-        
+#        if self._verbose >= 2:
+#            import warnings
+#            warnings.warn('A part of the computation is parallelized.')
+
         self._add_dummy_node_labels(self._graphs)
-        
+
         # for WL subtree kernel
-        if self._base_kernel == 'subtree':
+        if self._base_kernel == 'subtree':
             gram_matrix = self._subtree_kernel_do(self._graphs)
-        
+
         # for WL shortest path kernel
         elif self._base_kernel == 'sp':
             gram_matrix = self._sp_kernel_do(self._graphs)
-        
+
         # for WL edge kernel
         elif self._base_kernel == 'edge':
             gram_matrix = self._edge_kernel_do(self._graphs)
-        
+
         # for user defined base kernel
         else:
             gram_matrix = self._user_kernel_do(self._graphs)
-        
+
         return gram_matrix
-    
-    
+
+
     def _compute_gm_imap_unordered(self):
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
-        return self._compute_gm_series()
-    
-    
+        self._add_dummy_node_labels(self._graphs)
+
+        if self._base_kernel == 'subtree':
+            gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+            def init_worker(gn_toshare):
+                global G_gn
+                G_gn = gn_toshare
+            do_fun = self._wrapper_pairwise
+            parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                        glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+            return gram_matrix
+        else:
+            if self._verbose >= 2:
+                import warnings
+                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
+            return self._compute_gm_series()
+
+
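The init_worker/global pattern above is how parallel_gm hands the graph list to each worker process exactly once, rather than pickling it into every task. Below is a minimal, self-contained sketch of that pattern, with parallel_gm reduced to a plain multiprocessing.Pool; the _toy_pairwise function and the list-of-lists "graphs" are illustrative stand-ins, not gklearn API:

    from itertools import combinations_with_replacement
    from multiprocessing import Pool

    import numpy as np


    def _init_worker(gn_toshare):
        # Runs once per worker process; publishes the shared data as a global.
        global G_gn
        G_gn = gn_toshare


    def _toy_pairwise(ij):
        # Stand-in for self.pairwise_kernel(G_gn[i], G_gn[j]).
        i, j = ij
        return i, j, float(len(G_gn[i]) * len(G_gn[j]))


    if __name__ == '__main__':
        graphs = [list(range(n)) for n in (2, 3, 4)]  # stand-ins for nx.Graph objects
        gram_matrix = np.zeros((len(graphs), len(graphs)))
        itr = combinations_with_replacement(range(len(graphs)), 2)
        with Pool(processes=2, initializer=_init_worker, initargs=(graphs,)) as pool:
            for i, j, kernel in pool.imap_unordered(_toy_pairwise, itr):
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        print(gram_matrix)

Sharing the graphs through the pool initializer avoids re-serializing the whole dataset for each of the O(n^2) pairwise tasks, which is the point of the parallel path added by this patch.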
     def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better.
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('A part of the computation is parallelized.')
-        
+#        if self._verbose >= 2:
+#            import warnings
+#            warnings.warn('A part of the computation is parallelized.')
+
         self._add_dummy_node_labels(g_list + [g1])
-        
+
         # for WL subtree kernel
-        if self._base_kernel == 'subtree':
+        if self._base_kernel == 'subtree':
             gram_matrix = self._subtree_kernel_do(g_list + [g1])
-        
+
         # for WL shortest path kernel
         elif self._base_kernel == 'sp':
             gram_matrix = self._sp_kernel_do(g_list + [g1])
-        
+
         # for WL edge kernel
         elif self._base_kernel == 'edge':
             gram_matrix = self._edge_kernel_do(g_list + [g1])
-        
+
         # for user defined base kernel
         else:
             gram_matrix = self._user_kernel_do(g_list + [g1])
-        
+
         return list(gram_matrix[-1][0:-1])
-    
-    
+
+
     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
-        return self._compute_kernel_list_series(g1, g_list)
-    
-    
+        self._add_dummy_node_labels(g_list + [g1])
+
+        if self._base_kernel == 'subtree':
+            kernel_list = [None] * len(g_list)
+
+            def init_worker(g1_toshare, g_list_toshare):
+                global G_g1, G_g_list
+                G_g1 = g1_toshare
+                G_g_list = g_list_toshare
+            do_fun = self._wrapper_kernel_list_do
+            def func_assign(result, var_to_assign):
+                var_to_assign[result[0]] = result[1]
+            itr = range(len(g_list))
+            len_itr = len(g_list)
+            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
+                n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+            return kernel_list
+        else:
+            if self._verbose >= 2:
+                import warnings
+                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
+            return self._compute_kernel_list_series(g1, g_list)
+
 
     def _wrapper_kernel_list_do(self, itr):
-        pass
-    
-    
+        return itr, self.pairwise_kernel(G_g1, G_g_list[itr])
+
+
     def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better.
         self._add_dummy_node_labels([g1] + [g2])
 
         # for WL subtree kernel
-        if self._base_kernel == 'subtree':
+        if self._base_kernel == 'subtree':
             gram_matrix = self._subtree_kernel_do([g1] + [g2])
-        
+
         # for WL shortest path kernel
         elif self._base_kernel == 'sp':
             gram_matrix = self._sp_kernel_do([g1] + [g2])
-        
+
         # for WL edge kernel
         elif self._base_kernel == 'edge':
             gram_matrix = self._edge_kernel_do([g1] + [g2])
-        
+
         # for user defined base kernel
         else:
             gram_matrix = self._user_kernel_do([g1] + [g2])
-        
+
         return gram_matrix[0][1]
-    
-    
+
+
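_wrapper_kernel_list_do above returns an (index, value) pair so that func_assign can slot each result into kernel_list regardless of the order in which imap_unordered yields them. A serial sketch of that contract, with mock_parallel_me standing in for gklearn.utils.parallel.parallel_me:

    def mock_parallel_me(do_fun, func_assign, var_to_assign, itr):
        # The real parallel_me uses Pool.imap_unordered; arrival order is not guaranteed.
        for result in map(do_fun, itr):
            func_assign(result, var_to_assign)

    def func_assign(result, var_to_assign):
        var_to_assign[result[0]] = result[1]

    kernel_list = [None] * 4
    mock_parallel_me(lambda i: (i, float(i * i)), func_assign, kernel_list, range(4))
    print(kernel_list)  # [0.0, 1.0, 4.0, 9.0]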
+    def pairwise_kernel(self, g1, g2):
+        Gn = [g1, g2]
+        kernel = 0
+
+        # initial for height = 0
+        all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration
+
+        # for each graph
+        for G in Gn:
+            # set all labels into a tuple.
+            for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
+                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
+            # get the set of original labels
+            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
+            # number of occurrences of each label in G
+            all_num_of_each_label.append(dict(Counter(labels_ori)))
+
+        # Compute subtree kernel with the 0th iteration and add it to the final kernel.
+        kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)
+
+        # iterate each height
+        for h in range(1, self._height + 1):
+            all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
+            num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels in all graphs so far
+            # all_labels_ori = set() # all unique original labels in all graphs in this iteration
+            all_num_of_each_label = [] # number of occurrences of each label in G
+
+            # @todo: parallel this part.
+            for idx, G in enumerate(Gn):
+
+                all_multisets = []
+                for node, attrs in G.nodes(data=True):
+                    # Multiset-label determination.
+                    multiset = [G.nodes[neighbors]['label_tuple'] for neighbors in G[node]]
+                    # sorting each multiset
+                    multiset.sort()
+                    multiset = [attrs['label_tuple']] + multiset # add the prefix
+                    all_multisets.append(tuple(multiset))
+
+                # label compression
+                set_unique = list(set(all_multisets)) # set of unique multiset labels
+                # a dictionary mapping original labels to new ones.
+                set_compressed = {}
+                # if a label occurred before, assign its former compressed label,
+                # else assign the number of labels occurred + 1 as the compressed label.
+                for value in set_unique:
+                    if value in all_set_compressed.keys():
+                        set_compressed.update({value: all_set_compressed[value]})
+                    else:
+                        set_compressed.update({value: str(num_of_labels_occured + 1)})
+                        num_of_labels_occured += 1
+
+                all_set_compressed.update(set_compressed)
+
+                # relabel nodes
+                for idx, node in enumerate(G.nodes()):
+                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]
+
+                # get the set of compressed labels
+                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
+                # all_labels_ori.update(labels_comp)
+                all_num_of_each_label.append(dict(Counter(labels_comp)))
+
+            # Compute subtree kernel with h iterations and add it to the final kernel
+            kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)
+
+        return kernel
+
+
+    def _wrapper_pairwise(self, itr):
+        i = itr[0]
+        j = itr[1]
+        return i, j, self.pairwise_kernel(G_gn[i], G_gn[j])
+
+
+    def _compute_kernel_itr(self, kernel, all_num_of_each_label):
+        labels = set(list(all_num_of_each_label[0].keys()) +
+                     list(all_num_of_each_label[1].keys()))
+        vector1 = np.array([(all_num_of_each_label[0][label]
+                             if (label in all_num_of_each_label[0].keys()) else 0)
+                            for label in labels])
+        vector2 = np.array([(all_num_of_each_label[1][label]
+                             if (label in all_num_of_each_label[1].keys()) else 0)
+                            for label in labels])
+        kernel += np.dot(vector1, vector2)
+        return kernel
+
+
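_compute_kernel_itr is a dot product between the two graphs' label histograms at one WL iteration: labels shared by both graphs contribute the product of their counts, labels unique to one graph contribute nothing. A worked example with invented counts:

    import numpy as np

    counts1 = {'a': 2, 'b': 1}   # graph 1: label 'a' twice, 'b' once
    counts2 = {'a': 1, 'c': 3}   # graph 2: label 'a' once, 'c' three times

    labels = set(counts1) | set(counts2)
    vector1 = np.array([counts1.get(label, 0) for label in labels])
    vector2 = np.array([counts2.get(label, 0) for label in labels])
    print(np.dot(vector1, vector2))  # 2*1 = 2; 'b' and 'c' match nothing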
     def _subtree_kernel_do(self, Gn):
         """Compute Weisfeiler-Lehman kernels between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
-        
+            List of graphs between which the kernels are computed.
+
         Return
         ------
         gram_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
         """
         gram_matrix = np.zeros((len(Gn), len(Gn)))
-        
+
         # initial for height = 0
         all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
-        
+
         # for each graph
         for G in Gn:
             # set all labels into a tuple.
@@ -151,112 +271,112 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
             # number of occurence of each label in G
             all_num_of_each_label.append(dict(Counter(labels_ori)))
-        
+
         # Compute subtree kernel with the 0th iteration and add it to the final kernel.
-        self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)
-        
+        self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+
         # iterate each height
         for h in range(1, self._height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             # all_labels_ori = set() # all unique orignal labels in all graphs in this iteration
             all_num_of_each_label = [] # number of occurence of each label in G
-        
+
             # @todo: parallel this part.
             for idx, G in enumerate(Gn):
-            
+
                 all_multisets = []
                 for node, attrs in G.nodes(data=True):
                     # Multiset-label determination.
                     multiset = [G.nodes[neighbors]['label_tuple'] for neighbors in G[node]]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = [attrs['label_tuple']] + multiset # add the prefix 
+                    multiset = [attrs['label_tuple']] + multiset # add the prefix
                     all_multisets.append(tuple(multiset))
-        
+
                 # label compression
                 set_unique = list(set(all_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, 
-                # else assign the number of labels occured + 1 as the compressed label. 
+                # if a label occurred before, assign its former compressed label,
+                # else assign the number of labels occurred + 1 as the compressed label.
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({value: all_set_compressed[value]})
                     else:
                         set_compressed.update({value: str(num_of_labels_occured + 1)})
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for idx, node in enumerate(G.nodes()):
                     G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]
-        
+
                 # get the set of compressed labels
                 labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                 # all_labels_ori.update(labels_comp)
                 all_num_of_each_label.append(dict(Counter(labels_comp)))
-        
+
             # Compute subtree kernel with h iterations and add it to the final kernel
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)
-        
+            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+
         return gram_matrix
-    
-    
+
+
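Each pass of the height loop above is one WL refinement step: a node's new label is its own label followed by the sorted multiset of its neighbours' labels, compressed to a short string shared across all graphs in the batch. The same step, reduced to a standalone sketch on a toy path graph with invented labels:

    import networkx as nx

    G = nx.path_graph(3)  # 0 - 1 - 2
    nx.set_node_attributes(G, {0: ('a',), 1: ('b',), 2: ('a',)}, 'label_tuple')

    all_set_compressed = {}
    num_of_labels_occured = 0

    # Multiset-label determination: own label first, sorted neighbour labels after.
    all_multisets = []
    for node, attrs in G.nodes(data=True):
        multiset = sorted(G.nodes[nbr]['label_tuple'] for nbr in G[node])
        all_multisets.append(tuple([attrs['label_tuple']] + multiset))

    # Label compression: each multiset seen for the first time gets a fresh label.
    for value in all_multisets:
        if value not in all_set_compressed:
            num_of_labels_occured += 1
            all_set_compressed[value] = str(num_of_labels_occured)

    # Relabeling.
    for idx, node in enumerate(G.nodes()):
        G.nodes[node]['label_tuple'] = all_set_compressed[all_multisets[idx]]

    print(nx.get_node_attributes(G, 'label_tuple'))
    # {0: '1', 1: '2', 2: '1'} -- the two end nodes share a subtree pattern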
-    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label, Gn):
+    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
         """Compute Gram matrix using the base kernel.
         """
-        if self._parallel == 'imap_unordered':
-            # compute kernels.
-            def init_worker(alllabels_toshare):
-                global G_alllabels
-                G_alllabels = alllabels_toshare
-            do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
-            parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
-                        glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
-        elif self._parallel is None:
-            for i in range(len(gram_matrix)):
-                for j in range(i, len(gram_matrix)):
-                    gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
-                            all_num_of_each_label[j], gram_matrix[i][j])
-                    gram_matrix[j][i] = gram_matrix[i][j]
-    
-    
+#        if self._parallel == 'imap_unordered':
+#            # compute kernels.
+#            def init_worker(alllabels_toshare):
+#                global G_alllabels
+#                G_alllabels = alllabels_toshare
+#            do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
+#            parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
+#                        glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
+#        elif self._parallel is None:
+        for i in range(len(gram_matrix)):
+            for j in range(i, len(gram_matrix)):
+                gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
+                        all_num_of_each_label[j], gram_matrix[i][j])
+                gram_matrix[j][i] = gram_matrix[i][j]
+
+
     def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
         """Compute the subtree kernel.
         """
         labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
-        vector1 = np.array([(num_of_each_label1[label] 
-                             if (label in num_of_each_label1.keys()) else 0) 
+        vector1 = np.array([(num_of_each_label1[label]
+                             if (label in num_of_each_label1.keys()) else 0)
                             for label in labels])
-        vector2 = np.array([(num_of_each_label2[label] 
-                             if (label in num_of_each_label2.keys()) else 0) 
+        vector2 = np.array([(num_of_each_label2[label]
+                             if (label in num_of_each_label2.keys()) else 0)
                             for label in labels])
         kernel += np.dot(vector1, vector2)
         return kernel
-    
-    
-    def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
-        i = itr[0]
-        j = itr[1]
-        return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
-    
-    
+
+
+#    def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
+#        i = itr[0]
+#        j = itr[1]
+#        return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
+
+
     def _wl_spkernel_do(Gn, node_label, edge_label, height):
         """Compute Weisfeiler-Lehman shortest path kernels between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
+            List of graphs between which the kernels are computed.
         node_label : string
-            node attribute used as label. 
+            node attribute used as label.
         edge_label : string
-            edge attribute used as label. 
+            edge attribute used as label.
         height : int
             subtree height.
-        
+
         Return
         ------
         gram_matrix : Numpy matrix
@@ -264,22 +384,22 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
         """
         pass
         from gklearn.utils.utils import getSPGraph
-        
+
         # init.
         height = int(height)
         gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel
-        
+
         Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn
-        
+
         # initial for height = 0
         for i in range(0, len(Gn)):
             for j in range(i, len(Gn)):
                 for e1 in Gn[i].edges(data = True):
-                    for e2 in Gn[j].edges(data = True):
+                    for e2 in Gn[j].edges(data = True):
                         if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                             gram_matrix[i][j] += 1
                 gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         # iterate each height
         for h in range(1, height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
@@ -291,193 +411,193 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             for G in Gn: # for each graph
                 set_multisets = []
                 for node in G.nodes(data = True):
                     # Multiset-label determination.
                     multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix 
-                    set_multisets.append(multiset)
-        
+                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
+                    set_multisets.append(multiset)
+
                 # label compression
                 set_unique = list(set(set_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label 
+                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({ value : all_set_compressed[value] })
                     else:
                         set_compressed.update({ value : str(num_of_labels_occured + 1) })
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for node in G.nodes(data = True):
                     node[1][node_label] = set_compressed[set_multisets[node[0]]]
-        
+
             # Compute subtree kernel with h iterations and add it to the final kernel
             for i in range(0, len(Gn)):
                 for j in range(i, len(Gn)):
                     for e1 in Gn[i].edges(data = True):
-                        for e2 in Gn[j].edges(data = True):
+                        for e2 in Gn[j].edges(data = True):
                             if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                 gram_matrix[i][j] += 1
                     gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         return gram_matrix
-    
-    
-    
+
+
+
     def _wl_edgekernel_do(Gn, node_label, edge_label, height):
         """Compute Weisfeiler-Lehman edge kernels between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
+            List of graphs between which the kernels are computed.
         node_label : string
-            node attribute used as label. 
+            node attribute used as label.
         edge_label : string
-            edge attribute used as label. 
+            edge attribute used as label.
         height : int
             subtree height.
-        
+
         Return
         ------
         gram_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
-        """
+        """
         pass
         # init.
         height = int(height)
         gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel
-        
+
         # initial for height = 0
         for i in range(0, len(Gn)):
             for j in range(i, len(Gn)):
                 for e1 in Gn[i].edges(data = True):
-                    for e2 in Gn[j].edges(data = True):
+                    for e2 in Gn[j].edges(data = True):
                         if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                             gram_matrix[i][j] += 1
                 gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         # iterate each height
         for h in range(1, height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             for G in Gn: # for each graph
-                set_multisets = []
+                set_multisets = []
                 for node in G.nodes(data = True):
                     # Multiset-label determination.
                     multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix 
-                    set_multisets.append(multiset)
-        
+                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
+                    set_multisets.append(multiset)
+
                 # label compression
                 set_unique = list(set(set_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label 
+                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({ value : all_set_compressed[value] })
                     else:
                         set_compressed.update({ value : str(num_of_labels_occured + 1) })
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for node in G.nodes(data = True):
                     node[1][node_label] = set_compressed[set_multisets[node[0]]]
-        
+
             # Compute subtree kernel with h iterations and add it to the final kernel
             for i in range(0, len(Gn)):
                 for j in range(i, len(Gn)):
                     for e1 in Gn[i].edges(data = True):
-                        for e2 in Gn[j].edges(data = True):
+                        for e2 in Gn[j].edges(data = True):
                             if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                 gram_matrix[i][j] += 1
                     gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         return gram_matrix
-    
-    
+
+
     def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
         """Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
+            List of graphs between which the kernels are computed.
         node_label : string
-            node attribute used as label. 
+            node attribute used as label.
         edge_label : string
-            edge attribute used as label. 
+            edge attribute used as label.
         height : int
             subtree height.
         base_kernel : string
             Name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs.
-        
+
         Return
        ------
         gram_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
-        """
+        """
         pass
         # init.
         height = int(height)
         gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel
-        
+
         # initial for height = 0
         gram_matrix = base_kernel(Gn, node_label, edge_label)
-        
+
         # iterate each height
         for h in range(1, height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             for G in Gn: # for each graph
-                set_multisets = []
+                set_multisets = []
                 for node in G.nodes(data = True):
                     # Multiset-label determination.
                     multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix 
-                    set_multisets.append(multiset)
-        
+                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
+                    set_multisets.append(multiset)
+
                 # label compression
                 set_unique = list(set(set_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label 
+                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({ value : all_set_compressed[value] })
                     else:
                         set_compressed.update({ value : str(num_of_labels_occured + 1) })
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for node in G.nodes(data = True):
                     node[1][node_label] = set_compressed[set_multisets[node[0]]]
-        
+
             # Compute kernel with h iterations and add it to the final kernel
             gram_matrix += base_kernel(Gn, node_label, edge_label)
-        
+
         return gram_matrix
-    
-    
+
+
     def _add_dummy_node_labels(self, Gn):
         if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
             self._node_labels = [SpecialLabel.DUMMY]
-    
-    
+
+
 class WLSubtree(WeisfeilerLehman):
-    
+
     def __init__(self, **kwargs):
         kwargs['base_kernel'] = 'subtree'
         super().__init__(**kwargs)
\ No newline at end of file
diff --git a/gklearn/tests/test_graph_kernels.py b/gklearn/tests/test_graph_kernels.py
index 40f1d51..021b1cc 100644
--- a/gklearn/tests/test_graph_kernels.py
+++ b/gklearn/tests/test_graph_kernels.py
@@ -544,3 +544,4 @@ if __name__ == "__main__":
 #    test_RandomWalk('Acyclic', 'fp', None, None)
 #    test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered')
 #    test_CommonWalk('Alkane', 0.01, 'geo')
+#    test_ShortestPath('Acyclic')
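End to end, the parallel path added by this patch is driven through the public compute interface. The kwargs below mirror the calls in gklearn/tests/test_graph_kernels.py; the two toy graphs stand in for a benchmark dataset, and the (gram_matrix, run_time) return shape is taken from those tests rather than guaranteed here:

    import multiprocessing

    import networkx as nx
    from gklearn.kernels import WLSubtree

    g1 = nx.path_graph(3)    # toy stand-ins for dataset graphs
    g2 = nx.cycle_graph(3)
    for g in (g1, g2):
        nx.set_node_attributes(g, 'C', 'atom')

    graph_kernel = WLSubtree(node_labels=['atom'], edge_labels=[], height=2,
                             ds_infos={'directed': False})
    gram_matrix, run_time = graph_kernel.compute([g1, g2],
                                                 parallel='imap_unordered',
                                                 n_jobs=multiprocessing.cpu_count(),
                                                 verbose=True)
    print(gram_matrix)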