diff --git a/gklearn/kernels/graph_kernel.py b/gklearn/kernels/graph_kernel.py
index 1db38b3..c7a5718 100644
--- a/gklearn/kernels/graph_kernel.py
+++ b/gklearn/kernels/graph_kernel.py
@@ -32,7 +32,13 @@ class GraphKernel(BaseEstimator): #, ABC):
     https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
     """

-    def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2):
+    def __init__(self,
+                 parallel=None,
+                 n_jobs=None,
+                 chunksize=None,
+                 normalize=True,
+                 copy_graphs=True, # make sure it is a full deep copy. and faster!
+                 verbose=2):
         """`__init__` for `GraphKernel` object."""
         # @todo: the default settings of the parameters are different from those in the self.compute method.
 #        self._graphs = None
@@ -40,6 +46,7 @@ class GraphKernel(BaseEstimator): #, ABC):
         self.n_jobs = n_jobs
         self.chunksize = chunksize
         self.normalize = normalize
+        self.copy_graphs = copy_graphs
         self.verbose = verbose
 #        self._run_time = 0
 #        self._gram_matrix = None
@@ -90,7 +97,7 @@ class GraphKernel(BaseEstimator): #, ABC):
         return self


-    def transform(self, X):
+    def transform(self, X=None, load_gm_train=False):
         """Compute the graph kernel matrix between given and fitted data.

         Parameters
@@ -108,6 +115,12 @@ class GraphKernel(BaseEstimator): #, ABC):
             None.

         """
+        # If `load_gm_train`, load Gram matrix of training data.
+        if load_gm_train:
+            check_is_fitted(self, '_gm_train')
+            self._is_transformed = True
+            return self._gm_train # @todo: copy or not?
+
         # Check if method "fit" had been called.
         check_is_fitted(self, '_graphs')
@@ -133,8 +146,7 @@ class GraphKernel(BaseEstimator): #, ABC):
         return kernel_matrix


-
-    def fit_transform(self, X):
+    def fit_transform(self, X, save_gm_train=False):
         """Fit and transform: compute Gram matrix on the same data.

         Parameters
@@ -164,6 +176,9 @@ class GraphKernel(BaseEstimator): #, ABC):
         finally:
             np.seterr(**old_settings)

+        if save_gm_train:
+            self._gm_train = gram_matrix
+
         return gram_matrix
@@ -260,7 +275,9 @@ class GraphKernel(BaseEstimator): #, ABC):
             kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y)
         elif self.parallel is None:
-            kernel_matrix = self._compute_kernel_matrix_series(Y)
+            Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y)
+            graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+            kernel_matrix = self._compute_kernel_matrix_series(Y_copy, graphs_copy)

         self._run_time = time.time() - start_time
         if self.verbose:
@@ -270,26 +287,25 @@ class GraphKernel(BaseEstimator): #, ABC):
         return kernel_matrix


-    def _compute_kernel_matrix_series(self, Y):
-        """Compute the kernel matrix between a given target graphs (Y) and
-        the fitted graphs (X / self._graphs) without parallelization.
+    def _compute_kernel_matrix_series(self, X, Y):
+        """Compute the kernel matrix between two sets of graphs (X and Y) without parallelization.

         Parameters
         ----------
-        Y : list of graphs, optional
-            The target graphs.
+        X, Y : list of graphs
+            The input graphs.

         Returns
         -------
-        kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+        kernel_matrix : numpy array, shape = [n_X, n_Y]
             The computed kernel matrix.

         """
-        kernel_matrix = np.zeros((len(Y), len(self._graphs)))
+        kernel_matrix = np.zeros((len(X), len(Y)))

-        for i_y, g_y in enumerate(Y):
-            for i_x, g_x in enumerate(self._graphs):
-                kernel_matrix[i_y, i_x] = self.pairwise_kernel(g_y, g_x)
+        for i_x, g_x in enumerate(X):
+            for i_y, g_y in enumerate(Y):
+                kernel_matrix[i_x, i_y] = self.pairwise_kernel(g_x, g_y)

         return kernel_matrix
@@ -335,14 +351,16 @@ class GraphKernel(BaseEstimator): #, ABC):
         except NotFittedError:
             # Compute diagonals of X.
             self._X_diag = np.empty(shape=(len(self._graphs),))
-            for i, x in enumerate(self._graphs):
+            graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+            for i, x in enumerate(graphs):
                 self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?

         try:
             # If transform has happened, return both diagonals.
             check_is_fitted(self, ['_Y'])
             self._Y_diag = np.empty(shape=(len(self._Y),))
-            for (i, y) in enumerate(self._Y):
+            Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y)
+            for (i, y) in enumerate(Y):
                 self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?

             return self._X_diag, self._Y_diag
@@ -484,7 +502,8 @@ class GraphKernel(BaseEstimator): #, ABC):
         if self.parallel == 'imap_unordered':
             gram_matrix = self._compute_gm_imap_unordered()
         elif self.parallel is None:
-            gram_matrix = self._compute_gm_series()
+            graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+            gram_matrix = self._compute_gm_series(graphs)
         else:
             raise Exception('Parallel mode is not set correctly.')
@@ -496,11 +515,11 @@ class GraphKernel(BaseEstimator): #, ABC):
         return gram_matrix


-    def _compute_gm_series(self):
+    def _compute_gm_series(self, graphs):
         pass


-    def _compute_gm_imap_unordered(self):
+    def _compute_gm_imap_unordered(self, graphs):
         pass


diff --git a/gklearn/kernels/treelet.py b/gklearn/kernels/treelet.py
index e42142b..c981bdf 100644
--- a/gklearn/kernels/treelet.py
+++ b/gklearn/kernels/treelet.py
@@ -28,16 +28,16 @@ from gklearn.kernels import GraphKernel
 class Treelet(GraphKernel):

-    def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2, precompute_canonkeys=True, save_canonkeys=False, **kwargs):
+    def __init__(self, **kwargs):
         """Initialise a treelet kernel.
         """
-        super().__init__(parallel=parallel, n_jobs=n_jobs, chunksize=chunksize, normalize=normalize, verbose=verbose)
+        GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs})
         self.node_labels = kwargs.get('node_labels', [])
         self.edge_labels = kwargs.get('edge_labels', [])
         self.sub_kernel = kwargs.get('sub_kernel', None)
         self.ds_infos = kwargs.get('ds_infos', {})
-        self.precompute_canonkeys = precompute_canonkeys
-        self.save_canonkeys = save_canonkeys
+        self.precompute_canonkeys = kwargs.get('precompute_canonkeys', True)
+        self.save_canonkeys = kwargs.get('save_canonkeys', True)


 ##########################################################################
@@ -71,7 +71,7 @@ class Treelet(GraphKernel):
             raise ValueError('Sub-kernel not set.')


-    def _compute_kernel_matrix_series(self, Y):
+    def _compute_kernel_matrix_series(self, Y, X=None, load_canonkeys=True):
         """Compute the kernel matrix between a given target graphs (Y) and
         the fitted graphs (X / self._graphs) without parallelization.

         Parameters
         ----------
@@ -86,36 +86,45 @@ class Treelet(GraphKernel):
             The computed kernel matrix.
""" + if_comp_X_canonkeys = True + + # if load saved canonkeys of X from the instance: + if load_canonkeys: + # Canonical keys for self._graphs. + try: + check_is_fitted(self, ['_canonkeys']) + canonkeys_list1 = self._canonkeys + if_comp_X_canonkeys = False + except NotFittedError: + import warnings + warnings.warn('The canonkeys of self._graphs are not computed/saved. The keys of `X` is computed instead.') + if_comp_X_canonkeys = True - # self._add_dummy_labels will modify the input in place. - self._add_dummy_labels() # For self._graphs -# Y = [g.copy() for g in Y] # @todo: ? - self._add_dummy_labels(Y) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. - # Canonical keys for self._graphs. - try: - check_is_fitted(self, ['_canonkeys']) - canonkeys_list1 = self._canonkeys - except NotFittedError: + # Compute the canonical keys of X. + if if_comp_X_canonkeys: + if X is None: + raise('X can not be None.') + # self._add_dummy_labels will modify the input in place. + self._add_dummy_labels(X) # for X canonkeys_list1 = [] - iterator = get_iters(self._graphs, desc='getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2)) + iterator = get_iters(self._graphs, desc='Getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2)) for g in iterator: canonkeys_list1.append(self._get_canonkeys(g)) - if self.save_canonkeys: - self._canonkeys = canonkeys_list1 - # Canonical keys for Y. +# Y = [g.copy() for g in Y] # @todo: ? + self._add_dummy_labels(Y) canonkeys_list2 = [] - iterator = get_iters(Y, desc='getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2)) + iterator = get_iters(Y, desc='Getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2)) for g in iterator: canonkeys_list2.append(self._get_canonkeys(g)) - if self.save_canonkeys: - self._Y_canonkeys = canonkeys_list2 +# if self.save_canonkeys: +# self._Y_canonkeys = canonkeys_list2 # compute kernel matrix. kernel_matrix = np.zeros((len(Y), len(canonkeys_list1))) @@ -235,13 +244,13 @@ class Treelet(GraphKernel): ########################################################################## - def _compute_gm_series(self): - self._add_dummy_labels(self._graphs) + def _compute_gm_series(self, graphs): + self._add_dummy_labels(graphs) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. canonkeys = [] - iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout, + iterator = get_iters(graphs, desc='getting canonkeys', file=sys.stdout, verbose=(self.verbose >= 2)) for g in iterator: canonkeys.append(self._get_canonkeys(g)) @@ -250,11 +259,11 @@ class Treelet(GraphKernel): self._canonkeys = canonkeys # compute Gram matrix. - gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) + gram_matrix = np.zeros((len(graphs), len(graphs))) from itertools import combinations_with_replacement - itr = combinations_with_replacement(range(0, len(self._graphs)), 2) - len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) + itr = combinations_with_replacement(range(0, len(graphs)), 2) + len_itr = int(len(graphs) * (len(graphs) + 1) / 2) iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2)) for i, j in iterator: @@ -390,6 +399,9 @@ class Treelet(GraphKernel): Treelet kernel between 2 graphs. 
""" keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs + if len(keys) == 0: # There is nothing in common... + return 0 + vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) diff --git a/gklearn/kernels/weisfeiler_lehman.py b/gklearn/kernels/weisfeiler_lehman.py index f02926e..905b31f 100644 --- a/gklearn/kernels/weisfeiler_lehman.py +++ b/gklearn/kernels/weisfeiler_lehman.py @@ -28,7 +28,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. def __init__(self, **kwargs): - GraphKernel.__init__(self) + GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs}) self.node_labels = kwargs.get('node_labels', []) self.edge_labels = kwargs.get('edge_labels', []) self.height = int(kwargs.get('height', 0)) @@ -50,7 +50,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. ########################################################################## - def _compute_gm_series(self): + def _compute_gm_series(self, graphs): # if self.verbose >= 2: # import warnings # warnings.warn('A part of the computation is parallelized.') @@ -59,19 +59,19 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. # for WL subtree kernel if self._base_kernel == 'subtree': - gram_matrix = self._subtree_kernel_do(self._graphs) + gram_matrix = self._subtree_kernel_do(graphs) # for WL shortest path kernel elif self._base_kernel == 'sp': - gram_matrix = self._sp_kernel_do(self._graphs) + gram_matrix = self._sp_kernel_do(graphs) # for WL edge kernel elif self._base_kernel == 'edge': - gram_matrix = self._edge_kernel_do(self._graphs) + gram_matrix = self._edge_kernel_do(graphs) # for user defined base kernel else: - gram_matrix = self._user_kernel_do(self._graphs) + gram_matrix = self._user_kernel_do(graphs) return gram_matrix @@ -204,70 +204,13 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. def pairwise_kernel(self, g1, g2): - Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster! - kernel = 0 - - # initial for height = 0 - all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration - - # for each graph - for G in Gn: - # set all labels into a tuple. - for nd, attrs in G.nodes(data=True): # @todo: there may be a better way. - G.nodes[nd]['lt'] = tuple(attrs[name] for name in self.node_labels) - # get the set of original labels - labels_ori = list(nx.get_node_attributes(G, 'lt').values()) - # number of occurence of each label in G - all_num_of_each_label.append(dict(Counter(labels_ori))) - - # Compute subtree kernel with the 0th iteration and add it to the final kernel. - kernel = self._compute_kernel_itr(kernel, all_num_of_each_label) - - # iterate each height - for h in range(1, self.height + 1): - all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration - num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs - # all_labels_ori = set() # all unique orignal labels in all graphs in this iteration - all_num_of_each_label = [] # number of occurence of each label in G - - # @todo: parallel this part. - for G in Gn: - - all_multisets = [] - for node, attrs in G.nodes(data=True): - # Multiset-label determination. 
- multiset = [G.nodes[neighbors]['lt'] for neighbors in G[node]] - # sorting each multiset - multiset.sort() - multiset = [attrs['lt']] + multiset # add the prefix - all_multisets.append(tuple(multiset)) - - # label compression - set_unique = list(set(all_multisets)) # set of unique multiset labels - # a dictionary mapping original labels to new ones. - set_compressed = {} - # if a label occured before, assign its former compressed label, - # else assign the number of labels occured + 1 as the compressed label. - for value in set_unique: - if value in all_set_compressed.keys(): - set_compressed[value] = all_set_compressed[value] - else: - set_compressed[value] = str(num_of_labels_occured + 1) - num_of_labels_occured += 1 - - all_set_compressed.update(set_compressed) - - # relabel nodes - for idx, node in enumerate(G.nodes()): - G.nodes[node]['lt'] = set_compressed[all_multisets[idx]] - - # get the set of compressed labels - labels_comp = list(nx.get_node_attributes(G, 'lt').values()) - # all_labels_ori.update(labels_comp) - all_num_of_each_label.append(dict(Counter(labels_comp))) +# Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster! + Gn = [g1, g2] + # for WL subtree kernel + if self._base_kernel == 'subtree': + kernel = self._subtree_kernel_do(Gn, return_mat=False) - # Compute subtree kernel with h iterations and add it to the final kernel - kernel = self._compute_kernel_itr(kernel, all_num_of_each_label) + # @todo: other subkernels. return kernel @@ -291,7 +234,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. return kernel - def _subtree_kernel_do_nl(self, Gn): + def _subtree_kernel_do_nl(self, Gn, return_mat=True): """Compute Weisfeiler-Lehman kernels between graphs with node labels. Parameters @@ -301,10 +244,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. Return ------ - gram_matrix : Numpy matrix + kernel_matrix : Numpy matrix / float Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. """ - gram_matrix = np.zeros((len(Gn), len(Gn))) + kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0) + gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr) # initial for height = 0 all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration @@ -324,7 +268,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. all_num_of_each_label.append(dict(Counter(labels_ori))) # Compute subtree kernel with the 0th iteration and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) # iterate each height for h in range(1, self.height + 1): @@ -342,12 +286,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) - return gram_matrix + return kernel_matrix - def _subtree_kernel_do_el(self, Gn): + def _subtree_kernel_do_el(self, Gn, return_mat=True): """Compute Weisfeiler-Lehman kernels between graphs with edge labels. Parameters @@ -357,19 +301,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. 

         Return
         ------
-        gram_matrix : Numpy matrix
+        kernel_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel
             between 2 praphs.
         """
-        gram_matrix = np.zeros((len(Gn), len(Gn)))
+        kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
+        gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

         # initial for height = 0
         all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

         # Compute subtree kernel with the 0th iteration and add it to the final kernel.
-        iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
-        for i, j in iterator:
-            gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
-            gram_matrix[j][i] = gram_matrix[i][j]
+        iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2)
+        for i, j in iterator: # @todo: not correct if return_mat == False.
+            kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
+            kernel_matrix[j][i] = kernel_matrix[i][j]


         # if h >= 1.
@@ -393,7 +338,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                 num_of_labels_occured = self._subtree_1graph_el(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

             # Compute subtree kernel with h iterations and add it to the final kernel.
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+            kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


         # Iterate along heights (>= 2).
@@ -407,12 +352,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                 num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

             # Compute subtree kernel with h iterations and add it to the final kernel.
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+            kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

-        return gram_matrix
+        return kernel_matrix


-    def _subtree_kernel_do_labeled(self, Gn):
+    def _subtree_kernel_do_labeled(self, Gn, return_mat=True):
         """Compute Weisfeiler-Lehman kernels between graphs with both node and
         edge labels.

         Parameters
@@ -423,10 +368,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

         Return
         ------
-        gram_matrix : Numpy matrix
+        kernel_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel
             between 2 praphs.
         """
-        gram_matrix = np.zeros((len(Gn), len(Gn)))
+        kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
+        gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

         # initial for height = 0
         all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
@@ -446,10 +392,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
             all_num_of_each_label.append(dict(Counter(labels_ori)))

         # Compute subtree kernel with the 0th iteration and add it to the final kernel.
-        self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+        kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


-        # if h >= 1.
+        # if h >= 1:
         if self.height > 0:
             # Set all edge labels into a tuple. # @todo: remove this original labels or not?
             if self.verbose >= 2:
@@ -470,7 +416,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                 num_of_labels_occured = self._subtree_1graph_labeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

             # Compute subtree kernel with h iterations and add it to the final kernel.
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+            kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


         # Iterate along heights.
@@ -484,12 +430,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                 num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

             # Compute subtree kernel with h iterations and add it to the final kernel.
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+            kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

-        return gram_matrix
+        return kernel_matrix


-    def _subtree_kernel_do_unlabeled(self, Gn):
+    def _subtree_kernel_do_unlabeled(self, Gn, return_mat=True):
         """Compute Weisfeiler-Lehman kernels between graphs without labels.

         Parameters
@@ -499,19 +445,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

         Return
         ------
-        gram_matrix : Numpy matrix
+        kernel_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel
             between 2 praphs.
         """
-        gram_matrix = np.zeros((len(Gn), len(Gn)))
+        kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
+        gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

         # initial for height = 0
         all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

         # Compute subtree kernel with the 0th iteration and add it to the final kernel.
-        iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
-        for i, j in iterator:
-            gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
-            gram_matrix[j][i] = gram_matrix[i][j]
+        iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2)
+        for i, j in iterator: # @todo: not correct if return_mat == False.
+            kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
+            kernel_matrix[j][i] = kernel_matrix[i][j]


         # if h >= 1.
@@ -526,7 +473,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                 num_of_labels_occured = self._subtree_1graph_unlabeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

             # Compute subtree kernel with h iterations and add it to the final kernel.
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+            kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


         # Iterate along heights (>= 2).
@@ -540,9 +487,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                 num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

             # Compute subtree kernel with h iterations and add it to the final kernel.
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+            kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

-        return gram_matrix
+        return kernel_matrix


     def _subtree_1graph_nl(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
@@ -717,6 +664,8 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
                                                   all_num_of_each_label[j])
             gram_matrix[j][i] = gram_matrix[i][j]

+        return gram_matrix
+

     def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2):
         """Compute the subtree kernel.
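Side note for reviewers: a minimal sketch of the Gram-matrix save/load round trip introduced in `graph_kernel.py` (`save_gm_train` in `fit_transform`, `load_gm_train` in `transform`), exercised through the updated `WeisfeilerLehman` constructor, which now forwards `normalize`/`copy_graphs` to `GraphKernel.__init__`. The toy graphs and the `'atom'` node label are illustrative only, not part of the patch:

```python
import networkx as nx
from gklearn.kernels import WeisfeilerLehman

# Two toy labeled graphs (made-up data for illustration).
g1 = nx.path_graph(3)
nx.set_node_attributes(g1, {0: 'C', 1: 'O', 2: 'C'}, name='atom')
g2 = nx.cycle_graph(4)
nx.set_node_attributes(g2, {i: 'C' for i in g2.nodes}, name='atom')

wl = WeisfeilerLehman(node_labels=['atom'], height=2, normalize=True)

# Compute and cache the train-vs-train Gram matrix on the estimator.
gm = wl.fit_transform([g1, g2], save_gm_train=True)

# Later: fetch the cached matrix instead of recomputing the kernels.
gm_cached = wl.transform(load_gm_train=True)
assert gm_cached is gm  # returned as stored; see the "@todo: copy or not?" above
```

Since `transform(load_gm_train=True)` returns before any input validation, `X` can be omitted entirely on that path, which is why its default changes to `None`.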
diff --git a/gklearn/utils/kernels.py b/gklearn/utils/kernels.py
index 182668b..1e1ea52 100644
--- a/gklearn/utils/kernels.py
+++ b/gklearn/utils/kernels.py
@@ -68,6 +68,11 @@ def gaussian_kernel(x, y, gamma=None):
     return np.exp((np.sum(np.subtract(x, y) ** 2)) * -gamma)


+def tanimoto_kernel(x, y):
+    xy = np.dot(x, y)
+    return xy / (np.dot(x, x) + np.dot(y, y) - xy)
+
+
 def gaussiankernel(x, y, gamma=None):
     return gaussian_kernel(x, y, gamma=gamma)
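The new `tanimoto_kernel` computes `<x, y> / (<x, x> + <y, y> - <x, y>)`. On binary vectors this reduces to the Jaccard index of the two support sets, which gives a quick sanity check (the vectors below are made up):

```python
import numpy as np
from gklearn.utils.kernels import tanimoto_kernel

x = np.array([1, 0, 1, 1, 0])
y = np.array([1, 1, 0, 1, 0])

# Intersection = 2 bits, union = 4 bits, so the Jaccard index is 0.5.
assert np.isclose(tanimoto_kernel(x, y), 0.5)
assert np.isclose(tanimoto_kernel(x, x), 1.0)  # k(x, x) = 1 for any nonzero x
```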