Browse Source

[Features][API Changes] Update kernel classes.

v0.2.x
jajupmochi 3 years ago
parent
commit
a76335ed16
4 changed files with 135 additions and 150 deletions
  1. +39
    -20
      gklearn/kernels/graph_kernel.py
  2. +39
    -27
      gklearn/kernels/treelet.py
  3. +52
    -103
      gklearn/kernels/weisfeiler_lehman.py
  4. +5
    -0
      gklearn/utils/kernels.py

+ 39
- 20
gklearn/kernels/graph_kernel.py View File

@@ -32,7 +32,13 @@ class GraphKernel(BaseEstimator): #, ABC):
https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
"""

def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2):
def __init__(self,
parallel=None,
n_jobs=None,
chunksize=None,
normalize=True,
copy_graphs=True, # make sure it is a full deep copy. and faster!
verbose=2):
"""`__init__` for `GraphKernel` object."""
# @todo: the default settings of the parameters are different from those in the self.compute method.
# self._graphs = None
@@ -40,6 +46,7 @@ class GraphKernel(BaseEstimator): #, ABC):
self.n_jobs = n_jobs
self.chunksize = chunksize
self.normalize = normalize
self.copy_graphs = copy_graphs
self.verbose = verbose
# self._run_time = 0
# self._gram_matrix = None
@@ -90,7 +97,7 @@ class GraphKernel(BaseEstimator): #, ABC):
return self


def transform(self, X):
def transform(self, X=None, load_gm_train=False):
"""Compute the graph kernel matrix between given and fitted data.

Parameters
@@ -108,6 +115,12 @@ class GraphKernel(BaseEstimator): #, ABC):
None.

"""
# If `load_gm_train`, load Gram matrix of training data.
if load_gm_train:
check_is_fitted(self, '_gm_train')
self._is_transformed = True
return self._gm_train # @todo: copy or not?

# Check if method "fit" had been called.
check_is_fitted(self, '_graphs')

@@ -133,8 +146,7 @@ class GraphKernel(BaseEstimator): #, ABC):
return kernel_matrix



def fit_transform(self, X):
def fit_transform(self, X, save_gm_train=False):
"""Fit and transform: compute Gram matrix on the same data.

Parameters
@@ -164,6 +176,9 @@ class GraphKernel(BaseEstimator): #, ABC):
finally:
np.seterr(**old_settings)

if save_gm_train:
self._gm_train = gram_matrix

return gram_matrix


@@ -260,7 +275,9 @@ class GraphKernel(BaseEstimator): #, ABC):
kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y)

elif self.parallel is None:
kernel_matrix = self._compute_kernel_matrix_series(Y)
Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y)
graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
kernel_matrix = self._compute_kernel_matrix_series(Y_copy, graphs_copy)

self._run_time = time.time() - start_time
if self.verbose:
@@ -270,26 +287,25 @@ class GraphKernel(BaseEstimator): #, ABC):
return kernel_matrix


def _compute_kernel_matrix_series(self, Y):
"""Compute the kernel matrix between a given target graphs (Y) and
the fitted graphs (X / self._graphs) without parallelization.
def _compute_kernel_matrix_series(self, X, Y):
"""Compute the kernel matrix between two sets of graphs (X and Y) without parallelization.

Parameters
----------
Y : list of graphs, optional
The target graphs.
X, Y : list of graphs
The input graphs.

Returns
-------
kernel_matrix : numpy array, shape = [n_targets, n_inputs]
kernel_matrix : numpy array, shape = [n_X, n_Y]
The computed kernel matrix.

"""
kernel_matrix = np.zeros((len(Y), len(self._graphs)))
kernel_matrix = np.zeros((len(X), len(Y)))

for i_y, g_y in enumerate(Y):
for i_x, g_x in enumerate(self._graphs):
kernel_matrix[i_y, i_x] = self.pairwise_kernel(g_y, g_x)
for i_x, g_x in enumerate(X):
for i_y, g_y in enumerate(Y):
kernel_matrix[i_x, i_y] = self.pairwise_kernel(g_x, g_y)

return kernel_matrix

@@ -335,14 +351,16 @@ class GraphKernel(BaseEstimator): #, ABC):
except NotFittedError:
# Compute diagonals of X.
self._X_diag = np.empty(shape=(len(self._graphs),))
for i, x in enumerate(self._graphs):
graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
for i, x in enumerate(graphs):
self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?

try:
# If transform has happened, return both diagonals.
check_is_fitted(self, ['_Y'])
self._Y_diag = np.empty(shape=(len(self._Y),))
for (i, y) in enumerate(self._Y):
Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y)
for (i, y) in enumerate(Y):
self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?

return self._X_diag, self._Y_diag
@@ -484,7 +502,8 @@ class GraphKernel(BaseEstimator): #, ABC):
if self.parallel == 'imap_unordered':
gram_matrix = self._compute_gm_imap_unordered()
elif self.parallel is None:
gram_matrix = self._compute_gm_series()
graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
gram_matrix = self._compute_gm_series(graphs)
else:
raise Exception('Parallel mode is not set correctly.')

@@ -496,11 +515,11 @@ class GraphKernel(BaseEstimator): #, ABC):
return gram_matrix


def _compute_gm_series(self):
def _compute_gm_series(self, graphs):
pass


def _compute_gm_imap_unordered(self):
def _compute_gm_imap_unordered(self, graphs):
pass




+ 39
- 27
gklearn/kernels/treelet.py View File

@@ -28,16 +28,16 @@ from gklearn.kernels import GraphKernel

class Treelet(GraphKernel):

def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2, precompute_canonkeys=True, save_canonkeys=False, **kwargs):
def __init__(self, **kwargs):
"""Initialise a treelet kernel.
"""
super().__init__(parallel=parallel, n_jobs=n_jobs, chunksize=chunksize, normalize=normalize, verbose=verbose)
GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs})
self.node_labels = kwargs.get('node_labels', [])
self.edge_labels = kwargs.get('edge_labels', [])
self.sub_kernel = kwargs.get('sub_kernel', None)
self.ds_infos = kwargs.get('ds_infos', {})
self.precompute_canonkeys = precompute_canonkeys
self.save_canonkeys = save_canonkeys
self.precompute_canonkeys = kwargs.get('precompute_canonkeys', True)
self.save_canonkeys = kwargs.get('save_canonkeys', True)


##########################################################################
@@ -71,7 +71,7 @@ class Treelet(GraphKernel):
raise ValueError('Sub-kernel not set.')


def _compute_kernel_matrix_series(self, Y):
def _compute_kernel_matrix_series(self, Y, X=None, load_canonkeys=True):
"""Compute the kernel matrix between a given target graphs (Y) and
the fitted graphs (X / self._graphs) without parallelization.

@@ -86,36 +86,45 @@ class Treelet(GraphKernel):
The computed kernel matrix.

"""
if_comp_X_canonkeys = True

# if load saved canonkeys of X from the instance:
if load_canonkeys:
# Canonical keys for self._graphs.
try:
check_is_fitted(self, ['_canonkeys'])
canonkeys_list1 = self._canonkeys
if_comp_X_canonkeys = False
except NotFittedError:
import warnings
warnings.warn('The canonkeys of self._graphs are not computed/saved. The keys of `X` is computed instead.')
if_comp_X_canonkeys = True

# self._add_dummy_labels will modify the input in place.
self._add_dummy_labels() # For self._graphs
# Y = [g.copy() for g in Y] # @todo: ?
self._add_dummy_labels(Y)

# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.

# Canonical keys for self._graphs.
try:
check_is_fitted(self, ['_canonkeys'])
canonkeys_list1 = self._canonkeys
except NotFittedError:
# Compute the canonical keys of X.
if if_comp_X_canonkeys:
if X is None:
raise('X can not be None.')
# self._add_dummy_labels will modify the input in place.
self._add_dummy_labels(X) # for X
canonkeys_list1 = []
iterator = get_iters(self._graphs, desc='getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2))
iterator = get_iters(self._graphs, desc='Getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator:
canonkeys_list1.append(self._get_canonkeys(g))

if self.save_canonkeys:
self._canonkeys = canonkeys_list1

# Canonical keys for Y.
# Y = [g.copy() for g in Y] # @todo: ?
self._add_dummy_labels(Y)
canonkeys_list2 = []
iterator = get_iters(Y, desc='getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2))
iterator = get_iters(Y, desc='Getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator:
canonkeys_list2.append(self._get_canonkeys(g))

if self.save_canonkeys:
self._Y_canonkeys = canonkeys_list2
# if self.save_canonkeys:
# self._Y_canonkeys = canonkeys_list2

# compute kernel matrix.
kernel_matrix = np.zeros((len(Y), len(canonkeys_list1)))
@@ -235,13 +244,13 @@ class Treelet(GraphKernel):
##########################################################################


def _compute_gm_series(self):
self._add_dummy_labels(self._graphs)
def _compute_gm_series(self, graphs):
self._add_dummy_labels(graphs)

# get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset.
canonkeys = []
iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout,
iterator = get_iters(graphs, desc='getting canonkeys', file=sys.stdout,
verbose=(self.verbose >= 2))
for g in iterator:
canonkeys.append(self._get_canonkeys(g))
@@ -250,11 +259,11 @@ class Treelet(GraphKernel):
self._canonkeys = canonkeys

# compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
gram_matrix = np.zeros((len(graphs), len(graphs)))

from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
itr = combinations_with_replacement(range(0, len(graphs)), 2)
len_itr = int(len(graphs) * (len(graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
length=len_itr, verbose=(self.verbose >= 2))
for i, j in iterator:
@@ -390,6 +399,9 @@ class Treelet(GraphKernel):
Treelet kernel between 2 graphs.
"""
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
if len(keys) == 0: # There is nothing in common...
return 0

vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])



+ 52
- 103
gklearn/kernels/weisfeiler_lehman.py View File

@@ -28,7 +28,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


def __init__(self, **kwargs):
GraphKernel.__init__(self)
GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs})
self.node_labels = kwargs.get('node_labels', [])
self.edge_labels = kwargs.get('edge_labels', [])
self.height = int(kwargs.get('height', 0))
@@ -50,7 +50,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
##########################################################################


def _compute_gm_series(self):
def _compute_gm_series(self, graphs):
# if self.verbose >= 2:
# import warnings
# warnings.warn('A part of the computation is parallelized.')
@@ -59,19 +59,19 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

# for WL subtree kernel
if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do(self._graphs)
gram_matrix = self._subtree_kernel_do(graphs)

# for WL shortest path kernel
elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do(self._graphs)
gram_matrix = self._sp_kernel_do(graphs)

# for WL edge kernel
elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do(self._graphs)
gram_matrix = self._edge_kernel_do(graphs)

# for user defined base kernel
else:
gram_matrix = self._user_kernel_do(self._graphs)
gram_matrix = self._user_kernel_do(graphs)

return gram_matrix

@@ -204,70 +204,13 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


def pairwise_kernel(self, g1, g2):
    """Compute the Weisfeiler-Lehman kernel between two graphs.

    Only the 'subtree' base kernel is handled here; the computation is
    delegated to `self._subtree_kernel_do` with `return_mat=False`.

    Parameters
    ----------
    g1, g2 : graph
        The two graphs to compare. They are passed on as they are; no copy
        is made inside this method.

    Returns
    -------
    kernel
        Whatever `self._subtree_kernel_do([g1, g2], return_mat=False)`
        returns.

    Raises
    ------
    NotImplementedError
        If `self._base_kernel` is not 'subtree'. Previously this case fell
        through to `return kernel` with `kernel` never assigned, which
        surfaced as an `UnboundLocalError`.
    """
    # The former `[g1.copy(), g2.copy()]` is intentionally not done here.
    Gn = [g1, g2]

    # for WL subtree kernel
    if self._base_kernel == 'subtree':
        return self._subtree_kernel_do(Gn, return_mat=False)

    # @todo: other subkernels.
    raise NotImplementedError(
        'pairwise_kernel is only implemented for the "subtree" base kernel, '
        'got %r.' % (self._base_kernel,))

@@ -291,7 +234,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
return kernel


def _subtree_kernel_do_nl(self, Gn):
def _subtree_kernel_do_nl(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs with node labels.

Parameters
@@ -301,10 +244,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

Return
------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix / float
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

# initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
@@ -324,7 +268,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
all_num_of_each_label.append(dict(Counter(labels_ori)))

# Compute subtree kernel with the 0th iteration and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

# iterate each height
for h in range(1, self.height + 1):
@@ -342,12 +286,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

return gram_matrix
return kernel_matrix


def _subtree_kernel_do_el(self, Gn):
def _subtree_kernel_do_el(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs with edge labels.

Parameters
@@ -357,19 +301,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

Return
------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

# initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

# Compute subtree kernel with the 0th iteration and add it to the final kernel.
iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
for i, j in iterator:
gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
gram_matrix[j][i] = gram_matrix[i][j]
iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2)
for i, j in iterator: # @todo: not correct if return_mat == False.
kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
kernel_matrix[j][i] = kernel_matrix[i][j]


# if h >= 1.
@@ -393,7 +338,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_el(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


# Iterate along heights (>= 2).
@@ -407,12 +352,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

return gram_matrix
return kernel_matrix


def _subtree_kernel_do_labeled(self, Gn):
def _subtree_kernel_do_labeled(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs with both node and
edge labels.

@@ -423,10 +368,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

Return
------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

# initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
@@ -446,10 +392,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
all_num_of_each_label.append(dict(Counter(labels_ori)))

# Compute subtree kernel with the 0th iteration and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


# if h >= 1.
# if h >= 1:
if self.height > 0:
# Set all edge labels into a tuple. # @todo: remove this original labels or not?
if self.verbose >= 2:
@@ -470,7 +416,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_labeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


# Iterate along heights.
@@ -484,12 +430,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

return gram_matrix
return kernel_matrix


def _subtree_kernel_do_unlabeled(self, Gn):
def _subtree_kernel_do_unlabeled(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs without labels.

Parameters
@@ -499,19 +445,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.

Return
------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 graphs.
"""
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)

# initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

# Compute subtree kernel with the 0th iteration and add it to the final kernel.
iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
for i, j in iterator:
gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
gram_matrix[j][i] = gram_matrix[i][j]
iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2)
for i, j in iterator: # @todo: not correct if return_mat == False.
kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
kernel_matrix[j][i] = kernel_matrix[i][j]


# if h >= 1.
@@ -526,7 +473,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_unlabeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


# Iterate along heights (>= 2).
@@ -540,9 +487,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

# Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)

return gram_matrix
return kernel_matrix


def _subtree_1graph_nl(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
@@ -717,6 +664,8 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
all_num_of_each_label[j])
gram_matrix[j][i] = gram_matrix[i][j]

return gram_matrix


def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2):
"""Compute the subtree kernel.


+ 5
- 0
gklearn/utils/kernels.py View File

@@ -68,6 +68,11 @@ def gaussian_kernel(x, y, gamma=None):
return np.exp((np.sum(np.subtract(x, y) ** 2)) * -gamma)


def tanimoto_kernel(x, y):
    """Compute the Tanimoto kernel between two vectors.

    The value is ``<x, y> / (<x, x> + <y, y> - <x, y>)``, with the inner
    products computed by ``np.dot``.

    Parameters
    ----------
    x, y : array-like
        The two input vectors.

    Returns
    -------
    kernel : float
        The Tanimoto kernel between `x` and `y`. When the denominator is
        zero (for real-valued vectors this only happens when both are all
        zeros), 0.0 is returned instead of dividing by zero.
    """
    xy = np.dot(x, y)
    denominator = np.dot(x, x) + np.dot(y, y) - xy
    # Guard the 0 / 0 case, which would otherwise produce nan together with
    # a RuntimeWarning. The check is restricted to scalar results so that
    # non-vector inputs keep their previous behaviour.
    if np.ndim(denominator) == 0 and denominator == 0:
        return 0.0
    return xy / denominator


def gaussiankernel(x, y, gamma=None):
    """Alias of `gaussian_kernel`: forward `x`, `y` and `gamma` unchanged."""
    kernel = gaussian_kernel(x, y, gamma=gamma)
    return kernel



Loading…
Cancel
Save