Browse Source

[Features][API Changes] Update kernel classes.

v0.2.x
jajupmochi 3 years ago
parent
commit
a76335ed16
4 changed files with 135 additions and 150 deletions
  1. +39
    -20
      gklearn/kernels/graph_kernel.py
  2. +39
    -27
      gklearn/kernels/treelet.py
  3. +52
    -103
      gklearn/kernels/weisfeiler_lehman.py
  4. +5
    -0
      gklearn/utils/kernels.py

+ 39
- 20
gklearn/kernels/graph_kernel.py View File

@@ -32,7 +32,13 @@ class GraphKernel(BaseEstimator): #, ABC):
https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel. https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
""" """


def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2):
def __init__(self,
parallel=None,
n_jobs=None,
chunksize=None,
normalize=True,
copy_graphs=True, # make sure it is a full deep copy. and faster!
verbose=2):
"""`__init__` for `GraphKernel` object.""" """`__init__` for `GraphKernel` object."""
# @todo: the default settings of the parameters are different from those in the self.compute method. # @todo: the default settings of the parameters are different from those in the self.compute method.
# self._graphs = None # self._graphs = None
@@ -40,6 +46,7 @@ class GraphKernel(BaseEstimator): #, ABC):
self.n_jobs = n_jobs self.n_jobs = n_jobs
self.chunksize = chunksize self.chunksize = chunksize
self.normalize = normalize self.normalize = normalize
self.copy_graphs = copy_graphs
self.verbose = verbose self.verbose = verbose
# self._run_time = 0 # self._run_time = 0
# self._gram_matrix = None # self._gram_matrix = None
@@ -90,7 +97,7 @@ class GraphKernel(BaseEstimator): #, ABC):
return self return self




def transform(self, X):
def transform(self, X=None, load_gm_train=False):
"""Compute the graph kernel matrix between given and fitted data. """Compute the graph kernel matrix between given and fitted data.


Parameters Parameters
@@ -108,6 +115,12 @@ class GraphKernel(BaseEstimator): #, ABC):
None. None.


""" """
# If `load_gm_train`, load Gram matrix of training data.
if load_gm_train:
check_is_fitted(self, '_gm_train')
self._is_transformed = True
return self._gm_train # @todo: copy or not?

# Check if method "fit" had been called. # Check if method "fit" had been called.
check_is_fitted(self, '_graphs') check_is_fitted(self, '_graphs')


@@ -133,8 +146,7 @@ class GraphKernel(BaseEstimator): #, ABC):
return kernel_matrix return kernel_matrix





def fit_transform(self, X):
def fit_transform(self, X, save_gm_train=False):
"""Fit and transform: compute Gram matrix on the same data. """Fit and transform: compute Gram matrix on the same data.


Parameters Parameters
@@ -164,6 +176,9 @@ class GraphKernel(BaseEstimator): #, ABC):
finally: finally:
np.seterr(**old_settings) np.seterr(**old_settings)


if save_gm_train:
self._gm_train = gram_matrix

return gram_matrix return gram_matrix




@@ -260,7 +275,9 @@ class GraphKernel(BaseEstimator): #, ABC):
kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y) kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y)


elif self.parallel is None: elif self.parallel is None:
kernel_matrix = self._compute_kernel_matrix_series(Y)
Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y)
graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
kernel_matrix = self._compute_kernel_matrix_series(Y_copy, graphs_copy)


self._run_time = time.time() - start_time self._run_time = time.time() - start_time
if self.verbose: if self.verbose:
@@ -270,26 +287,25 @@ class GraphKernel(BaseEstimator): #, ABC):
return kernel_matrix return kernel_matrix




def _compute_kernel_matrix_series(self, Y):
"""Compute the kernel matrix between a given target graphs (Y) and
the fitted graphs (X / self._graphs) without parallelization.
def _compute_kernel_matrix_series(self, X, Y):
"""Compute the kernel matrix between two sets of graphs (X and Y) without parallelization.


Parameters Parameters
---------- ----------
Y : list of graphs, optional
The target graphs.
X, Y : list of graphs
The input graphs.


Returns Returns
------- -------
kernel_matrix : numpy array, shape = [n_targets, n_inputs]
kernel_matrix : numpy array, shape = [n_X, n_Y]
The computed kernel matrix. The computed kernel matrix.


""" """
kernel_matrix = np.zeros((len(Y), len(self._graphs)))
kernel_matrix = np.zeros((len(X), len(Y)))


for i_y, g_y in enumerate(Y):
for i_x, g_x in enumerate(self._graphs):
kernel_matrix[i_y, i_x] = self.pairwise_kernel(g_y, g_x)
for i_x, g_x in enumerate(X):
for i_y, g_y in enumerate(Y):
kernel_matrix[i_x, i_y] = self.pairwise_kernel(g_x, g_y)


return kernel_matrix return kernel_matrix


@@ -335,14 +351,16 @@ class GraphKernel(BaseEstimator): #, ABC):
except NotFittedError: except NotFittedError:
# Compute diagonals of X. # Compute diagonals of X.
self._X_diag = np.empty(shape=(len(self._graphs),)) self._X_diag = np.empty(shape=(len(self._graphs),))
for i, x in enumerate(self._graphs):
graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
for i, x in enumerate(graphs):
self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel? self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?


try: try:
# If transform has happened, return both diagonals. # If transform has happened, return both diagonals.
check_is_fitted(self, ['_Y']) check_is_fitted(self, ['_Y'])
self._Y_diag = np.empty(shape=(len(self._Y),)) self._Y_diag = np.empty(shape=(len(self._Y),))
for (i, y) in enumerate(self._Y):
Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y)
for (i, y) in enumerate(Y):
self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel? self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?


return self._X_diag, self._Y_diag return self._X_diag, self._Y_diag
@@ -484,7 +502,8 @@ class GraphKernel(BaseEstimator): #, ABC):
if self.parallel == 'imap_unordered': if self.parallel == 'imap_unordered':
gram_matrix = self._compute_gm_imap_unordered() gram_matrix = self._compute_gm_imap_unordered()
elif self.parallel is None: elif self.parallel is None:
gram_matrix = self._compute_gm_series()
graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
gram_matrix = self._compute_gm_series(graphs)
else: else:
raise Exception('Parallel mode is not set correctly.') raise Exception('Parallel mode is not set correctly.')


@@ -496,11 +515,11 @@ class GraphKernel(BaseEstimator): #, ABC):
return gram_matrix return gram_matrix




def _compute_gm_series(self):
def _compute_gm_series(self, graphs):
pass pass




def _compute_gm_imap_unordered(self):
def _compute_gm_imap_unordered(self, graphs):
pass pass






+ 39
- 27
gklearn/kernels/treelet.py View File

@@ -28,16 +28,16 @@ from gklearn.kernels import GraphKernel


class Treelet(GraphKernel): class Treelet(GraphKernel):


def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2, precompute_canonkeys=True, save_canonkeys=False, **kwargs):
def __init__(self, **kwargs):
"""Initialise a treelet kernel. """Initialise a treelet kernel.
""" """
super().__init__(parallel=parallel, n_jobs=n_jobs, chunksize=chunksize, normalize=normalize, verbose=verbose)
GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs})
self.node_labels = kwargs.get('node_labels', []) self.node_labels = kwargs.get('node_labels', [])
self.edge_labels = kwargs.get('edge_labels', []) self.edge_labels = kwargs.get('edge_labels', [])
self.sub_kernel = kwargs.get('sub_kernel', None) self.sub_kernel = kwargs.get('sub_kernel', None)
self.ds_infos = kwargs.get('ds_infos', {}) self.ds_infos = kwargs.get('ds_infos', {})
self.precompute_canonkeys = precompute_canonkeys
self.save_canonkeys = save_canonkeys
self.precompute_canonkeys = kwargs.get('precompute_canonkeys', True)
self.save_canonkeys = kwargs.get('save_canonkeys', True)




########################################################################## ##########################################################################
@@ -71,7 +71,7 @@ class Treelet(GraphKernel):
raise ValueError('Sub-kernel not set.') raise ValueError('Sub-kernel not set.')




def _compute_kernel_matrix_series(self, Y):
def _compute_kernel_matrix_series(self, Y, X=None, load_canonkeys=True):
"""Compute the kernel matrix between a given target graphs (Y) and """Compute the kernel matrix between a given target graphs (Y) and
the fitted graphs (X / self._graphs) without parallelization. the fitted graphs (X / self._graphs) without parallelization.


@@ -86,36 +86,45 @@ class Treelet(GraphKernel):
The computed kernel matrix. The computed kernel matrix.


""" """
if_comp_X_canonkeys = True

# if load saved canonkeys of X from the instance:
if load_canonkeys:
# Canonical keys for self._graphs.
try:
check_is_fitted(self, ['_canonkeys'])
canonkeys_list1 = self._canonkeys
if_comp_X_canonkeys = False
except NotFittedError:
import warnings
warnings.warn('The canonkeys of self._graphs are not computed/saved. The keys of `X` is computed instead.')
if_comp_X_canonkeys = True


# self._add_dummy_labels will modify the input in place.
self._add_dummy_labels() # For self._graphs
# Y = [g.copy() for g in Y] # @todo: ?
self._add_dummy_labels(Y)


# get all canonical keys of all graphs before computing kernels to save # get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.


# Canonical keys for self._graphs.
try:
check_is_fitted(self, ['_canonkeys'])
canonkeys_list1 = self._canonkeys
except NotFittedError:
# Compute the canonical keys of X.
if if_comp_X_canonkeys:
if X is None:
raise('X can not be None.')
# self._add_dummy_labels will modify the input in place.
self._add_dummy_labels(X) # for X
canonkeys_list1 = [] canonkeys_list1 = []
iterator = get_iters(self._graphs, desc='getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2))
iterator = get_iters(self._graphs, desc='Getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator: for g in iterator:
canonkeys_list1.append(self._get_canonkeys(g)) canonkeys_list1.append(self._get_canonkeys(g))


if self.save_canonkeys:
self._canonkeys = canonkeys_list1

# Canonical keys for Y. # Canonical keys for Y.
# Y = [g.copy() for g in Y] # @todo: ?
self._add_dummy_labels(Y)
canonkeys_list2 = [] canonkeys_list2 = []
iterator = get_iters(Y, desc='getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2))
iterator = get_iters(Y, desc='Getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2))
for g in iterator: for g in iterator:
canonkeys_list2.append(self._get_canonkeys(g)) canonkeys_list2.append(self._get_canonkeys(g))


if self.save_canonkeys:
self._Y_canonkeys = canonkeys_list2
# if self.save_canonkeys:
# self._Y_canonkeys = canonkeys_list2


# compute kernel matrix. # compute kernel matrix.
kernel_matrix = np.zeros((len(Y), len(canonkeys_list1))) kernel_matrix = np.zeros((len(Y), len(canonkeys_list1)))
@@ -235,13 +244,13 @@ class Treelet(GraphKernel):
########################################################################## ##########################################################################




def _compute_gm_series(self):
self._add_dummy_labels(self._graphs)
def _compute_gm_series(self, graphs):
self._add_dummy_labels(graphs)


# get all canonical keys of all graphs before computing kernels to save # get all canonical keys of all graphs before computing kernels to save
# time, but this may cost a lot of memory for large dataset. # time, but this may cost a lot of memory for large dataset.
canonkeys = [] canonkeys = []
iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout,
iterator = get_iters(graphs, desc='getting canonkeys', file=sys.stdout,
verbose=(self.verbose >= 2)) verbose=(self.verbose >= 2))
for g in iterator: for g in iterator:
canonkeys.append(self._get_canonkeys(g)) canonkeys.append(self._get_canonkeys(g))
@@ -250,11 +259,11 @@ class Treelet(GraphKernel):
self._canonkeys = canonkeys self._canonkeys = canonkeys


# compute Gram matrix. # compute Gram matrix.
gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
gram_matrix = np.zeros((len(graphs), len(graphs)))


from itertools import combinations_with_replacement from itertools import combinations_with_replacement
itr = combinations_with_replacement(range(0, len(self._graphs)), 2)
len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2)
itr = combinations_with_replacement(range(0, len(graphs)), 2)
len_itr = int(len(graphs) * (len(graphs) + 1) / 2)
iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout,
length=len_itr, verbose=(self.verbose >= 2)) length=len_itr, verbose=(self.verbose >= 2))
for i, j in iterator: for i, j in iterator:
@@ -390,6 +399,9 @@ class Treelet(GraphKernel):
Treelet kernel between 2 graphs. Treelet kernel between 2 graphs.
""" """
keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs
if len(keys) == 0: # There is nothing in common...
return 0

vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys])
vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys])




+ 52
- 103
gklearn/kernels/weisfeiler_lehman.py View File

@@ -28,7 +28,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.




def __init__(self, **kwargs): def __init__(self, **kwargs):
GraphKernel.__init__(self)
GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs})
self.node_labels = kwargs.get('node_labels', []) self.node_labels = kwargs.get('node_labels', [])
self.edge_labels = kwargs.get('edge_labels', []) self.edge_labels = kwargs.get('edge_labels', [])
self.height = int(kwargs.get('height', 0)) self.height = int(kwargs.get('height', 0))
@@ -50,7 +50,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
########################################################################## ##########################################################################




def _compute_gm_series(self):
def _compute_gm_series(self, graphs):
# if self.verbose >= 2: # if self.verbose >= 2:
# import warnings # import warnings
# warnings.warn('A part of the computation is parallelized.') # warnings.warn('A part of the computation is parallelized.')
@@ -59,19 +59,19 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


# for WL subtree kernel # for WL subtree kernel
if self._base_kernel == 'subtree': if self._base_kernel == 'subtree':
gram_matrix = self._subtree_kernel_do(self._graphs)
gram_matrix = self._subtree_kernel_do(graphs)


# for WL shortest path kernel # for WL shortest path kernel
elif self._base_kernel == 'sp': elif self._base_kernel == 'sp':
gram_matrix = self._sp_kernel_do(self._graphs)
gram_matrix = self._sp_kernel_do(graphs)


# for WL edge kernel # for WL edge kernel
elif self._base_kernel == 'edge': elif self._base_kernel == 'edge':
gram_matrix = self._edge_kernel_do(self._graphs)
gram_matrix = self._edge_kernel_do(graphs)


# for user defined base kernel # for user defined base kernel
else: else:
gram_matrix = self._user_kernel_do(self._graphs)
gram_matrix = self._user_kernel_do(graphs)


return gram_matrix return gram_matrix


@@ -204,70 +204,13 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.




def pairwise_kernel(self, g1, g2): def pairwise_kernel(self, g1, g2):
Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster!
kernel = 0

# initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

# for each graph
for G in Gn:
# set all labels into a tuple.
for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
G.nodes[nd]['lt'] = tuple(attrs[name] for name in self.node_labels)
# get the set of original labels
labels_ori = list(nx.get_node_attributes(G, 'lt').values())
# number of occurence of each label in G
all_num_of_each_label.append(dict(Counter(labels_ori)))

# Compute subtree kernel with the 0th iteration and add it to the final kernel.
kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

# iterate each height
for h in range(1, self.height + 1):
all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
# all_labels_ori = set() # all unique orignal labels in all graphs in this iteration
all_num_of_each_label = [] # number of occurence of each label in G

# @todo: parallel this part.
for G in Gn:

all_multisets = []
for node, attrs in G.nodes(data=True):
# Multiset-label determination.
multiset = [G.nodes[neighbors]['lt'] for neighbors in G[node]]
# sorting each multiset
multiset.sort()
multiset = [attrs['lt']] + multiset # add the prefix
all_multisets.append(tuple(multiset))

# label compression
set_unique = list(set(all_multisets)) # set of unique multiset labels
# a dictionary mapping original labels to new ones.
set_compressed = {}
# if a label occured before, assign its former compressed label,
# else assign the number of labels occured + 1 as the compressed label.
for value in set_unique:
if value in all_set_compressed.keys():
set_compressed[value] = all_set_compressed[value]
else:
set_compressed[value] = str(num_of_labels_occured + 1)
num_of_labels_occured += 1

all_set_compressed.update(set_compressed)

# relabel nodes
for idx, node in enumerate(G.nodes()):
G.nodes[node]['lt'] = set_compressed[all_multisets[idx]]

# get the set of compressed labels
labels_comp = list(nx.get_node_attributes(G, 'lt').values())
# all_labels_ori.update(labels_comp)
all_num_of_each_label.append(dict(Counter(labels_comp)))
# Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster!
Gn = [g1, g2]
# for WL subtree kernel
if self._base_kernel == 'subtree':
kernel = self._subtree_kernel_do(Gn, return_mat=False)


# Compute subtree kernel with h iterations and add it to the final kernel
kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)
# @todo: other subkernels.


return kernel return kernel


@@ -291,7 +234,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
return kernel return kernel




def _subtree_kernel_do_nl(self, Gn):
def _subtree_kernel_do_nl(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs with node labels. """Compute Weisfeiler-Lehman kernels between graphs with node labels.


Parameters Parameters
@@ -301,10 +244,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


Return Return
------ ------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix / float
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
""" """
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)


# initial for height = 0 # initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
@@ -324,7 +268,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
all_num_of_each_label.append(dict(Counter(labels_ori))) all_num_of_each_label.append(dict(Counter(labels_ori)))


# Compute subtree kernel with the 0th iteration and add it to the final kernel. # Compute subtree kernel with the 0th iteration and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


# iterate each height # iterate each height
for h in range(1, self.height + 1): for h in range(1, self.height + 1):
@@ -342,12 +286,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel # Compute subtree kernel with h iterations and add it to the final kernel
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


return gram_matrix
return kernel_matrix




def _subtree_kernel_do_el(self, Gn):
def _subtree_kernel_do_el(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs with edge labels. """Compute Weisfeiler-Lehman kernels between graphs with edge labels.


Parameters Parameters
@@ -357,19 +301,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


Return Return
------ ------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
""" """
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)


# initial for height = 0 # initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration


# Compute subtree kernel with the 0th iteration and add it to the final kernel. # Compute subtree kernel with the 0th iteration and add it to the final kernel.
iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
for i, j in iterator:
gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
gram_matrix[j][i] = gram_matrix[i][j]
iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2)
for i, j in iterator: # @todo: not correct if return_mat == False.
kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
kernel_matrix[j][i] = kernel_matrix[i][j]




# if h >= 1. # if h >= 1.
@@ -393,7 +338,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_el(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_el(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel. # Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)




# Iterate along heights (>= 2). # Iterate along heights (>= 2).
@@ -407,12 +352,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel. # Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


return gram_matrix
return kernel_matrix




def _subtree_kernel_do_labeled(self, Gn):
def _subtree_kernel_do_labeled(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs with both node and """Compute Weisfeiler-Lehman kernels between graphs with both node and
edge labels. edge labels.


@@ -423,10 +368,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


Return Return
------ ------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
""" """
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)


# initial for height = 0 # initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
@@ -446,10 +392,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
all_num_of_each_label.append(dict(Counter(labels_ori))) all_num_of_each_label.append(dict(Counter(labels_ori)))


# Compute subtree kernel with the 0th iteration and add it to the final kernel. # Compute subtree kernel with the 0th iteration and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)




# if h >= 1.
# if h >= 1:
if self.height > 0: if self.height > 0:
# Set all edge labels into a tuple. # @todo: remove this original labels or not? # Set all edge labels into a tuple. # @todo: remove this original labels or not?
if self.verbose >= 2: if self.verbose >= 2:
@@ -470,7 +416,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_labeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_labeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel. # Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)




# Iterate along heights. # Iterate along heights.
@@ -484,12 +430,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel. # Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


return gram_matrix
return kernel_matrix




def _subtree_kernel_do_unlabeled(self, Gn):
def _subtree_kernel_do_unlabeled(self, Gn, return_mat=True):
"""Compute Weisfeiler-Lehman kernels between graphs without labels. """Compute Weisfeiler-Lehman kernels between graphs without labels.


Parameters Parameters
@@ -499,19 +445,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


Return Return
------ ------
gram_matrix : Numpy matrix
kernel_matrix : Numpy matrix
Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
""" """
gram_matrix = np.zeros((len(Gn), len(Gn)))
kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0)
gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr)


# initial for height = 0 # initial for height = 0
all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration


# Compute subtree kernel with the 0th iteration and add it to the final kernel. # Compute subtree kernel with the 0th iteration and add it to the final kernel.
iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
for i, j in iterator:
gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
gram_matrix[j][i] = gram_matrix[i][j]
iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2)
for i, j in iterator: # @todo: not correct if return_mat == False.
kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
kernel_matrix[j][i] = kernel_matrix[i][j]




# if h >= 1. # if h >= 1.
@@ -526,7 +473,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_unlabeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_unlabeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel. # Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)




# Iterate along heights (>= 2). # Iterate along heights (>= 2).
@@ -540,9 +487,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)


# Compute subtree kernel with h iterations and add it to the final kernel. # Compute subtree kernel with h iterations and add it to the final kernel.
self._compute_gram_itr(gram_matrix, all_num_of_each_label)
kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label)


return gram_matrix
return kernel_matrix




def _subtree_1graph_nl(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured): def _subtree_1graph_nl(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
@@ -717,6 +664,8 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
all_num_of_each_label[j]) all_num_of_each_label[j])
gram_matrix[j][i] = gram_matrix[i][j] gram_matrix[j][i] = gram_matrix[i][j]


return gram_matrix



def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2): def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2):
"""Compute the subtree kernel. """Compute the subtree kernel.


+ 5
- 0
gklearn/utils/kernels.py View File

@@ -68,6 +68,11 @@ def gaussian_kernel(x, y, gamma=None):
return np.exp((np.sum(np.subtract(x, y) ** 2)) * -gamma) return np.exp((np.sum(np.subtract(x, y) ** 2)) * -gamma)




def tanimoto_kernel(x, y):
    """Tanimoto kernel between two vectors.

    Computes ``<x, y> / (<x, x> + <y, y> - <x, y>)``.

    Parameters
    ----------
    x, y : array-like
        The two input vectors. They must have the same length.

    Returns
    -------
    kernel : float
        The Tanimoto kernel between x and y. If the denominator is zero
        (which, for real vectors, only happens when both are all-zero),
        0.0 is returned instead of dividing by zero.
    """
    xy = np.dot(x, y)
    denominator = np.dot(x, x) + np.dot(y, y) - xy
    # Two all-zero vectors give 0 / 0, i.e. NaN plus a NumPy RuntimeWarning.
    # Treat "nothing in common" as zero similarity instead. The scalar check
    # keeps non-scalar results of np.dot on their original code path.
    if np.ndim(denominator) == 0 and denominator == 0:
        return 0.0
    return xy / denominator


def gaussiankernel(x, y, gamma=None): def gaussiankernel(x, y, gamma=None):
return gaussian_kernel(x, y, gamma=gamma) return gaussian_kernel(x, y, gamma=gamma)




Loading…
Cancel
Save