From e84bc768669e1a66c59ed17f98e929a59c87dbc0 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Sat, 26 Dec 2020 11:19:31 +0100
Subject: [PATCH] Add parallelization to WL subtree kernel.

---
 gklearn/kernels/weisfeiler_lehman.py | 462 ++++++++++++++++++++++-------------
 gklearn/tests/test_graph_kernels.py  |   1 +
 2 files changed, 292 insertions(+), 171 deletions(-)

diff --git a/gklearn/kernels/weisfeiler_lehman.py b/gklearn/kernels/weisfeiler_lehman.py
index 8b36b37..da14ff8 100644
--- a/gklearn/kernels/weisfeiler_lehman.py
+++ b/gklearn/kernels/weisfeiler_lehman.py
@@ -7,22 +7,22 @@ Created on Tue Apr 14 15:16:34 2020
 
 @references:
 
-    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM. 
-    Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 
+    [1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM.
+    Weisfeiler-lehman graph kernels. Journal of Machine Learning Research.
     2011;12(Sep):2539-61.
 """
 
 import numpy as np
 import networkx as nx
 from collections import Counter
-from functools import partial
+# from functools import partial
 from gklearn.utils import SpecialLabel
-from gklearn.utils.parallel import parallel_gm
+from gklearn.utils.parallel import parallel_gm, parallel_me
 from gklearn.kernels import GraphKernel
 
 
-class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge user kernel.
-    
+class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.
+
     def __init__(self, **kwargs):
         GraphKernel.__init__(self)
         self._node_labels = kwargs.get('node_labels', [])
@@ -33,115 +33,235 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
 
 
     def _compute_gm_series(self):
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('A part of the computation is parallelized.')
-        
+#        if self._verbose >= 2:
+#            import warnings
+#            warnings.warn('A part of the computation is parallelized.')
+
         self._add_dummy_node_labels(self._graphs)
-        
+
         # for WL subtree kernel
-        if self._base_kernel == 'subtree':
+        if self._base_kernel == 'subtree':
             gram_matrix = self._subtree_kernel_do(self._graphs)
-        
+
         # for WL shortest path kernel
         elif self._base_kernel == 'sp':
             gram_matrix = self._sp_kernel_do(self._graphs)
-        
+
         # for WL edge kernel
         elif self._base_kernel == 'edge':
             gram_matrix = self._edge_kernel_do(self._graphs)
-        
+
         # for user defined base kernel
         else:
             gram_matrix = self._user_kernel_do(self._graphs)
-        
+
         return gram_matrix
-    
-    
+
+
     def _compute_gm_imap_unordered(self):
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
-        return self._compute_gm_series()
-    
-    
+        self._add_dummy_node_labels(self._graphs)
+
+        if self._base_kernel == 'subtree':
+            gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))
+
+            def init_worker(gn_toshare):
+                global G_gn
+                G_gn = gn_toshare
+            do_fun = self._wrapper_pairwise
+            parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
+                        glbv=(self._graphs,), n_jobs=self._n_jobs, verbose=self._verbose)
+            return gram_matrix
+        else:
+            if self._verbose >= 2:
+                import warnings
+                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
+            return self._compute_gm_series()
+
+
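The init_worker/global pattern above is how parallel_gm hands the graph list to each worker process exactly once, rather than pickling it into every task. Below is a minimal, self-contained sketch of that pattern, with parallel_gm reduced to a plain multiprocessing.Pool; the _toy_pairwise function and the list-of-lists "graphs" are illustrative stand-ins, not gklearn API:

    from itertools import combinations_with_replacement
    from multiprocessing import Pool

    import numpy as np


    def _init_worker(gn_toshare):
        # Runs once per worker process; publishes the shared data as a global.
        global G_gn
        G_gn = gn_toshare


    def _toy_pairwise(ij):
        # Stand-in for self.pairwise_kernel(G_gn[i], G_gn[j]).
        i, j = ij
        return i, j, float(len(G_gn[i]) * len(G_gn[j]))


    if __name__ == '__main__':
        graphs = [list(range(n)) for n in (2, 3, 4)]  # stand-ins for nx.Graph objects
        gram_matrix = np.zeros((len(graphs), len(graphs)))
        itr = combinations_with_replacement(range(len(graphs)), 2)
        with Pool(processes=2, initializer=_init_worker, initargs=(graphs,)) as pool:
            for i, j, kernel in pool.imap_unordered(_toy_pairwise, itr):
                gram_matrix[i][j] = kernel
                gram_matrix[j][i] = kernel
        print(gram_matrix)

Sharing the graphs through the pool initializer avoids re-serializing the whole dataset for each of the O(n^2) pairwise tasks, which is the point of the parallel path added by this patch.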
     def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better.
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('A part of the computation is parallelized.')
-        
+#        if self._verbose >= 2:
+#            import warnings
+#            warnings.warn('A part of the computation is parallelized.')
+
         self._add_dummy_node_labels(g_list + [g1])
-        
+
         # for WL subtree kernel
-        if self._base_kernel == 'subtree':
+        if self._base_kernel == 'subtree':
             gram_matrix = self._subtree_kernel_do(g_list + [g1])
-        
+
         # for WL shortest path kernel
         elif self._base_kernel == 'sp':
             gram_matrix = self._sp_kernel_do(g_list + [g1])
-        
+
         # for WL edge kernel
         elif self._base_kernel == 'edge':
             gram_matrix = self._edge_kernel_do(g_list + [g1])
-        
+
         # for user defined base kernel
         else:
             gram_matrix = self._user_kernel_do(g_list + [g1])
-        
+
         return list(gram_matrix[-1][0:-1])
-    
-    
+
+
     def _compute_kernel_list_imap_unordered(self, g1, g_list):
-        if self._verbose >= 2:
-            import warnings
-            warnings.warn('Only a part of the computation is parallelized due to the structure of this kernel.')
-        return self._compute_kernel_list_series(g1, g_list)
-    
-    
+        self._add_dummy_node_labels(g_list + [g1])
+
+        if self._base_kernel == 'subtree':
+            kernel_list = [None] * len(g_list)
+
+            def init_worker(g1_toshare, g_list_toshare):
+                global G_g1, G_g_list
+                G_g1 = g1_toshare
+                G_g_list = g_list_toshare
+            do_fun = self._wrapper_kernel_list_do
+            def func_assign(result, var_to_assign):
+                var_to_assign[result[0]] = result[1]
+            itr = range(len(g_list))
+            len_itr = len(g_list)
+            parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
+                init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
+                n_jobs=self._n_jobs, itr_desc='Computing kernels', verbose=self._verbose)
+            return kernel_list
+        else:
+            if self._verbose >= 2:
+                import warnings
+                warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
+            return self._compute_kernel_list_series(g1, g_list)
+
 
     def _wrapper_kernel_list_do(self, itr):
-        pass
-    
-    
+        return itr, self.pairwise_kernel(G_g1, G_g_list[itr])
+
+
     def _compute_single_kernel_series(self, g1, g2): # @todo: this should be better.
         self._add_dummy_node_labels([g1] + [g2])
 
         # for WL subtree kernel
-        if self._base_kernel == 'subtree':
+        if self._base_kernel == 'subtree':
             gram_matrix = self._subtree_kernel_do([g1] + [g2])
-        
+
         # for WL shortest path kernel
         elif self._base_kernel == 'sp':
             gram_matrix = self._sp_kernel_do([g1] + [g2])
-        
+
         # for WL edge kernel
         elif self._base_kernel == 'edge':
             gram_matrix = self._edge_kernel_do([g1] + [g2])
-        
+
         # for user defined base kernel
         else:
             gram_matrix = self._user_kernel_do([g1] + [g2])
-        
+
         return gram_matrix[0][1]
-    
-    
+
+
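_wrapper_kernel_list_do above returns an (index, value) pair so that func_assign can slot each result into kernel_list regardless of the order in which imap_unordered yields them. A serial sketch of that contract, with mock_parallel_me standing in for gklearn.utils.parallel.parallel_me:

    def mock_parallel_me(do_fun, func_assign, var_to_assign, itr):
        # The real parallel_me uses Pool.imap_unordered; arrival order is not guaranteed.
        for result in map(do_fun, itr):
            func_assign(result, var_to_assign)

    def func_assign(result, var_to_assign):
        var_to_assign[result[0]] = result[1]

    kernel_list = [None] * 4
    mock_parallel_me(lambda i: (i, float(i * i)), func_assign, kernel_list, range(4))
    print(kernel_list)  # [0.0, 1.0, 4.0, 9.0]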
+    def pairwise_kernel(self, g1, g2):
+        Gn = [g1, g2]
+        kernel = 0
+
+        # initial for height = 0
+        all_num_of_each_label = [] # number of occurrences of each label in each graph in this iteration
+
+        # for each graph
+        for G in Gn:
+            # set all labels into a tuple.
+            for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
+                G.nodes[nd]['label_tuple'] = tuple(attrs[name] for name in self._node_labels)
+            # get the set of original labels
+            labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
+            # number of occurrences of each label in G
+            all_num_of_each_label.append(dict(Counter(labels_ori)))
+
+        # Compute subtree kernel with the 0th iteration and add it to the final kernel.
+        kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)
+
+        # iterate each height
+        for h in range(1, self._height + 1):
+            all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
+            num_of_labels_occured = 0 # number of distinct labels that have occurred as node labels in all graphs so far
+            # all_labels_ori = set() # all unique original labels in all graphs in this iteration
+            all_num_of_each_label = [] # number of occurrences of each label in G
+
+            # @todo: parallel this part.
+            for idx, G in enumerate(Gn):
+
+                all_multisets = []
+                for node, attrs in G.nodes(data=True):
+                    # Multiset-label determination.
+                    multiset = [G.nodes[neighbors]['label_tuple'] for neighbors in G[node]]
+                    # sorting each multiset
+                    multiset.sort()
+                    multiset = [attrs['label_tuple']] + multiset # add the prefix
+                    all_multisets.append(tuple(multiset))
+
+                # label compression
+                set_unique = list(set(all_multisets)) # set of unique multiset labels
+                # a dictionary mapping original labels to new ones.
+                set_compressed = {}
+                # if a label occurred before, assign its former compressed label,
+                # else assign the number of labels occurred + 1 as the compressed label.
+                for value in set_unique:
+                    if value in all_set_compressed.keys():
+                        set_compressed.update({value: all_set_compressed[value]})
+                    else:
+                        set_compressed.update({value: str(num_of_labels_occured + 1)})
+                        num_of_labels_occured += 1
+
+                all_set_compressed.update(set_compressed)
+
+                # relabel nodes
+                for idx, node in enumerate(G.nodes()):
+                    G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]
+
+                # get the set of compressed labels
+                labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
+                # all_labels_ori.update(labels_comp)
+                all_num_of_each_label.append(dict(Counter(labels_comp)))
+
+            # Compute subtree kernel with h iterations and add it to the final kernel
+            kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)
+
+        return kernel
+
+
+    def _wrapper_pairwise(self, itr):
+        i = itr[0]
+        j = itr[1]
+        return i, j, self.pairwise_kernel(G_gn[i], G_gn[j])
+
+
+    def _compute_kernel_itr(self, kernel, all_num_of_each_label):
+        labels = set(list(all_num_of_each_label[0].keys()) +
+                     list(all_num_of_each_label[1].keys()))
+        vector1 = np.array([(all_num_of_each_label[0][label]
+                             if (label in all_num_of_each_label[0].keys()) else 0)
+                            for label in labels])
+        vector2 = np.array([(all_num_of_each_label[1][label]
+                             if (label in all_num_of_each_label[1].keys()) else 0)
+                            for label in labels])
+        kernel += np.dot(vector1, vector2)
+        return kernel
+
+
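_compute_kernel_itr is a dot product between the two graphs' label histograms at one WL iteration: labels shared by both graphs contribute the product of their counts, labels unique to one graph contribute nothing. A worked example with invented counts:

    import numpy as np

    counts1 = {'a': 2, 'b': 1}   # graph 1: label 'a' twice, 'b' once
    counts2 = {'a': 1, 'c': 3}   # graph 2: label 'a' once, 'c' three times

    labels = set(counts1) | set(counts2)
    vector1 = np.array([counts1.get(label, 0) for label in labels])
    vector2 = np.array([counts2.get(label, 0) for label in labels])
    print(np.dot(vector1, vector2))  # 2*1 = 2; 'b' and 'c' match nothing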
     def _subtree_kernel_do(self, Gn):
         """Compute Weisfeiler-Lehman kernels between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
-        
+            List of graphs between which the kernels are computed.
+
         Return
         ------
         gram_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
         """
         gram_matrix = np.zeros((len(Gn), len(Gn)))
-        
+
         # initial for height = 0
         all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration
-        
+
         # for each graph
         for G in Gn:
             # set all labels into a tuple.
@@ -151,112 +271,112 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             labels_ori = list(nx.get_node_attributes(G, 'label_tuple').values())
             # number of occurence of each label in G
             all_num_of_each_label.append(dict(Counter(labels_ori)))
-        
+
         # Compute subtree kernel with the 0th iteration and add it to the final kernel.
-        self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)
-        
+        self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+
         # iterate each height
         for h in range(1, self._height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             # all_labels_ori = set() # all unique orignal labels in all graphs in this iteration
             all_num_of_each_label = [] # number of occurence of each label in G
-        
+
             # @todo: parallel this part.
             for idx, G in enumerate(Gn):
-            
+
                 all_multisets = []
                 for node, attrs in G.nodes(data=True):
                     # Multiset-label determination.
                     multiset = [G.nodes[neighbors]['label_tuple'] for neighbors in G[node]]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = [attrs['label_tuple']] + multiset # add the prefix 
+                    multiset = [attrs['label_tuple']] + multiset # add the prefix
                     all_multisets.append(tuple(multiset))
-        
+
                 # label compression
                 set_unique = list(set(all_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, 
-                # else assign the number of labels occured + 1 as the compressed label. 
+                # if a label occurred before, assign its former compressed label,
+                # else assign the number of labels occurred + 1 as the compressed label.
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({value: all_set_compressed[value]})
                     else:
                         set_compressed.update({value: str(num_of_labels_occured + 1)})
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for idx, node in enumerate(G.nodes()):
                     G.nodes[node]['label_tuple'] = set_compressed[all_multisets[idx]]
-        
+
                 # get the set of compressed labels
                 labels_comp = list(nx.get_node_attributes(G, 'label_tuple').values())
                 # all_labels_ori.update(labels_comp)
                 all_num_of_each_label.append(dict(Counter(labels_comp)))
-        
+
             # Compute subtree kernel with h iterations and add it to the final kernel
-            self._compute_gram_itr(gram_matrix, all_num_of_each_label, Gn)
-        
+            self._compute_gram_itr(gram_matrix, all_num_of_each_label)
+
         return gram_matrix
-    
-    
+
+
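Each pass of the height loop above is one WL refinement step: a node's new label is its own label followed by the sorted multiset of its neighbours' labels, compressed to a short string shared across all graphs in the batch. The same step, reduced to a standalone sketch on a toy path graph with invented labels:

    import networkx as nx

    G = nx.path_graph(3)  # 0 - 1 - 2
    nx.set_node_attributes(G, {0: ('a',), 1: ('b',), 2: ('a',)}, 'label_tuple')

    all_set_compressed = {}
    num_of_labels_occured = 0

    # Multiset-label determination: own label first, sorted neighbour labels after.
    all_multisets = []
    for node, attrs in G.nodes(data=True):
        multiset = sorted(G.nodes[nbr]['label_tuple'] for nbr in G[node])
        all_multisets.append(tuple([attrs['label_tuple']] + multiset))

    # Label compression: each multiset seen for the first time gets a fresh label.
    for value in all_multisets:
        if value not in all_set_compressed:
            num_of_labels_occured += 1
            all_set_compressed[value] = str(num_of_labels_occured)

    # Relabeling.
    for idx, node in enumerate(G.nodes()):
        G.nodes[node]['label_tuple'] = all_set_compressed[all_multisets[idx]]

    print(nx.get_node_attributes(G, 'label_tuple'))
    # {0: '1', 1: '2', 2: '1'} -- the two end nodes share a subtree pattern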
-    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label, Gn):
+    def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
         """Compute Gram matrix using the base kernel.
         """
-        if self._parallel == 'imap_unordered':
-            # compute kernels.
-            def init_worker(alllabels_toshare):
-                global G_alllabels
-                G_alllabels = alllabels_toshare
-            do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
-            parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
-                        glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
-        elif self._parallel is None:
-            for i in range(len(gram_matrix)):
-                for j in range(i, len(gram_matrix)):
-                    gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
-                            all_num_of_each_label[j], gram_matrix[i][j])
-                    gram_matrix[j][i] = gram_matrix[i][j]
-    
-    
+#        if self._parallel == 'imap_unordered':
+#            # compute kernels.
+#            def init_worker(alllabels_toshare):
+#                global G_alllabels
+#                G_alllabels = alllabels_toshare
+#            do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
+#            parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
+#                        glbv=(all_num_of_each_label,), n_jobs=self._n_jobs, verbose=self._verbose)
+#        elif self._parallel is None:
+        for i in range(len(gram_matrix)):
+            for j in range(i, len(gram_matrix)):
+                gram_matrix[i][j] = self._compute_subtree_kernel(all_num_of_each_label[i],
+                        all_num_of_each_label[j], gram_matrix[i][j])
+                gram_matrix[j][i] = gram_matrix[i][j]
+
+
     def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2, kernel):
         """Compute the subtree kernel.
         """
         labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
-        vector1 = np.array([(num_of_each_label1[label] 
-                             if (label in num_of_each_label1.keys()) else 0) 
+        vector1 = np.array([(num_of_each_label1[label]
+                             if (label in num_of_each_label1.keys()) else 0)
                             for label in labels])
-        vector2 = np.array([(num_of_each_label2[label] 
-                             if (label in num_of_each_label2.keys()) else 0) 
+        vector2 = np.array([(num_of_each_label2[label]
+                             if (label in num_of_each_label2.keys()) else 0)
                             for label in labels])
         kernel += np.dot(vector1, vector2)
         return kernel
-    
-    
-    def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
-        i = itr[0]
-        j = itr[1]
-        return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
-    
-    
+
+
+#    def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
+#        i = itr[0]
+#        j = itr[1]
+#        return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])
+
+
     def _wl_spkernel_do(Gn, node_label, edge_label, height):
         """Compute Weisfeiler-Lehman shortest path kernels between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
+            List of graphs between which the kernels are computed.
         node_label : string
-            node attribute used as label. 
+            node attribute used as label.
         edge_label : string
-            edge attribute used as label. 
+            edge attribute used as label.
         height : int
             subtree height.
-        
+
         Return
         ------
         gram_matrix : Numpy matrix
@@ -264,22 +384,22 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
         """
         pass
         from gklearn.utils.utils import getSPGraph
-        
+
         # init.
         height = int(height)
         gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel
-        
+
         Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn
-        
+
         # initial for height = 0
         for i in range(0, len(Gn)):
             for j in range(i, len(Gn)):
                 for e1 in Gn[i].edges(data = True):
-                    for e2 in Gn[j].edges(data = True):
+                    for e2 in Gn[j].edges(data = True):
                         if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                             gram_matrix[i][j] += 1
                 gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         # iterate each height
         for h in range(1, height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
@@ -291,193 +411,193 @@ class WeisfeilerLehman(GraphKernel): # @todo: total parallelization and sp, edge
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             for G in Gn: # for each graph
                 set_multisets = []
                 for node in G.nodes(data = True):
                     # Multiset-label determination.
                     multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix 
-                    set_multisets.append(multiset)
-        
+                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
+                    set_multisets.append(multiset)
+
                 # label compression
                 set_unique = list(set(set_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label 
+                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({ value : all_set_compressed[value] })
                     else:
                         set_compressed.update({ value : str(num_of_labels_occured + 1) })
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for node in G.nodes(data = True):
                     node[1][node_label] = set_compressed[set_multisets[node[0]]]
-        
+
             # Compute subtree kernel with h iterations and add it to the final kernel
             for i in range(0, len(Gn)):
                 for j in range(i, len(Gn)):
                     for e1 in Gn[i].edges(data = True):
-                        for e2 in Gn[j].edges(data = True):
+                        for e2 in Gn[j].edges(data = True):
                             if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                 gram_matrix[i][j] += 1
                     gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         return gram_matrix
-    
-    
-    
+
+
+
     def _wl_edgekernel_do(Gn, node_label, edge_label, height):
         """Compute Weisfeiler-Lehman edge kernels between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
+            List of graphs between which the kernels are computed.
         node_label : string
-            node attribute used as label. 
+            node attribute used as label.
         edge_label : string
-            edge attribute used as label. 
+            edge attribute used as label.
         height : int
             subtree height.
-        
+
         Return
         ------
         gram_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
-        """
+        """
         pass
         # init.
         height = int(height)
         gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel
-        
+
         # initial for height = 0
         for i in range(0, len(Gn)):
             for j in range(i, len(Gn)):
                 for e1 in Gn[i].edges(data = True):
-                    for e2 in Gn[j].edges(data = True):
+                    for e2 in Gn[j].edges(data = True):
                         if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                             gram_matrix[i][j] += 1
                 gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         # iterate each height
         for h in range(1, height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             for G in Gn: # for each graph
-                set_multisets = []
+                set_multisets = []
                 for node in G.nodes(data = True):
                     # Multiset-label determination.
                     multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix 
-                    set_multisets.append(multiset)
-        
+                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
+                    set_multisets.append(multiset)
+
                 # label compression
                 set_unique = list(set(set_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label 
+                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({ value : all_set_compressed[value] })
                     else:
                         set_compressed.update({ value : str(num_of_labels_occured + 1) })
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for node in G.nodes(data = True):
                     node[1][node_label] = set_compressed[set_multisets[node[0]]]
-        
+
             # Compute subtree kernel with h iterations and add it to the final kernel
             for i in range(0, len(Gn)):
                 for j in range(i, len(Gn)):
                     for e1 in Gn[i].edges(data = True):
-                        for e2 in Gn[j].edges(data = True):
+                        for e2 in Gn[j].edges(data = True):
                             if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
                                 gram_matrix[i][j] += 1
                     gram_matrix[j][i] = gram_matrix[i][j]
-        
+
         return gram_matrix
-    
-    
+
+
     def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
         """Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.
-        
+
         Parameters
         ----------
         Gn : List of NetworkX graph
-            List of graphs between which the kernels are computed. 
+            List of graphs between which the kernels are computed.
         node_label : string
-            node attribute used as label. 
+            node attribute used as label.
         edge_label : string
-            edge attribute used as label. 
+            edge attribute used as label.
         height : int
             subtree height.
         base_kernel : string
             Name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs.
-        
+
         Return
        ------
         gram_matrix : Numpy matrix
             Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
-        """
+        """
         pass
         # init.
         height = int(height)
         gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel
-        
+
         # initial for height = 0
         gram_matrix = base_kernel(Gn, node_label, edge_label)
-        
+
         # iterate each height
         for h in range(1, height + 1):
             all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
             num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
             for G in Gn: # for each graph
-                set_multisets = []
+                set_multisets = []
                 for node in G.nodes(data = True):
                     # Multiset-label determination.
                     multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
                     # sorting each multiset
                     multiset.sort()
-                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix 
-                    set_multisets.append(multiset)
-        
+                    multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
+                    set_multisets.append(multiset)
+
                 # label compression
                 set_unique = list(set(set_multisets)) # set of unique multiset labels
-                # a dictionary mapping original labels to new ones. 
+                # a dictionary mapping original labels to new ones.
                 set_compressed = {}
-                # if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label 
+                # if a label occurred before, assign its former compressed label, else assign the number of labels occurred + 1 as the compressed label
                 for value in set_unique:
                     if value in all_set_compressed.keys():
                         set_compressed.update({ value : all_set_compressed[value] })
                     else:
                         set_compressed.update({ value : str(num_of_labels_occured + 1) })
                         num_of_labels_occured += 1
-        
+
                 all_set_compressed.update(set_compressed)
-        
+
                 # relabel nodes
                 for node in G.nodes(data = True):
                     node[1][node_label] = set_compressed[set_multisets[node[0]]]
-        
+
             # Compute kernel with h iterations and add it to the final kernel
             gram_matrix += base_kernel(Gn, node_label, edge_label)
-        
+
         return gram_matrix
-    
-    
+
+
     def _add_dummy_node_labels(self, Gn):
         if len(self._node_labels) == 0 or (len(self._node_labels) == 1 and self._node_labels[0] == SpecialLabel.DUMMY):
             for i in range(len(Gn)):
                 nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
             self._node_labels = [SpecialLabel.DUMMY]
-    
-    
+
+
 class WLSubtree(WeisfeilerLehman):
-    
+
     def __init__(self, **kwargs):
         kwargs['base_kernel'] = 'subtree'
         super().__init__(**kwargs)
\ No newline at end of file
diff --git a/gklearn/tests/test_graph_kernels.py b/gklearn/tests/test_graph_kernels.py
index 40f1d51..021b1cc 100644
--- a/gklearn/tests/test_graph_kernels.py
+++ b/gklearn/tests/test_graph_kernels.py
@@ -544,3 +544,4 @@ if __name__ == "__main__":
 #    test_RandomWalk('Acyclic', 'fp', None, None)
 #    test_RandomWalk('Acyclic', 'spectral', 'exp', 'imap_unordered')
 #    test_CommonWalk('Alkane', 0.01, 'geo')
+#    test_ShortestPath('Acyclic')
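End to end, the parallel path added by this patch is driven through the public compute interface. The kwargs below mirror the calls in gklearn/tests/test_graph_kernels.py; the two toy graphs stand in for a benchmark dataset, and the (gram_matrix, run_time) return shape is taken from those tests rather than guaranteed here:

    import multiprocessing

    import networkx as nx
    from gklearn.kernels import WLSubtree

    g1 = nx.path_graph(3)    # toy stand-ins for dataset graphs
    g2 = nx.cycle_graph(3)
    for g in (g1, g2):
        nx.set_node_attributes(g, 'C', 'atom')

    graph_kernel = WLSubtree(node_labels=['atom'], edge_labels=[], height=2,
                             ds_infos={'directed': False})
    gram_matrix, run_time = graph_kernel.compute([g1, g2],
                                                 parallel='imap_unordered',
                                                 n_jobs=multiprocessing.cpu_count(),
                                                 verbose=True)
    print(gram_matrix)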