From dd58f602ecafb4caa5dea7a5709c2d645f57ae6b Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Thu, 2 Dec 2021 16:29:57 +0100 Subject: [PATCH 01/11] [Feature] Add kronecker_delta_kernel function between a pair of numpy vectors. --- gklearn/utils/kernels.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gklearn/utils/kernels.py b/gklearn/utils/kernels.py index c500097..c35cc2f 100644 --- a/gklearn/utils/kernels.py +++ b/gklearn/utils/kernels.py @@ -4,7 +4,7 @@ These kernels are defined between pairs of vectors. import numpy as np -def delta_kernel(x, y): +def kronecker_delta_kernel(x, y): """Delta kernel. Return 1 if x == y, 0 otherwise. Parameters @@ -23,6 +23,10 @@ def delta_kernel(x, y): labeled graphs. In Proceedings of the 20th International Conference on Machine Learning, Washington, DC, United States, 2003. """ + return (1 if np.array_equal(x, y) else 0) + + +def delta_kernel(x, y): return x == y #(1 if condition else 0) From 42acfd02364095f29e85a62d039f91bbe66a1100 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Thu, 2 Dec 2021 16:52:41 +0100 Subject: [PATCH 02/11] [Fix] Fix laplacian_kernel and cosine_kernel between pairs of numpy vectors. --- gklearn/utils/kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gklearn/utils/kernels.py b/gklearn/utils/kernels.py index c35cc2f..182668b 100644 --- a/gklearn/utils/kernels.py +++ b/gklearn/utils/kernels.py @@ -127,7 +127,7 @@ def linearkernel(x, y): def cosine_kernel(x, y): - return np.dot(x, y) / (np.abs(x) * np.abs(y)) + return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y)) def sigmoid_kernel(x, y, gamma=None, coef0=1): @@ -146,7 +146,7 @@ def laplacian_kernel(x, y, gamma=None): if gamma is None: gamma = 1.0 / len(x) - k = -gamma * np.abs(np.subtract(x, y)) + k = -gamma * np.linalg.norm(np.subtract(x, y)) k = np.exp(k) return k From 45747641df470837b2e95dd866f93accfa922491 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Mon, 14 Feb 2022 20:20:19 +0100 Subject: [PATCH 03/11] [CI] Fix travis badge link. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1ff792a..2abcec7 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # graphkit-learn -[![Build Status](https://travis-ci.com/jajupmochi/graphkit-learn.svg?branch=master)](https://travis-ci.com/jajupmochi/graphkit-learn) + +[![Build Status](https://app.travis-ci.com/jajupmochi/graphkit-learn.svg?branch=master)](https://app.travis-ci.com/jajupmochi/graphkit-learn) [![Build status](https://ci.appveyor.com/api/projects/status/bdxsolk0t1uji9rd?svg=true)](https://ci.appveyor.com/project/jajupmochi/graphkit-learn) [![codecov](https://codecov.io/gh/jajupmochi/graphkit-learn/branch/master/graph/badge.svg)](https://codecov.io/gh/jajupmochi/graphkit-learn) [![Documentation Status](https://readthedocs.org/projects/graphkit-learn/badge/?version=master)](https://graphkit-learn.readthedocs.io/en/master/?badge=master) From 08ee17d1538692b58606babebdcc12374f8b30bf Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Mon, 14 Feb 2022 20:23:41 +0100 Subject: [PATCH 04/11] [CI] Add tests on Python 3.9. 
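Before the CI changes below, a minimal usage sketch of the kernel helpers touched by PATCH 01 and PATCH 02 above (the toy vectors and the values in the comments are illustrative only; it assumes the module is importable as gklearn.utils.kernels):

    import numpy as np
    from gklearn.utils.kernels import (kronecker_delta_kernel, delta_kernel,
                                       cosine_kernel, laplacian_kernel)

    x = np.array([1.0, 0.0, 2.0])
    y = np.array([1.0, 0.0, 2.0])

    # kronecker_delta_kernel compares the whole vectors and returns a scalar 0/1.
    print(kronecker_delta_kernel(x, y))   # 1
    # delta_kernel keeps the element-wise behaviour (x == y).
    print(delta_kernel(x, y))             # [ True  True  True]
    # cosine_kernel now divides by the Euclidean norms, so collinear vectors give ~1.0.
    print(cosine_kernel(x, 2 * x))        # ~1.0
    # laplacian_kernel now uses the norm of the difference vector.
    print(laplacian_kernel(x, y))         # exp(0) = 1.0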
--- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index b40cbcb..a569f8e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,8 @@ python: - '3.6' - '3.7' - '3.8' +- '3.9' +#- '3.10' before_install: - python --version From 1946d469643d618d9c381354f25933af2da2aed0 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Mon, 14 Mar 2022 15:49:27 +0100 Subject: [PATCH 05/11] [Feature] Allow to permutate nodes in graphs when using bipartite to estimate GED. This feature is implementated in the method in Python, which invokes GEDLIB in C++ by Cython. --- .../edit_costs.max_num_sols.ratios.bipartite.py | 147 ------- .../edit_costs.real_data.nums_sols.ratios.IPFP.py | 11 +- ...t_costs.real_data.nums_sols.ratios.bipartite.py | 172 ++++++++ gklearn/experiments/ged/stability/group_results.py | 1 + ..._costs.real_data.nums_sols.ratios.bipartite.py} | 27 +- gklearn/experiments/ged/stability/utils.py | 18 +- gklearn/ged/util/util.py | 210 +++++++-- gklearn/utils/utils.py | 471 ++++++++++++--------- 8 files changed, 665 insertions(+), 392 deletions(-) delete mode 100644 gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py create mode 100644 gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py rename gklearn/experiments/ged/stability/{run_job_edit_costs.max_nums_sols.ratios.bipartite.py => run_job_edit_costs.real_data.nums_sols.ratios.bipartite.py} (52%) diff --git a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py deleted file mode 100644 index 1f01fd5..0000000 --- a/gklearn/experiments/ged/stability/edit_costs.max_num_sols.ratios.bipartite.py +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Mon Nov 2 16:17:01 2020 - -@author: ljia -""" -# This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. The minimum solution from given numbers of repeats are computed. - -import os -import multiprocessing -import pickle -import logging -from gklearn.ged.util import compute_geds -import time -from utils import get_dataset -import sys -from group_results import group_trials - - -def xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial): - - save_file_suffix = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) - - # Return if the file exists. - if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): - return None, None - - """**2. Set parameters.**""" - - # Parameters for GED computation. - ged_options = {'method': 'BIPARTITE', # use BIPARTITE huristic. - # 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv) - 'lsape_model': 'ECBP', # - # ??when bigger than 1, then the method is considered mIPFP. - # the actual number of computed solutions might be smaller than the specified value - 'max_num_solutions': max_num_solutions, - 'edit_cost': 'CONSTANT', # use CONSTANT cost. - 'greedy_method': 'BASIC', # - # the distance between non-symbolic node/edge labels is computed by euclidean distance. - 'attr_distance': 'euclidean', - 'optimal': True, # if TRUE, the option --greedy-method has no effect - # parallel threads. Do not work if mpg_options['parallel'] = False. 
- 'threads': multiprocessing.cpu_count(), - 'centrality_method': 'NONE', - 'centrality_weight': 0.7, - 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' - } - - edit_cost_constants = [i * ratio for i in [1, 1, 1]] + [1, 1, 1] -# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] -# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) - - options = ged_options.copy() - options['edit_cost_constants'] = edit_cost_constants - options['node_labels'] = dataset.node_labels - options['edge_labels'] = dataset.edge_labels - options['node_attrs'] = dataset.node_attrs - options['edge_attrs'] = dataset.edge_attrs - parallel = True # if num_solutions == 1 else False - - """**5. Compute GED matrix.**""" - ged_mat = 'error' - runtime = 0 - try: - time0 = time.time() - ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, options=options, repeats=1, parallel=parallel, verbose=True) - runtime = time.time() - time0 - except Exception as exp: - print('An exception occured when running this experiment:') - LOG_FILENAME = save_dir + 'error.txt' - logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) - logging.exception(save_file_suffix) - print(repr(exp)) - - """**6. Get results.**""" - - with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f: - pickle.dump(ged_mat, f) - with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: - pickle.dump(runtime, f) - - return ged_mat, runtime - - -def save_trials_as_group(dataset, ds_name, max_num_solutions, ratio): - # Return if the group file exists. - name_middle = '.' + ds_name + '.mnum_sols_' + str(max_num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' - name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' - if os.path.isfile(name_group): - return - - ged_mats = [] - runtimes = [] - for trial in range(1, 101): - print() - print('Trial:', trial) - ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, max_num_solutions, ratio, trial) - ged_mats.append(ged_mat) - runtimes.append(runtime) - - # Group trials and Remove single files. - name_prefix = 'ged_matrix' + name_middle - group_trials(save_dir, name_prefix, True, True, False) - name_prefix = 'runtime' + name_middle - group_trials(save_dir, name_prefix, True, True, False) - - -def results_for_a_dataset(ds_name): - """**1. 
Get dataset.**""" - dataset = get_dataset(ds_name) - - for max_num_solutions in mnum_solutions_list: - print() - print('Max # of solutions:', max_num_solutions) - for ratio in ratio_list: - print() - print('Ratio:', ratio) - save_trials_as_group(dataset, ds_name, max_num_solutions, ratio) - - -def get_param_lists(ds_name): - if ds_name == 'AIDS_symb': - mnum_solutions_list = [1, 20, 40, 60, 80, 100] - ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] - else: - mnum_solutions_list = [1, 20, 40, 60, 80, 100] - ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] - - return mnum_solutions_list, ratio_list - - -if __name__ == '__main__': - if len(sys.argv) > 1: - ds_name_list = sys.argv[1:] - else: - ds_name_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] - - save_dir = 'outputs/edit_costs.max_num_sols.ratios.bipartite/' - os.makedirs(save_dir, exist_ok=True) - os.makedirs(save_dir + 'groups/', exist_ok=True) - - for ds_name in ds_name_list: - print() - print('Dataset:', ds_name) - mnum_solutions_list, ratio_list = get_param_lists(ds_name) - results_for_a_dataset(ds_name) \ No newline at end of file diff --git a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py index aa08579..82b6604 100644 --- a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py +++ b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.IPFP.py @@ -13,7 +13,7 @@ import pickle import logging from gklearn.ged.util import compute_geds import time -from utils import get_dataset, set_edit_cost_consts, dichotomous_permutation +from utils import get_dataset, set_edit_cost_consts, dichotomous_permutation, mix_param_grids import sys from group_results import group_trials, check_group_existence, update_group_marker @@ -125,9 +125,10 @@ def get_param_lists(ds_name, mode='test'): elif mode == 'simple': from sklearn.model_selection import ParameterGrid - param_grid = ParameterGrid([ - {'num_solutions': dichotomous_permutation([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]), 'ratio': [10]}, - {'num_solutions': [10], 'ratio': dichotomous_permutation([0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10])}]) + param_grid = mix_param_grids([list(ParameterGrid([ + {'num_solutions': dichotomous_permutation([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 40, 50, 60, 70, 80, 90, 100]), 'ratio': [10]}])), + list(ParameterGrid([ + {'num_solutions': [10], 'ratio': dichotomous_permutation([0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10])}]))]) # print(list(param_grid)) if ds_name == 'AIDS_symb': @@ -148,7 +149,7 @@ if __name__ == '__main__': # ds_name_list = ['MUTAG'] # 'Alkane_unlabeled'] # ds_name_list = ['Acyclic', 'MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] - save_dir = 'outputs/edit_costs.real_data.num_sols.ratios.IPFP/' + save_dir = 'outputs/CRIANN/edit_costs.real_data.num_sols.ratios.IPFP/' os.makedirs(save_dir, exist_ok=True) os.makedirs(save_dir + 'groups/', exist_ok=True) diff --git a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py new file mode 100644 index 0000000..f450c1e --- /dev/null +++ b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Nov 2 16:17:01 2020 + +@author: ljia +""" +# 
This script tests the influence of the ratios between node costs and edge costs on the stability of the GED computation, where the base edit costs are [1, 1, 1, 1, 1, 1]. The minimum solution from given numbers of repeats are computed. + +import os +import multiprocessing +import pickle +import logging +from gklearn.ged.util import compute_geds +import time +from utils import get_dataset, set_edit_cost_consts, dichotomous_permutation, mix_param_grids +import sys +from group_results import group_trials, check_group_existence, update_group_marker + + +def xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial): + + save_file_suffix = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.trial_' + str(trial) + + # Return if the file exists. + if os.path.isfile(save_dir + 'ged_matrix' + save_file_suffix + '.pkl'): + return None, None + + """**2. Set parameters.**""" + + # Parameters for GED computation. + ged_options = {'method': 'BIPARTITE', # use BIPARTITE huristic. + # 'initialization_method': 'RANDOM', # or 'NODE', etc. (for GEDEnv) + 'lsape_model': 'ECBP', # + # ??when bigger than 1, then the method is considered mIPFP. + # the actual number of computed solutions might be smaller than the specified value + 'max_num_solutions': 1, # @ max_num_solutions, + 'edit_cost': 'CONSTANT', # use CONSTANT cost. + 'greedy_method': 'BASIC', # + # the distance between non-symbolic node/edge labels is computed by euclidean distance. + 'attr_distance': 'euclidean', + 'optimal': True, # if TRUE, the option --greedy-method has no effect + # parallel threads. Do not work if mpg_options['parallel'] = False. + 'threads': multiprocessing.cpu_count(), + 'centrality_method': 'NONE', + 'centrality_weight': 0.7, + 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES' + } + + edit_cost_constants = set_edit_cost_consts(ratio, + node_labeled=len(dataset.node_labels), + edge_labeled=len(dataset.edge_labels), + mode='uniform') +# edit_cost_constants = [item * 0.01 for item in edit_cost_constants] +# pickle.dump(edit_cost_constants, open(save_dir + "edit_costs" + save_file_suffix + ".pkl", "wb")) + + + options = ged_options.copy() + options['edit_cost_constants'] = edit_cost_constants + options['node_labels'] = dataset.node_labels + options['edge_labels'] = dataset.edge_labels + options['node_attrs'] = dataset.node_attrs + options['edge_attrs'] = dataset.edge_attrs + parallel = True # if num_solutions == 1 else False + + """**5. Compute GED matrix.**""" + ged_mat = 'error' + runtime = 0 + try: + time0 = time.time() + ged_vec_init, ged_mat, n_edit_operations = compute_geds(dataset.graphs, + options=options, + repeats=num_solutions, + permute_nodes=True, + random_state=None, + parallel=parallel, + verbose=True) + runtime = time.time() - time0 + except Exception as exp: + print('An exception occured when running this experiment:') + LOG_FILENAME = save_dir + 'error.txt' + logging.basicConfig(filename=LOG_FILENAME, level=logging.DEBUG) + logging.exception(save_file_suffix) + print(repr(exp)) + + """**6. Get results.**""" + + with open(save_dir + 'ged_matrix' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(ged_mat, f) + with open(save_dir + 'runtime' + save_file_suffix + '.pkl', 'wb') as f: + pickle.dump(runtime, f) + + return ged_mat, runtime + + +def save_trials_as_group(dataset, ds_name, num_solutions, ratio): + # Return if the group file exists. + name_middle = '.' + ds_name + '.num_sols_' + str(num_solutions) + '.ratio_' + "{:.2f}".format(ratio) + '.' 
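+	# For illustration, hypothetical values ds_name='MUTAG', num_solutions=10 and
+	# ratio=10 would give name_middle == '.MUTAG.num_sols_10.ratio_10.00.'.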
+ name_group = save_dir + 'groups/ged_mats' + name_middle + 'npy' + if check_group_existence(name_group): + return + + ged_mats = [] + runtimes = [] + num_trials = 100 + for trial in range(1, num_trials + 1): + print() + print('Trial:', trial) + ged_mat, runtime = xp_compute_ged_matrix(dataset, ds_name, num_solutions, ratio, trial) + ged_mats.append(ged_mat) + runtimes.append(runtime) + + # Group trials and remove single files. + # @todo: if the program stops between the following lines, then there may be errors. + name_prefix = 'ged_matrix' + name_middle + group_trials(save_dir, name_prefix, True, True, False, num_trials=num_trials) + name_prefix = 'runtime' + name_middle + group_trials(save_dir, name_prefix, True, True, False, num_trials=num_trials) + update_group_marker(name_group) + + +def results_for_a_dataset(ds_name): + """**1. Get dataset.**""" + dataset = get_dataset(ds_name) + + for params in list(param_grid): + print() + print(params) + save_trials_as_group(dataset, ds_name, params['num_solutions'], params['ratio']) + + +def get_param_lists(ds_name, mode='test'): + if mode == 'test': + num_solutions_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30] + ratio_list = [10] + return num_solutions_list, ratio_list + + elif mode == 'simple': + from sklearn.model_selection import ParameterGrid + param_grid = mix_param_grids([list(ParameterGrid([ + {'num_solutions': dichotomous_permutation([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 40, 50, 60, 70, 80, 90, 100]), 'ratio': [10]}])), + list(ParameterGrid([ + {'num_solutions': [10], 'ratio': dichotomous_permutation([0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10])}]))]) +# print(list(param_grid)) + + if ds_name == 'AIDS_symb': + num_solutions_list = [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9] + else: + num_solutions_list = [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100] # [1, 20, 40, 60, 80, 100] + ratio_list = [0.1, 0.3, 0.5, 0.7, 0.9, 1, 3, 5, 7, 9, 10][::-1] + + return param_grid + + +if __name__ == '__main__': + if len(sys.argv) > 1: + ds_name_list = sys.argv[1:] + else: + ds_name_list = ['Acyclic', 'Alkane_unlabeled', 'MAO_lite', 'Monoterpenoides', 'MUTAG'] +# ds_name_list = ['MUTAG'] # 'Alkane_unlabeled'] +# ds_name_list = ['Acyclic', 'MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] + + save_dir = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/' + os.makedirs(save_dir, exist_ok=True) + os.makedirs(save_dir + 'groups/', exist_ok=True) + + for ds_name in ds_name_list: + print() + print('Dataset:', ds_name) + param_grid = get_param_lists(ds_name, mode='simple') + results_for_a_dataset(ds_name) \ No newline at end of file diff --git a/gklearn/experiments/ged/stability/group_results.py b/gklearn/experiments/ged/stability/group_results.py index bdbe89f..10f930c 100644 --- a/gklearn/experiments/ged/stability/group_results.py +++ b/gklearn/experiments/ged/stability/group_results.py @@ -32,6 +32,7 @@ def check_group_existence(file_name): def update_group_marker(file_name): + # @todo: possible error when seveal tasks are using this file at the same time. 
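+	# One possible mitigation (not implemented here): serialize access to the
+	# marker file with an OS-level lock such as fcntl.flock, or write to a
+	# temporary file and swap it in atomically with os.replace.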
path, name = os.path.split(file_name) marker_fn = os.path.join(path, 'group_names_finished.pkl') if os.path.isfile(marker_fn): diff --git a/gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/run_job_edit_costs.real_data.nums_sols.ratios.bipartite.py similarity index 52% rename from gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py rename to gklearn/experiments/ged/stability/run_job_edit_costs.real_data.nums_sols.ratios.bipartite.py index 276a1a5..a33a9c1 100644 --- a/gklearn/experiments/ged/stability/run_job_edit_costs.max_nums_sols.ratios.bipartite.py +++ b/gklearn/experiments/ged/stability/run_job_edit_costs.real_data.nums_sols.ratios.bipartite.py @@ -9,36 +9,45 @@ import os import re +cur_path = os.path.dirname(os.path.abspath(__file__)) + + def get_job_script(arg): script = r""" #!/bin/bash #SBATCH --exclusive #SBATCH --job-name="st.""" + arg + r""".bp" -#SBATCH --partition=tlong +#SBATCH --partition=court #SBATCH --mail-type=ALL #SBATCH --mail-user=jajupmochi@gmail.com -#SBATCH --output="outputs/output_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt" -#SBATCH --error="errors/error_edit_costs.max_num_sols.ratios.bipartite.""" + arg + """.txt" +#SBATCH --output="outputs/output_edit_costs.real_data.nums_sols.ratios.bipartite.""" + arg + """.txt" +#SBATCH --error="errors/error_edit_costs.real_data.nums_sols.ratios.bipartite.""" + arg + """.txt" # #SBATCH --ntasks=1 #SBATCH --nodes=1 #SBATCH --cpus-per-task=1 -#SBATCH --time=300:00:00 +#SBATCH --time=48:00:00 #SBATCH --mem-per-cpu=4000 srun hostname -srun cd /home/2019015/ljia02/graphkit-learn/gklearn/experiments/ged/stability -srun python3 edit_costs.max_nums_sols.ratios.bipartite.py """ + arg +cd """ + cur_path + r""" +echo Working directory : $PWD +echo Local work dir : $LOCAL_WORK_DIR +python3 edit_costs.real_data.nums_sols.ratios.bipartite.py """ + arg script = script.strip() script = re.sub('\n\t+', '\n', script) script = re.sub('\n +', '\n', script) - + return script if __name__ == '__main__': - ds_list = ['MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] - for ds_name in [ds_list[i] for i in [0, 1, 2, 3]]: + + os.makedirs('outputs/', exist_ok=True) + os.makedirs('errors/', exist_ok=True) + + ds_list = ['Acyclic', 'Alkane_unlabeled', 'MAO_lite', 'Monoterpenoides', 'MUTAG'] + for ds_name in [ds_list[i] for i in [0, 1, 2, 3, 4]]: job_script = get_job_script(ds_name) command = 'sbatch < 0: + for g_idx, grid in enumerate(list_of_grids): + if idx < len(grid): + mixed_grids.append(grid[idx]) + else: + not_finished[g_idx] = False + idx += 1 + + return mixed_grids + + + if __name__ == '__main__': root_dir = 'outputs/CRIANN/' # for dir_ in sorted(os.listdir(root_dir)): @@ -337,4 +353,4 @@ if __name__ == '__main__': # get_relative_errors(save_dir) # except Exception as exp: # print('An exception occured when running this experiment:') -# print(repr(exp)) \ No newline at end of file +# print(repr(exp)) diff --git a/gklearn/ged/util/util.py b/gklearn/ged/util/util.py index a5a5ac5..d75939a 100644 --- a/gklearn/ged/util/util.py +++ b/gklearn/ged/util/util.py @@ -64,10 +64,12 @@ def pairwise_ged(g1, g2, options={}, sort=True, repeats=1, parallel=False, verbo g = listID[0] h = listID[1] dis_min = np.inf +# print('------------------------------------------') for i in range(0, repeats): ged_env.run_method(g, h) upper = ged_env.get_upper_bound(g, h) dis = upper +# print(dis) if dis < dis_min: dis_min = dis pi_forward = 
ged_env.get_forward_map(g, h) @@ -169,12 +171,100 @@ def compute_geds_cml(graphs, options={}, sort=True, parallel=False, verbose=True return ged_vec, ged_mat, n_edit_operations -def compute_geds(graphs, options={}, sort=True, repeats=1, parallel=False, n_jobs=None, verbose=True): +#%% + + +def compute_geds(graphs, + options={}, + sort=True, + repeats=1, + permute_nodes=False, + random_state=None, + parallel=False, + n_jobs=None, + verbose=True): + """Compute graph edit distance matrix using GEDLIB. + """ + if permute_nodes: + return _compute_geds_with_permutation(graphs, + options=options, + sort=sort, + repeats=repeats, + random_state=random_state, + parallel=parallel, + n_jobs=n_jobs, + verbose=verbose) + else: + return _compute_geds_without_permutation(graphs, + options=options, + sort=sort, + repeats=repeats, + parallel=parallel, + n_jobs=n_jobs, + verbose=verbose) + + +#%% + + +def _compute_geds_with_permutation(graphs, + options={}, + sort=True, + repeats=1, + random_state=None, + parallel=False, + n_jobs=None, + verbose=True): + + from gklearn.utils.utils import nx_permute_nodes + + # Initialze variables. + ged_mat_optim = np.full((len(graphs), len(graphs)), np.inf) + np.fill_diagonal(ged_mat_optim, 0) + len_itr = int(len(graphs) * (len(graphs) - 1) / 2) + ged_vec = [0] * len_itr + n_edit_operations = [0] * len_itr + + # for each repeats: + for i in range(0, repeats): + # Permutate nodes. + graphs_pmut = [nx_permute_nodes(g, random_state=random_state) for g in graphs] + + out = _compute_geds_without_permutation(graphs_pmut, + options=options, + sort=sort, + repeats=1, + parallel=parallel, + n_jobs=n_jobs, + verbose=verbose) + + # Compare current results with the best one. + idx_cnt = 0 + for i in range(len(graphs)): + for j in range(i + 1, len(graphs)): + if out[1][i, j] < ged_mat_optim[i ,j]: + ged_mat_optim[i, j] = out[1][i, j] + ged_mat_optim[j, i] = out[1][j, i] + ged_vec[idx_cnt] = out[0][idx_cnt] + n_edit_operations[idx_cnt] = out[2][idx_cnt] + idx_cnt += 1 + + return ged_vec, ged_mat_optim, n_edit_operations + + +def _compute_geds_without_permutation(graphs, + options={}, + sort=True, + repeats=1, + parallel=False, + n_jobs=None, + verbose=True): from gklearn.gedlib import librariesImport, gedlibpy # initialize ged env. ged_env = gedlibpy.GEDEnv() ged_env.set_edit_cost(options['edit_cost'], edit_cost_constant=options['edit_cost_constants']) + for g in graphs: ged_env.add_nx_graph(g, '') listID = ged_env.get_all_graph_ids() @@ -266,6 +356,11 @@ def _compute_ged(env, gid1, gid2, g1, g2, repeats): dis = upper # make the map label correct (label remove map as np.inf) + # Attention: using node indices instead of NetworkX node labels (as + # implemented here) may cause several issues: + # - Fail if NetworkX node labels are not consecutive integers; + # - Return wrong mappings if nodes are permutated (e.g., by using + # `gklearn.utis.utils.nx_permute_nodes()`.) nodes1 = [n for n in g1.nodes()] nodes2 = [n for n in g2.nodes()] nb1 = nx.number_of_nodes(g1) @@ -278,46 +373,57 @@ def _compute_ged(env, gid1, gid2, g1, g2, repeats): pi_forward_min = pi_forward pi_backward_min = pi_backward +# print('-----') +# print(pi_forward_min) +# print(pi_backward_min) + return dis_min, pi_forward_min, pi_backward_min -def label_costs_to_matrix(costs, nb_labels): - """Reform a label cost vector to a matrix. 
+#%% + + +def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs): + """Calculate the numbers of the occurence of each edit operation in a given + edit path. Parameters ---------- - costs : numpy.array - The vector containing costs between labels, in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs. - nb_labels : integer - Number of labels. + g1 : TYPE + DESCRIPTION. + g2 : TYPE + DESCRIPTION. + forward_map : TYPE + DESCRIPTION. + backward_map : TYPE + DESCRIPTION. + edit_cost : TYPE, optional + DESCRIPTION. The default is None. + is_cml : TYPE, optional + DESCRIPTION. The default is False. + **kwargs : TYPE + DESCRIPTION. + + Raises + ------ + Exception + DESCRIPTION. Returns ------- - cost_matrix : numpy.array. - The reformed label cost matrix of size (nb_labels, nb_labels). Each row/column of cost_matrix corresponds to a label, and the first label is the dummy label. This is the same setting as in GEDData. + TYPE + DESCRIPTION. + + Notes + ----- + Attention: when implementing a function to get the numbers of edit + operations, make sure that: + - It does not fail if NetworkX node labels are not consecutive integers; + - It returns correct results if nodes are permutated (e.g., by using + `gklearn.utis.utils.nx_permute_nodes()`.) + Generally speaking, it means you need to distinguish the NetworkX label of + a node from the position (index) of that node in the node list. """ - # Initialize label cost matrix. - cost_matrix = np.zeros((nb_labels + 1, nb_labels + 1)) - i = 0 - # Costs of insertions. - for col in range(1, nb_labels + 1): - cost_matrix[0, col] = costs[i] - i += 1 - # Costs of deletions. - for row in range(1, nb_labels + 1): - cost_matrix[row, 0] = costs[i] - i += 1 - # Costs of substitutions. - for row in range(1, nb_labels + 1): - for col in range(row + 1, nb_labels + 1): - cost_matrix[row, col] = costs[i] - cost_matrix[col, row] = costs[i] - i += 1 - - return cost_matrix - - -def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, is_cml=False, **kwargs): if is_cml: if edit_cost == 'CONSTANT': node_labels = kwargs.get('node_labels', []) @@ -611,6 +717,48 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map, return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es +#%% + + +def label_costs_to_matrix(costs, nb_labels): + """Reform a label cost vector to a matrix. + + Parameters + ---------- + costs : numpy.array + The vector containing costs between labels, in the order of node insertion costs, node deletion costs, node substitition costs, edge insertion costs, edge deletion costs, edge substitition costs. + nb_labels : integer + Number of labels. + + Returns + ------- + cost_matrix : numpy.array. + The reformed label cost matrix of size (nb_labels, nb_labels). Each row/column of cost_matrix corresponds to a label, and the first label is the dummy label. This is the same setting as in GEDData. + """ + # Initialize label cost matrix. + cost_matrix = np.zeros((nb_labels + 1, nb_labels + 1)) + i = 0 + # Costs of insertions. + for col in range(1, nb_labels + 1): + cost_matrix[0, col] = costs[i] + i += 1 + # Costs of deletions. + for row in range(1, nb_labels + 1): + cost_matrix[row, 0] = costs[i] + i += 1 + # Costs of substitutions. 
+ for row in range(1, nb_labels + 1): + for col in range(row + 1, nb_labels + 1): + cost_matrix[row, col] = costs[i] + cost_matrix[col, row] = costs[i] + i += 1 + + return cost_matrix + + +#%% + + def ged_options_to_string(options): opt_str = ' ' for key, val in options.items(): diff --git a/gklearn/utils/utils.py b/gklearn/utils/utils.py index 5758291..f0e49fd 100644 --- a/gklearn/utils/utils.py +++ b/gklearn/utils/utils.py @@ -7,6 +7,9 @@ from enum import Enum, unique # from tqdm import tqdm +#%% + + def getSPLengths(G1): sp = nx.shortest_path(G1) distances = np.zeros((G1.number_of_nodes(), G1.number_of_nodes())) @@ -286,81 +289,146 @@ def direct_product_graph(G1, G2, node_labels, edge_labels): return gt -def graph_deepcopy(G): - """Deep copy a graph, including deep copy of all nodes, edges and - attributes of the graph, nodes and edges. +def find_paths(G, source_node, length): + """Find all paths with a certain length those start from a source node. + A recursive depth first search is applied. - Note - ---- - It is the same as the NetworkX function graph.copy(), as far as I know. + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + source_node : integer + The number of the node from where all paths start. + length : integer + The length of paths. + + Return + ------ + path : list of list + List of paths retrieved, where each path is represented by a list of nodes. """ - # add graph attributes. - labels = {} - for k, v in G.graph.items(): - labels[k] = deepcopy(v) - if G.is_directed(): - G_copy = nx.DiGraph(**labels) - else: - G_copy = nx.Graph(**labels) + if length == 0: + return [[source_node]] + path = [[source_node] + path for neighbor in G[source_node] \ + for path in find_paths(G, neighbor, length - 1) if source_node not in path] + return path - # add nodes - for nd, attrs in G.nodes(data=True): - labels = {} - for k, v in attrs.items(): - labels[k] = deepcopy(v) - G_copy.add_node(nd, **labels) - # add edges. - for nd1, nd2, attrs in G.edges(data=True): - labels = {} - for k, v in attrs.items(): - labels[k] = deepcopy(v) - G_copy.add_edge(nd1, nd2, **labels) +def find_all_paths(G, length, is_directed): + """Find all paths with a certain length in a graph. A recursive depth first + search is applied. - return G_copy + Parameters + ---------- + G : NetworkX graphs + The graph in which paths are searched. + length : integer + The length of paths. + Return + ------ + path : list of list + List of paths retrieved, where each path is represented by a list of nodes. + """ + all_paths = [] + for node in G: + all_paths.extend(find_paths(G, node, length)) -def graph_isIdentical(G1, G2): - """Check if two graphs are identical, including: same nodes, edges, node - labels/attributes, edge labels/attributes. + if not is_directed: + # For each path, two presentations are retrieved from its two extremities. + # Remove one of them. + all_paths_r = [path[::-1] for path in all_paths] + for idx, path in enumerate(all_paths[:-1]): + for path2 in all_paths_r[idx+1::]: + if path == path2: + all_paths[idx] = [] + break + all_paths = list(filter(lambda a: a != [], all_paths)) - Notes - ----- - 1. The type of graphs has to be the same. + return all_paths - 2. Global/Graph attributes are neglected as they may contain names for graphs. - """ - # check nodes. - nlist1 = [n for n in G1.nodes(data=True)] - nlist2 = [n for n in G2.nodes(data=True)] - if not nlist1 == nlist2: - return False - # check edges. 
- elist1 = [n for n in G1.edges(data=True)] - elist2 = [n for n in G2.edges(data=True)] - if not elist1 == elist2: - return False - # check graph attributes. - return True +# @todo: use it in ShortestPath. +def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]): + """Compute kernels between each pair of vertices in two graphs. + Parameters + ---------- + g1, g2 : NetworkX graph + The kernels bewteen pairs of vertices in these two graphs are computed. + node_kernels : dict + A dictionary of kernel functions for nodes, including 3 items: 'symb' + for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' + for both labels. The first 2 functions take two node labels as + parameters, and the 'mix' function takes 4 parameters, a symbolic and a + non-symbolic label for each the two nodes. Each label is in form of 2-D + dimension array (n_samples, n_features). Each function returns a number + as the kernel value. Ignored when nodes are unlabeled. This argument + is designated to conjugate gradient method and fixed-point iterations. + node_labels : list, optional + The list of the name strings of the node labels. The default is []. + node_attrs : list, optional + The list of the name strings of the node attributes. The default is []. -def get_node_labels(Gn, node_label): - """Get node labels of dataset Gn. - """ - nl = set() - for G in Gn: - nl = nl | set(nx.get_node_attributes(G, node_label).values()) - return nl + Returns + ------- + vk_dict : dict + Vertex kernels keyed by vertices. + Notes + ----- + This function is used by ``gklearn.kernels.FixedPoint'' and + ``gklearn.kernels.StructuralSP''. The method is borrowed from FCSP [1]. -def get_edge_labels(Gn, edge_label): - """Get edge labels of dataset Gn. + References + ---------- + .. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang. + Parallelization of shortest path graph kernels on multi-core cpus and gpus. + Proceedings of the Programmability Issues for Heterogeneous Multicores + (MultiProg), Vienna, Austria, 2014. """ - el = set() - for G in Gn: - el = el | set(nx.get_edge_attributes(G, edge_label).values()) - return el + vk_dict = {} # shortest path matrices dict + if len(node_labels) > 0: + # node symb and non-synb labeled + if len(node_attrs) > 0: + kn = node_kernels['mix'] + for n1 in g1.nodes(data=True): + for n2 in g2.nodes(data=True): + n1_labels = [n1[1][nl] for nl in node_labels] + n2_labels = [n2[1][nl] for nl in node_labels] + n1_attrs = [n1[1][na] for na in node_attrs] + n2_attrs = [n2[1][na] for na in node_attrs] + vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) + # node symb labeled + else: + kn = node_kernels['symb'] + for n1 in g1.nodes(data=True): + for n2 in g2.nodes(data=True): + n1_labels = [n1[1][nl] for nl in node_labels] + n2_labels = [n2[1][nl] for nl in node_labels] + vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) + else: + # node non-synb labeled + if len(node_attrs) > 0: + kn = node_kernels['nsymb'] + for n1 in g1.nodes(data=True): + for n2 in g2.nodes(data=True): + n1_attrs = [n1[1][na] for na in node_attrs] + n2_attrs = [n2[1][na] for na in node_attrs] + vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) + # node unlabeled + else: + pass # @todo: add edge weights. 
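+			# (One possible extension, not implemented: compare an edge weight
+			# attribute here; the commented lines below sketch this.)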
+# for e1 in g1.edges(data=True): +# for e2 in g2.edges(data=True): +# if e1[2]['cost'] == e2[2]['cost']: +# kernel += 1 +# return kernel + + return vk_dict + + +#%% def get_graph_kernel_by_name(name, node_labels=None, edge_labels=None, node_attrs=None, edge_attrs=None, ds_infos=None, kernel_options={}, **kwargs): @@ -513,79 +581,6 @@ def compute_gram_matrices_by_class(ds_name, kernel_options, save_results=True, d print('\ncomplete.') -def find_paths(G, source_node, length): - """Find all paths with a certain length those start from a source node. - A recursive depth first search is applied. - - Parameters - ---------- - G : NetworkX graphs - The graph in which paths are searched. - source_node : integer - The number of the node from where all paths start. - length : integer - The length of paths. - - Return - ------ - path : list of list - List of paths retrieved, where each path is represented by a list of nodes. - """ - if length == 0: - return [[source_node]] - path = [[source_node] + path for neighbor in G[source_node] \ - for path in find_paths(G, neighbor, length - 1) if source_node not in path] - return path - - -def find_all_paths(G, length, is_directed): - """Find all paths with a certain length in a graph. A recursive depth first - search is applied. - - Parameters - ---------- - G : NetworkX graphs - The graph in which paths are searched. - length : integer - The length of paths. - - Return - ------ - path : list of list - List of paths retrieved, where each path is represented by a list of nodes. - """ - all_paths = [] - for node in G: - all_paths.extend(find_paths(G, node, length)) - - if not is_directed: - # For each path, two presentations are retrieved from its two extremities. - # Remove one of them. - all_paths_r = [path[::-1] for path in all_paths] - for idx, path in enumerate(all_paths[:-1]): - for path2 in all_paths_r[idx+1::]: - if path == path2: - all_paths[idx] = [] - break - all_paths = list(filter(lambda a: a != [], all_paths)) - - return all_paths - - -def get_mlti_dim_node_attrs(G, attr_names): - attributes = [] - for nd, attrs in G.nodes(data=True): - attributes.append(tuple(attrs[aname] for aname in attr_names)) - return attributes - - -def get_mlti_dim_edge_attrs(G, attr_names): - attributes = [] - for ed, attrs in G.edges(data=True): - attributes.append(tuple(attrs[aname] for aname in attr_names)) - return attributes - - def normalize_gram_matrix(gram_matrix): diag = gram_matrix.diagonal().copy() old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt. @@ -621,84 +616,162 @@ def compute_distance_matrix(gram_matrix): return dis_mat, dis_max, dis_min, dis_mean -# @todo: use it in ShortestPath. -def compute_vertex_kernels(g1, g2, node_kernels, node_labels=[], node_attrs=[]): - """Compute kernels between each pair of vertices in two graphs. +#%% + + +def graph_deepcopy(G): + """Deep copy a graph, including deep copy of all nodes, edges and + attributes of the graph, nodes and edges. + + Note + ---- + - It is the same as the NetworkX function graph.copy(), as far as I know. + + - This function only supports Networkx.Graph and Networkx.DiGraph. + """ + # add graph attributes. + labels = {} + for k, v in G.graph.items(): + labels[k] = deepcopy(v) + if G.is_directed(): + G_copy = nx.DiGraph(**labels) + else: + G_copy = nx.Graph(**labels) + + # add nodes + for nd, attrs in G.nodes(data=True): + labels = {} + for k, v in attrs.items(): + labels[k] = deepcopy(v) + G_copy.add_node(nd, **labels) + + # add edges. 
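+	# (Edge attribute values are deep-copied as well, so the copied graph
+	# shares no mutable state with G.)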
+ for nd1, nd2, attrs in G.edges(data=True): + labels = {} + for k, v in attrs.items(): + labels[k] = deepcopy(v) + G_copy.add_edge(nd1, nd2, **labels) + + return G_copy + + +def graph_isIdentical(G1, G2): + """Check if two graphs are identical, including: same nodes, edges, node + labels/attributes, edge labels/attributes. + + Notes + ----- + 1. The type of graphs has to be the same. + + 2. Global/Graph attributes are neglected as they may contain names for graphs. + """ + # check nodes. + nlist1 = [n for n in G1.nodes(data=True)] + nlist2 = [n for n in G2.nodes(data=True)] + if not nlist1 == nlist2: + return False + # check edges. + elist1 = [n for n in G1.edges(data=True)] + elist2 = [n for n in G2.edges(data=True)] + if not elist1 == elist2: + return False + # check graph attributes. + + return True + + +def get_node_labels(Gn, node_label): + """Get node labels of dataset Gn. + """ + nl = set() + for G in Gn: + nl = nl | set(nx.get_node_attributes(G, node_label).values()) + return nl + + +def get_edge_labels(Gn, edge_label): + """Get edge labels of dataset Gn. + """ + el = set() + for G in Gn: + el = el | set(nx.get_edge_attributes(G, edge_label).values()) + return el + + +def get_mlti_dim_node_attrs(G, attr_names): + attributes = [] + for nd, attrs in G.nodes(data=True): + attributes.append(tuple(attrs[aname] for aname in attr_names)) + return attributes + + +def get_mlti_dim_edge_attrs(G, attr_names): + attributes = [] + for ed, attrs in G.edges(data=True): + attributes.append(tuple(attrs[aname] for aname in attr_names)) + return attributes + + +def nx_permute_nodes(G, random_state=None): + """Permute node indices in a NetworkX graph. Parameters ---------- - g1, g2 : NetworkX graph - The kernels bewteen pairs of vertices in these two graphs are computed. - node_kernels : dict - A dictionary of kernel functions for nodes, including 3 items: 'symb' - for symbolic node labels, 'nsymb' for non-symbolic node labels, 'mix' - for both labels. The first 2 functions take two node labels as - parameters, and the 'mix' function takes 4 parameters, a symbolic and a - non-symbolic label for each the two nodes. Each label is in form of 2-D - dimension array (n_samples, n_features). Each function returns a number - as the kernel value. Ignored when nodes are unlabeled. This argument - is designated to conjugate gradient method and fixed-point iterations. - node_labels : list, optional - The list of the name strings of the node labels. The default is []. - node_attrs : list, optional - The list of the name strings of the node attributes. The default is []. + G : TYPE + DESCRIPTION. + random_state : TYPE, optional + DESCRIPTION. The default is None. Returns ------- - vk_dict : dict - Vertex kernels keyed by vertices. + G_new : TYPE + DESCRIPTION. Notes ----- - This function is used by ``gklearn.kernels.FixedPoint'' and - ``gklearn.kernels.StructuralSP''. The method is borrowed from FCSP [1]. - - References - ---------- - .. [1] Lifan Xu, Wei Wang, M Alvarez, John Cavazos, and Dongping Zhang. - Parallelization of shortest path graph kernels on multi-core cpus and gpus. - Proceedings of the Programmability Issues for Heterogeneous Multicores - (MultiProg), Vienna, Austria, 2014. + - This function only supports Networkx.Graph and Networkx.DiGraph. 
""" - vk_dict = {} # shortest path matrices dict - if len(node_labels) > 0: - # node symb and non-synb labeled - if len(node_attrs) > 0: - kn = node_kernels['mix'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_labels = [n1[1][nl] for nl in node_labels] - n2_labels = [n2[1][nl] for nl in node_labels] - n1_attrs = [n1[1][na] for na in node_attrs] - n2_attrs = [n2[1][na] for na in node_attrs] - vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels, n1_attrs, n2_attrs) - # node symb labeled - else: - kn = node_kernels['symb'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_labels = [n1[1][nl] for nl in node_labels] - n2_labels = [n2[1][nl] for nl in node_labels] - vk_dict[(n1[0], n2[0])] = kn(n1_labels, n2_labels) + # @todo: relabel node with integers? (in case something went wrong...) + # Add graph attributes. + labels = {} + for k, v in G.graph.items(): + labels[k] = deepcopy(v) + if G.is_directed(): + G_new = nx.DiGraph(**labels) else: - # node non-synb labeled - if len(node_attrs) > 0: - kn = node_kernels['nsymb'] - for n1 in g1.nodes(data=True): - for n2 in g2.nodes(data=True): - n1_attrs = [n1[1][na] for na in node_attrs] - n2_attrs = [n2[1][na] for na in node_attrs] - vk_dict[(n1[0], n2[0])] = kn(n1_attrs, n2_attrs) - # node unlabeled - else: - pass # @todo: add edge weights. -# for e1 in g1.edges(data=True): -# for e2 in g2.edges(data=True): -# if e1[2]['cost'] == e2[2]['cost']: -# kernel += 1 -# return kernel + G_new = nx.Graph(**labels) - return vk_dict + # Create a random mapping old node indices <-> new indices. + nb_nodes = nx.number_of_nodes(G) + indices_orig = range(nb_nodes) + idx_mapping = np.random.RandomState(seed=random_state).permutation(indices_orig) + + # Add nodes. + nodes_orig = list(G.nodes) + for i_orig in range(nb_nodes): + i_new = idx_mapping[i_orig] + labels = {} + for k, v in G.nodes[nodes_orig[i_new]].items(): + labels[k] = deepcopy(v) + G_new.add_node(nodes_orig[i_new], **labels) + + # Add edges. + for nd1, nd2, attrs in G.edges(data=True): + labels = {} + for k, v in attrs.items(): + labels[k] = deepcopy(v) + G_new.add_edge(nd1, nd2, **labels) + + +# # create a random mapping old label -> new label +# node_mapping = dict(zip(G.nodes(), np.random.RandomState(seed=random_state).permutation(G.nodes()))) +# # build a new graph +# G_new = nx.relabel_nodes(G, node_mapping) + + return G_new + + +#%% def dummy_node(): From a7e189134d5cc3541ab272d77a6bf3668a216ac1 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 18 Mar 2022 10:24:21 +0100 Subject: [PATCH 06/11] [Fix] Change the output directory name. 
--- .../ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py index f450c1e..d2d4db5 100644 --- a/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py +++ b/gklearn/experiments/ged/stability/edit_costs.real_data.nums_sols.ratios.bipartite.py @@ -161,7 +161,7 @@ if __name__ == '__main__': # ds_name_list = ['MUTAG'] # 'Alkane_unlabeled'] # ds_name_list = ['Acyclic', 'MAO', 'Monoterpenoides', 'MUTAG', 'AIDS_symb'] - save_dir = 'outputs/CRIANN/edit_costs.max_num_sols.ratios.bipartite/' + save_dir = 'outputs/CRIANN/edit_costs.real_data.nums_sols.ratios.bipartite/' os.makedirs(save_dir, exist_ok=True) os.makedirs(save_dir + 'groups/', exist_ok=True) @@ -169,4 +169,4 @@ if __name__ == '__main__': print() print('Dataset:', ds_name) param_grid = get_param_lists(ds_name, mode='simple') - results_for_a_dataset(ds_name) \ No newline at end of file + results_for_a_dataset(ds_name) From 5e37d4447f96953ffd7004d0e695dd30d53f9242 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 6 May 2022 14:12:31 +0200 Subject: [PATCH 07/11] [Major Features] Add GEDModel which is compatibale with . --- gklearn/ged/__init__.py | 1 + gklearn/ged/model/distances.py | 43 +++ gklearn/ged/model/ged_com.py | 97 ++++++ gklearn/ged/model/ged_model.py | 724 +++++++++++++++++++++++++++++++++++++++ gklearn/ged/model/optim_costs.py | 149 ++++++++ 5 files changed, 1014 insertions(+) create mode 100644 gklearn/ged/model/distances.py create mode 100644 gklearn/ged/model/ged_com.py create mode 100644 gklearn/ged/model/ged_model.py create mode 100644 gklearn/ged/model/optim_costs.py diff --git a/gklearn/ged/__init__.py b/gklearn/ged/__init__.py index e69de29..8696f76 100644 --- a/gklearn/ged/__init__.py +++ b/gklearn/ged/__init__.py @@ -0,0 +1 @@ +from gklearn.ged.model.ged_model import GEDModel \ No newline at end of file diff --git a/gklearn/ged/model/distances.py b/gklearn/ged/model/distances.py new file mode 100644 index 0000000..3e27eb3 --- /dev/null +++ b/gklearn/ged/model/distances.py @@ -0,0 +1,43 @@ +import numpy as np + + +def sum_squares(a, b): + """ + Return the sum of squares of the difference between a and b, aka MSE + """ + return np.sum([(a[i] - b[i])**2 for i in range(len(a))]) + + +def euclid_d(x, y): + """ + 1D euclidean distance + """ + return np.sqrt((x-y)**2) + + +def man_d(x, y): + """ + 1D manhattan distance + """ + return np.abs((x-y)) + + +def classif_d(x, y): + """ + Function adapted to classification problems + """ + return np.array(0 if x == y else 1) + + +def rmse(pred, ground_truth): + import numpy as np + return np.sqrt(sum_squares(pred, ground_truth)/len(ground_truth)) + + +def accuracy(pred, ground_truth): + import numpy as np + return np.mean([a == b for a, b in zip(pred, ground_truth)]) + + +def rbf_k(D, sigma=1): + return np.exp(-(D**2)/sigma) diff --git a/gklearn/ged/model/ged_com.py b/gklearn/ged/model/ged_com.py new file mode 100644 index 0000000..9da5f87 --- /dev/null +++ b/gklearn/ged/model/ged_com.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu May 5 14:02:17 2022 + +@author: ljia +""" +import sys +from gklearn.ged.model.distances import euclid_d +from gklearn.ged.util import pairwise_ged, get_nb_edit_operations +from gklearn.utils import get_iters + + +def compute_ged(Gi, 
Gj, edit_cost, method='BIPARTITE', **kwargs): + """ + Compute GED between two graph according to edit_cost + """ + ged_options = {'edit_cost': 'CONSTANT', + 'method': method, + 'edit_cost_constants': edit_cost} + node_labels = kwargs.get('node_labels', []) + edge_labels = kwargs.get('edge_labels', []) + dis, pi_forward, pi_backward = pairwise_ged(Gi, Gj, ged_options, repeats=10) + n_eo_tmp = get_nb_edit_operations(Gi, Gj, pi_forward, pi_backward, edit_cost='CONSTANT', node_labels=node_labels, edge_labels=edge_labels) + return dis, n_eo_tmp + + +def compute_ged_all_dataset(Gn, edit_cost, ed_method, **kwargs): + N = len(Gn) + G_pairs = [] + for i in range(N): + for j in range(i, N): + G_pairs.append([i, j]) + return compute_geds(G_pairs, Gn, edit_cost, ed_method, **kwargs) + + +def compute_geds(G_pairs, Gn, edit_cost, ed_method, verbose=True, **kwargs): + """ + Compute GED between all indexes in G_pairs given edit_cost + :return: ged_vec : the list of computed distances, n_edit_operations : the list of edit operations + """ + ged_vec = [] + n_edit_operations = [] + for k in get_iters(range(len(G_pairs)), desc='Computing GED', file=sys.stdout, length=len(G_pairs), verbose=verbose): + [i, j] = G_pairs[k] + dis, n_eo_tmp = compute_ged( + Gn[i], Gn[j], edit_cost=edit_cost, method=ed_method, **kwargs) + ged_vec.append(dis) + n_edit_operations.append(n_eo_tmp) + + return ged_vec, n_edit_operations + + +def compute_D(G_app, edit_cost, G_test=None, ed_method='BIPARTITE', **kwargs): + import numpy as np + N = len(G_app) + D_app = np.zeros((N, N)) + + for i, G1 in get_iters(enumerate(G_app), desc='Computing D - app', file=sys.stdout, length=N): + for j, G2 in enumerate(G_app[i+1:], i+1): + D_app[i, j], _ = compute_ged(G1, G2, edit_cost, method=ed_method, **kwargs) + D_app[j, i] = D_app[i, j] + if (G_test is None): + return D_app, edit_cost + else: + D_test = np.zeros((len(G_test), N)) + for i, G1 in get_iters(enumerate(G_test), desc='Computing D - test', file=sys.stdout, length=len(G_test)): + for j, G2 in enumerate(G_app): + D_test[i, j], _ = compute_ged(G1, G2, edit_cost, method=ed_method, **kwargs) + return D_app, D_test, edit_cost + + +def compute_D_random(G_app, G_test=None, ed_method='BIPARTITE', **kwargs): + import numpy as np + edit_costs = np.random.rand(6) + return compute_D(G_app, edit_costs, G_test, ed_method=ed_method, **kwargs) + + +def compute_D_expert(G_app, G_test=None, ed_method='BIPARTITE', **kwargs): + edit_cost = [3, 3, 1, 3, 3, 1] + return compute_D(G_app, edit_cost, G_test, ed_method=ed_method, **kwargs) + + +def compute_D_fitted(G_app, y_app, G_test=None, y_distance=euclid_d, + mode='reg', unlabeled=False, ed_method='BIPARTITE', **kwargs): + from gklearn.ged.models.optim_costs import compute_optimal_costs + + costs_optim = compute_optimal_costs( + G_app, y_app, y_distance=y_distance, + mode=mode, unlabeled=unlabeled, ed_method=ed_method, **kwargs) + return compute_D(G_app, costs_optim, G_test, ed_method=ed_method, **kwargs) + + +def compute_D_GH2020(G_app, G_test=None, ed_method='BIPARTITE', **kwargs): + from gklearn.ged.optim_costs import get_optimal_costs_GH2020 + costs_optim = get_optimal_costs_GH2020(**kwargs) + return compute_D(G_app, costs_optim, G_test, ed_method=ed_method, **kwargs) diff --git a/gklearn/ged/model/ged_model.py b/gklearn/ged/model/ged_model.py new file mode 100644 index 0000000..9bdbc90 --- /dev/null +++ b/gklearn/ged/model/ged_model.py @@ -0,0 +1,724 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu May 5 09:42:30 2022 + +@author: 
ljia +""" +import sys +import multiprocessing +import time +import numpy as np +import networkx as nx + +# from abc import ABC, abstractmethod +from sklearn.base import BaseEstimator # , TransformerMixin +from sklearn.utils.validation import check_is_fitted # check_X_y, check_array, +from sklearn.exceptions import NotFittedError + +from gklearn.ged.model.distances import euclid_d +from gklearn.ged.util import pairwise_ged, get_nb_edit_operations +# from gklearn.utils import normalize_gram_matrix +from gklearn.utils import get_iters + + +class GEDModel(BaseEstimator): #, ABC): + """The graph edit distance model class compatible with `scikit-learn`. + + Attributes + ---------- + _graphs : list + Stores the input graphs on fit input data. + Default format of the list objects is `NetworkX` graphs. + **We don't guarantee that the input graphs remain unchanged during the + computation.** + + References + ---------- + https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel. + """ + + def __init__(self, + ed_method='BIPARTITE', + edit_cost_fun='CONSTANT', + init_edit_cost_constants=[3, 3, 1, 3, 3, 1], + optim_method='init', + optim_options={'y_distance': euclid_d, 'mode': 'reg'}, + node_labels=[], + edge_labels=[], + parallel=None, + n_jobs=None, + chunksize=None, +# normalize=True, + copy_graphs=True, # make sure it is a full deep copy. and faster! + verbose=2): + """`__init__` for `GEDModel` object.""" + # @todo: the default settings of the parameters are different from those in the self.compute method. +# self._graphs = None + self.ed_method = ed_method + self.edit_cost_fun = edit_cost_fun + self.init_edit_cost_constants = init_edit_cost_constants + self.optim_method=optim_method + self.optim_options=optim_options + self.node_labels=node_labels + self.edge_labels=edge_labels + self.parallel = parallel + self.n_jobs = n_jobs + self.chunksize = chunksize +# self.normalize = normalize + self.copy_graphs = copy_graphs + self.verbose = verbose +# self._run_time = 0 +# self._gram_matrix = None +# self._gram_matrix_unnorm = None + + + ########################################################################## + # The following is the 1st paradigm to compute GED distance matrix, which is + # compatible with `scikit-learn`. + ########################################################################## + + + def fit(self, X, y=None): + """Fit a graph dataset for a transformer. + + Parameters + ---------- + X : iterable + DESCRIPTION. + + y : None, optional + There is no need of a target in a transformer, yet the `scikit-learn` + pipeline API requires this parameter. + + Returns + ------- + object + Returns self. + + """ +# self._is_tranformed = False + + # Clear any prior attributes stored on the estimator, # @todo: unless warm_start is used; + self.clear_attributes() + + # Validate parameters for the transformer. + self.validate_parameters() + + # Validate the input. + self._graphs = self.validate_input(X) + if y is not None: + self._targets = y + # self._targets = self.validate_input(y) + +# self._X = X +# self._kernel = self._get_kernel_instance() + + # Return the transformer. + return self + + + def transform(self, X=None, return_dm_train=False): + """Compute the graph kernel matrix between given and fitted data. + + Parameters + ---------- + X : TYPE + DESCRIPTION. + + Raises + ------ + ValueError + DESCRIPTION. + + Returns + ------- + None. + + """ + # If `return_dm_train`, return the fitted GED distance matrix of training data. 
+ if return_dm_train: + check_is_fitted(self, '_dm_train') + self._is_transformed = True + return self._dm_train # @todo: copy or not? + + # Check if method "fit" had been called. + check_is_fitted(self, '_graphs') + + # Validate the input. + Y = self.validate_input(X) + + # Transform: compute the graph kernel matrix. + dis_matrix = self.compute_distance_matrix(Y) + self._Y = Y + + # Self transform must appear before the diagonal call on normilization. + self._is_transformed = True +# if self.normalize: +# X_diag, Y_diag = self.diagonals() +# old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt. +# try: +# kernel_matrix /= np.sqrt(np.outer(Y_diag, X_diag)) +# except: +# raise +# finally: +# np.seterr(**old_settings) + + return dis_matrix + + + def fit_transform(self, X, y=None, save_dm_train=False): + """Fit and transform: compute GED distance matrix on the same data. + + Parameters + ---------- + X : list of graphs + Input graphs. + + Returns + ------- + dis_matrix : numpy array, shape = [len(X), len(X)] + The distance matrix of X. + + """ + self.fit(X, y) + + # Compute edit cost constants. + self.compute_edit_costs() + + # Transform: compute Gram matrix. + dis_matrix = self.compute_distance_matrix() + +# # Normalize. +# if self.normalize: +# self._X_diag = np.diagonal(gram_matrix).copy() +# old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt. +# try: +# gram_matrix /= np.sqrt(np.outer(self._X_diag, self._X_diag)) +# except: +# raise +# finally: +# np.seterr(**old_settings) + + if save_dm_train: + self._dm_train = dis_matrix + + return dis_matrix + + + def get_params(self): + pass + + + def set_params(self): + pass + + + def clear_attributes(self): # @todo: update +# if hasattr(self, '_X_diag'): +# delattr(self, '_X_diag') + if hasattr(self, '_graphs'): + delattr(self, '_graphs') + if hasattr(self, '_Y'): + delattr(self, '_Y') + if hasattr(self, '_run_time'): + delattr(self, '_run_time') + + + def validate_parameters(self): + """Validate all parameters for the transformer. + + Returns + ------- + None. + + """ + if self.parallel is not None and self.parallel != 'imap_unordered': + raise ValueError('Parallel mode is not set correctly.') + + if self.parallel == 'imap_unordered' and self.n_jobs is None: + self.n_jobs = multiprocessing.cpu_count() + + + def validate_input(self, X): + """Validate the given input and raise errors if it is invalid. + + Parameters + ---------- + X : list + The input to check. Should be a list of graph. + + Raises + ------ + ValueError + Raise if the input is not correct. + + Returns + ------- + X : list + The input. A list of graph. + + """ + if X is None: + raise ValueError('Please add graphs before computing.') + elif not isinstance(X, list): + raise ValueError('Cannot detect graphs. The input must be a list.') + elif len(X) == 0: + raise ValueError('The graph list given is empty. No computation will be performed.') + + return X + + + def compute_distance_matrix(self, Y=None): + """Compute the distance matrix between a given target graphs (Y) and + the fitted graphs (X / self._graphs) or the distance matrix for the fitted + graphs (X / self._graphs). + + Parameters + ---------- + Y : list of graphs, optional + The target graphs. The default is None. If None kernel is computed + between X and itself. + + Returns + ------- + kernel_matrix : numpy array, shape = [n_targets, n_inputs] + The computed kernel matrix. 
+ + """ + if Y is None: + # Compute Gram matrix for self._graphs (X). + dis_matrix = self._compute_X_distance_matrix() +# self._gram_matrix_unnorm = np.copy(self._gram_matrix) + + else: + # Compute kernel matrix between Y and self._graphs (X). + start_time = time.time() + + if self.parallel == 'imap_unordered': + dis_matrix = self._compute_distance_matrix_imap_unordered(Y) + + elif self.parallel is None: + Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y) + graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + dis_matrix = self._compute_distance_matrix_series(Y_copy, graphs_copy) + + self._run_time = time.time() - start_time + if self.verbose: + print('Distance matrix of size (%d, %d) built in %s seconds.' + % (len(Y), len(self._graphs), self._run_time)) + + return dis_matrix + + + def _compute_distance_matrix_series(self, X, Y): + """Compute the GED distance matrix between two sets of graphs (X and Y) + without parallelization. + + Parameters + ---------- + X, Y : list of graphs + The input graphs. + + Returns + ------- + dis_matrix : numpy array, shape = [n_X, n_Y] + The computed distance matrix. + + """ + dis_matrix = np.zeros((len(X), len(Y))) + + for i_x, g_x in enumerate(X): + for i_y, g_y in enumerate(Y): + dis_matrix[i_x, i_y], _ = self.compute_ged(g_x, g_y) + + return dis_matrix + + + def _compute_kernel_matrix_imap_unordered(self, Y): + """Compute the kernel matrix between a given target graphs (Y) and + the fitted graphs (X / self._graphs) using imap unordered parallelization. + + Parameters + ---------- + Y : list of graphs, optional + The target graphs. + + Returns + ------- + kernel_matrix : numpy array, shape = [n_targets, n_inputs] + The computed kernel matrix. + + """ + raise Exception('Parallelization for kernel matrix is not implemented.') + + + def diagonals(self): + """Compute the kernel matrix diagonals of the fit/transformed data. + + Returns + ------- + X_diag : numpy array + The diagonal of the kernel matrix between the fitted data. + This consists of each element calculated with itself. + + Y_diag : numpy array + The diagonal of the kernel matrix, of the transform. + This consists of each element calculated with itself. + + """ + # Check if method "fit" had been called. + check_is_fitted(self, ['_graphs']) + + # Check if the diagonals of X exist. + try: + check_is_fitted(self, ['_X_diag']) + except NotFittedError: + # Compute diagonals of X. + self._X_diag = np.empty(shape=(len(self._graphs),)) + graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + for i, x in enumerate(graphs): + self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel? + + try: + # If transform has happened, return both diagonals. + check_is_fitted(self, ['_Y']) + self._Y_diag = np.empty(shape=(len(self._Y),)) + Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y) + for (i, y) in enumerate(Y): + self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel? + + return self._X_diag, self._Y_diag + except NotFittedError: + # Else just return both X_diag + return self._X_diag + + +# @abstractmethod + def pairwise_distance(self, x, y): + """Compute pairwise kernel between two graphs. + + Parameters + ---------- + x, y : NetworkX Graph. + Graphs bewteen which the kernel is computed. + + Returns + ------- + kernel: float + The computed kernel. + +# Notes +# ----- +# This method is abstract and must be implemented by a subclass. 
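# --- Illustrative sketch (editor's example, not part of the patch) -------------
# The `diagonals` method above uses `check_is_fitted` inside try/except
# `NotFittedError` as a compute-once cache for derived attributes. The same pattern
# in isolation, on a hypothetical toy estimator:
from sklearn.base import BaseEstimator
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted


class CachedValue(BaseEstimator):
    def fit(self, X=None, y=None):
        return self

    def value(self):
        try:
            check_is_fitted(self, ['_cache'])
        except NotFittedError:
            print('computing once...')
            self._cache = 42  # an expensive computation would go here
        return self._cache


est = CachedValue()
print(est.value())  # triggers the computation
print(est.value())  # served from the cached attribute
# --- end example ----------------------------------------------------------------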
+ + """ + raise NotImplementedError('Pairwise kernel computation is not implemented!') + + + + def compute_edit_costs(self, Y=None, Y_targets=None): + """Compute edit cost constants. When optimizing method is `fiited`, + apply Jia2021's metric learning method by using a given target graphs (Y) + the fitted graphs (X / self._graphs). + + Parameters + ---------- + Y : TYPE, optional + DESCRIPTION. The default is None. + + Returns + ------- + None. + + """ + # Get or compute. + if self.optim_method == 'random': + self._edit_cost_constants = np.random.rand(6) + + elif self.optim_method == 'init': + self._edit_cost_constants = self.init_edit_cost_constants + + + elif self.optim_method == 'expert': + self._edit_cost_constants = [3, 3, 1, 3, 3, 1] + + + elif self.optim_method == 'fitted': # Jia2021 method + # Get proper inputs. + if Y is None: + check_is_fitted(self, ['_graphs']) + check_is_fitted(self, ['_targets']) + graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + targets = self._targets + else: + graphs = ([g.copy() for g in Y] if self.copy_graphs else Y) + targets = Y_targets + + # Get optimization options. + node_labels = self.node_labels + edge_labels = self.edge_labels + unlabeled = (len(node_labels) == 0 and len(edge_labels) == 0) + from gklearn.ged.model.optim_costs import compute_optimal_costs + self._edit_cost_constants = compute_optimal_costs( + graphs, targets, + node_labels=node_labels, edge_labels=edge_labels, + unlabeled=unlabeled, ed_method=self.ed_method, + verbose=(self.verbose >= 2), + **self.optim_options) + + + ########################################################################## + # The following is the 2nd paradigm to compute kernel matrix. It is + # simplified and not compatible with `scikit-learn`. + ########################################################################## + + +# def compute(self, *graphs, **kwargs): +# self.parallel = kwargs.get('parallel', 'imap_unordered') +# self.n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count()) +# self.normalize = kwargs.get('normalize', True) +# self.verbose = kwargs.get('verbose', 2) +# self.copy_graphs = kwargs.get('copy_graphs', True) +# self.save_unnormed = kwargs.get('save_unnormed', True) +# self.validate_parameters() + +# # If the inputs is a list of graphs. +# if len(graphs) == 1: +# if not isinstance(graphs[0], list): +# raise Exception('Cannot detect graphs.') +# elif len(graphs[0]) == 0: +# raise Exception('The graph list given is empty. No computation was performed.') +# else: +# if self.copy_graphs: +# self._graphs = [g.copy() for g in graphs[0]] # @todo: might be very slow. +# else: +# self._graphs = graphs +# self._gram_matrix = self._compute_gram_matrix() + +# if self.save_unnormed: +# self._gram_matrix_unnorm = np.copy(self._gram_matrix) +# if self.normalize: +# self._gram_matrix = normalize_gram_matrix(self._gram_matrix) +# return self._gram_matrix, self._run_time + +# elif len(graphs) == 2: +# # If the inputs are two graphs. +# if self.is_graph(graphs[0]) and self.is_graph(graphs[1]): +# if self.copy_graphs: +# G0, G1 = graphs[0].copy(), graphs[1].copy() +# else: +# G0, G1 = graphs[0], graphs[1] +# kernel = self._compute_single_kernel(G0, G1) +# return kernel, self._run_time + +# # If the inputs are a graph and a list of graphs. 
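# --- Illustrative sketch (editor's example, not part of the patch) -------------
# Rough summary of the `optim_method` branches in `compute_edit_costs` above, as a
# plain function. The six values are the CONSTANT edit-cost constants (three
# node-operation costs followed by three edge-operation costs); the 'fitted' branch
# is the data-driven optimisation and needs graphs plus targets, so it is only
# named here.
import numpy as np

def choose_edit_costs(optim_method, init_costs=(3, 3, 1, 3, 3, 1)):
    if optim_method == 'random':
        return np.random.rand(6)        # six random constants in [0, 1)
    if optim_method == 'init':
        return list(init_costs)         # whatever was passed at construction
    if optim_method == 'expert':
        return [3, 3, 1, 3, 3, 1]       # fixed expert values
    raise ValueError("'fitted' requires graphs and targets (see compute_optimal_costs).")

print(choose_edit_costs('expert'))
# --- end example ----------------------------------------------------------------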
+# elif self.is_graph(graphs[0]) and isinstance(graphs[1], list): +# if self.copy_graphs: +# g1 = graphs[0].copy() +# g_list = [g.copy() for g in graphs[1]] +# kernel_list = self._compute_kernel_list(g1, g_list) +# else: +# kernel_list = self._compute_kernel_list(graphs[0], graphs[1]) +# return kernel_list, self._run_time + +# elif isinstance(graphs[0], list) and self.is_graph(graphs[1]): +# if self.copy_graphs: +# g1 = graphs[1].copy() +# g_list = [g.copy() for g in graphs[0]] +# kernel_list = self._compute_kernel_list(g1, g_list) +# else: +# kernel_list = self._compute_kernel_list(graphs[1], graphs[0]) +# return kernel_list, self._run_time + +# else: +# raise Exception('Cannot detect graphs.') + +# elif len(graphs) == 0 and self._graphs is None: +# raise Exception('Please add graphs before computing.') + +# else: +# raise Exception('Cannot detect graphs.') + + +# def normalize_gm(self, gram_matrix): +# import warnings +# warnings.warn('gklearn.kernels.graph_kernel.normalize_gm will be deprecated, use gklearn.utils.normalize_gram_matrix instead', DeprecationWarning) + +# diag = gram_matrix.diagonal().copy() +# for i in range(len(gram_matrix)): +# for j in range(i, len(gram_matrix)): +# gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j]) +# gram_matrix[j][i] = gram_matrix[i][j] +# return gram_matrix + + +# def compute_distance_matrix(self): +# if self._gram_matrix is None: +# raise Exception('Please compute the Gram matrix before computing distance matrix.') +# dis_mat = np.empty((len(self._gram_matrix), len(self._gram_matrix))) +# for i in range(len(self._gram_matrix)): +# for j in range(i, len(self._gram_matrix)): +# dis = self._gram_matrix[i, i] + self._gram_matrix[j, j] - 2 * self._gram_matrix[i, j] +# if dis < 0: +# if dis > -1e-10: +# dis = 0 +# else: +# raise ValueError('The distance is negative.') +# dis_mat[i, j] = np.sqrt(dis) +# dis_mat[j, i] = dis_mat[i, j] +# dis_max = np.max(np.max(dis_mat)) +# dis_min = np.min(np.min(dis_mat[dis_mat != 0])) +# dis_mean = np.mean(np.mean(dis_mat)) +# return dis_mat, dis_max, dis_min, dis_mean + + + def _compute_X_distance_matrix(self): + start_time = time.time() + + if self.parallel == 'imap_unordered': + dis_matrix = self._compute_X_dm_imap_unordered() + elif self.parallel is None: + graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + dis_matrix = self._compute_X_dm_series(graphs) + else: + raise Exception('Parallel mode is not set correctly.') + + self._run_time = time.time() - start_time + if self.verbose: + print('Distance matrix of size %d built in %s seconds.' + % (len(self._graphs), self._run_time)) + + return dis_matrix + + + def _compute_X_dm_series(self, graphs): + N = len(graphs) + dis_matrix = np.zeros((N, N)) + + for i, G1 in get_iters(enumerate(graphs), desc='Computing distance matrix', file=sys.stdout, verbose=(self.verbose >= 2)): + for j, G2 in enumerate(graphs[i+1:], i+1): + dis_matrix[i, j], _ = self.compute_ged(G1, G2) + dis_matrix[j, i] = dis_matrix[i, j] + return dis_matrix + + + def _compute_X_dm_imap_unordered(self, graphs): + pass + + + def compute_ged(self, Gi, Gj, **kwargs): + """ + Compute GED between two graph according to edit_cost. 
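# --- Illustrative sketch (editor's example, not part of the patch) -------------
# `_compute_X_dm_series` above fills only the upper triangle (j > i) and mirrors
# it, leaving the diagonal at zero. The same pattern on plain numbers:
import numpy as np

def symmetric_matrix(items, dist):
    n = len(items)
    mat = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            mat[i, j] = dist(items[i], items[j])
            mat[j, i] = mat[i, j]  # mirror the value; the diagonal stays 0
    return mat

print(symmetric_matrix([1, 4, 6], lambda a, b: abs(a - b)))
# --- end example ----------------------------------------------------------------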
+ """ + ged_options = {'edit_cost': self.edit_cost_fun, + 'method': self.ed_method, + 'edit_cost_constants': self._edit_cost_constants} + dis, pi_forward, pi_backward = pairwise_ged(Gi, Gj, ged_options, repeats=10) + n_eo_tmp = get_nb_edit_operations(Gi, Gj, pi_forward, pi_backward, + edit_cost=self.edit_cost_fun, + node_labels=self.node_labels, + edge_labels=self.edge_labels) + return dis, n_eo_tmp + + +# def _compute_kernel_list(self, g1, g_list): +# start_time = time.time() + +# if self.parallel == 'imap_unordered': +# kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list) +# elif self.parallel is None: +# kernel_list = self._compute_kernel_list_series(g1, g_list) +# else: +# raise Exception('Parallel mode is not set correctly.') + +# self._run_time = time.time() - start_time +# if self.verbose: +# print('Graph kernel bewteen a graph and a list of %d graphs built in %s seconds.' +# % (len(g_list), self._run_time)) + +# return kernel_list + + +# def _compute_kernel_list_series(self, g1, g_list): +# pass + + +# def _compute_kernel_list_imap_unordered(self, g1, g_list): +# pass + + +# def _compute_single_kernel(self, g1, g2): +# start_time = time.time() + +# kernel = self._compute_single_kernel_series(g1, g2) + +# self._run_time = time.time() - start_time +# if self.verbose: +# print('Graph kernel bewteen two graphs built in %s seconds.' % (self._run_time)) + +# return kernel + + +# def _compute_single_kernel_series(self, g1, g2): +# pass + + + def is_graph(self, graph): + if isinstance(graph, nx.Graph): + return True + if isinstance(graph, nx.DiGraph): + return True + if isinstance(graph, nx.MultiGraph): + return True + if isinstance(graph, nx.MultiDiGraph): + return True + return False + + + @property + def graphs(self): + return self._graphs + + +# @property +# def parallel(self): +# return self.parallel + + +# @property +# def n_jobs(self): +# return self.n_jobs + + +# @property +# def verbose(self): +# return self.verbose + + +# @property +# def normalize(self): +# return self.normalize + + + @property + def run_time(self): + return self._run_time + + + @property + def dis_matrix(self): + return self._dis_matrix + + @dis_matrix.setter + def dis_matrix(self, value): + self._dis_matrix = value + + +# @property +# def gram_matrix_unnorm(self): +# return self._gram_matrix_unnorm + +# @gram_matrix_unnorm.setter +# def gram_matrix_unnorm(self, value): +# self._gram_matrix_unnorm = value \ No newline at end of file diff --git a/gklearn/ged/model/optim_costs.py b/gklearn/ged/model/optim_costs.py new file mode 100644 index 0000000..1e23732 --- /dev/null +++ b/gklearn/ged/model/optim_costs.py @@ -0,0 +1,149 @@ +import numpy as np + +from gklearn.ged.model.distances import sum_squares, euclid_d +from gklearn.ged.model.ged_com import compute_geds + + +def optimize_costs_unlabeled(nb_cost_mat, dis_k_vec): + """ + Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat + ! 
take care that nb_cost_mat do not contains 0 lines + :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph + :param dis_k_vec: The N distances to fit + """ + import cvxpy as cp + import numpy as np + MAX_SAMPLE = 1000 + nb_cost_mat_m = np.array([[x[0], x[1], x[3], x[4]] for x in nb_cost_mat]) + dis_k_vec = np.array(dis_k_vec) + # dis_k_vec_norm = dis_k_vec/np.max(dis_k_vec) + + # import pickle + # pickle.dump([nb_cost_mat, dis_k_vec], open('debug', 'wb')) + N = nb_cost_mat_m.shape[0] + sub_sample = np.random.permutation(np.arange(N)) + sub_sample = sub_sample[:MAX_SAMPLE] + + x = cp.Variable(nb_cost_mat_m.shape[1]) + cost = cp.sum_squares((nb_cost_mat_m[sub_sample, :] @ x) - dis_k_vec[sub_sample]) + prob = cp.Problem(cp.Minimize(cost), [x >= 0]) + prob.solve() + edit_costs_new = [x.value[0], x.value[1], 0, x.value[2], x.value[3], 0] + edit_costs_new = [xi if xi > 0 else 0 for xi in edit_costs_new] + residual = prob.value + return edit_costs_new, residual + + +def optimize_costs_classif_unlabeled(nb_cost_mat, Y): + """ + Optimize edit costs to fit dis_k_vec according to edit operations in + nb_cost_mat + ! take care that nb_cost_mat do not contains 0 lines + :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit + operations for each pair of graph + :param dis_k_vec: {-1,1}^N vector of common classes + """ + # import cvxpy as cp + from ml import reg_log + # import pickle + # pickle.dump([nb_cost_mat, Y], open('debug', 'wb')) + nb_cost_mat_m = np.array([[x[0], x[1], x[3], x[4]] + for x in nb_cost_mat]) + w, J, _ = reg_log(nb_cost_mat_m, Y, pos_contraint=True) + edit_costs_new = [w[0], w[1], 0, w[2], w[3], 0] + residual = J[-1] + + return edit_costs_new, residual + + +def optimize_costs_classif(nb_cost_mat, Y): + """ + Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat + ! take care that nb_cost_mat do not contains 0 lines + :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph + :param dis_k_vec: {-1,1}^N vector of common classes + """ + #import pickle + # pickle.dump([nb_cost_mat, Y], open("test.pickle", "wb")) + from ml import reg_log + w, J, _ = reg_log(nb_cost_mat, Y, pos_contraint=True) + return w, J[-1] + + +def optimize_costs(nb_cost_mat, dis_k_vec): + """ + Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat + ! 
take care that nb_cost_mat do not contains 0 lines + :param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph + :param dis_k_vec: The N distances to fit + """ + import cvxpy as cp + x = cp.Variable(nb_cost_mat.shape[1]) + cost = cp.sum_squares((nb_cost_mat @ x) - dis_k_vec) + constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])], + np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, + np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] + prob = cp.Problem(cp.Minimize(cost), constraints) + prob.solve() + edit_costs_new = x.value + residual = prob.value + + return edit_costs_new, residual + + +def compute_optimal_costs(G, y, init_costs=[3, 3, 1, 3, 3, 1], + y_distance=euclid_d, + mode='reg', unlabeled=False, + ed_method='BIPARTITE', + verbose=True, + **kwargs): + N = len(y) + + G_pairs = [] + distances_vec = [] + + for i in range(N): + for j in range(i+1, N): + G_pairs.append([i, j]) + distances_vec.append(y_distance(y[i], y[j])) + ged_vec_init, n_edit_operations = compute_geds(G_pairs, G, init_costs, ed_method, + verbose=verbose, **kwargs) + + residual_list = [sum_squares(ged_vec_init, distances_vec)] + + if (mode == 'reg'): + if unlabeled: + method_optim = optimize_costs_unlabeled + else: + method_optim = optimize_costs + + elif (mode == 'classif'): + if unlabeled: + method_optim = optimize_costs_classif_unlabeled + else: + method_optim = optimize_costs_classif + + ite_max = 5 + for i in range(ite_max): + if verbose: + print('ite', i + 1, '/', ite_max, ':') + # compute GEDs and numbers of edit operations. + edit_costs_new, residual = method_optim( + np.array(n_edit_operations), distances_vec) + ged_vec, n_edit_operations = compute_geds(G_pairs, G, edit_costs_new, ed_method, + verbose=verbose, **kwargs) + residual_list.append(sum_squares(ged_vec, distances_vec)) + + return edit_costs_new + + +def get_optimal_costs_GH2020(**kwargs): + import pickle + import os + dir_root = 'cj/output/' + ds_name = kwargs.get('ds_name') + nb_trial = kwargs.get('nb_trial') + file_name = os.path.join(dir_root, 'costs.' + ds_name + '.' + str(nb_trial) + '.pkl') + with open(file_name, 'rb') as f: + edit_costs = pickle.load(f) + return edit_costs From a76335ed16c0d635a636c477dbe4d179981b4452 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 6 May 2022 14:15:03 +0200 Subject: [PATCH 08/11] [Features][API Changes] Update kernel classes. --- gklearn/kernels/graph_kernel.py | 59 ++++++++----- gklearn/kernels/treelet.py | 66 +++++++++------ gklearn/kernels/weisfeiler_lehman.py | 155 ++++++++++++----------------------- gklearn/utils/kernels.py | 5 ++ 4 files changed, 135 insertions(+), 150 deletions(-) diff --git a/gklearn/kernels/graph_kernel.py b/gklearn/kernels/graph_kernel.py index 1db38b3..c7a5718 100644 --- a/gklearn/kernels/graph_kernel.py +++ b/gklearn/kernels/graph_kernel.py @@ -32,7 +32,13 @@ class GraphKernel(BaseEstimator): #, ABC): https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel. """ - def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2): + def __init__(self, + parallel=None, + n_jobs=None, + chunksize=None, + normalize=True, + copy_graphs=True, # make sure it is a full deep copy. and faster! + verbose=2): """`__init__` for `GraphKernel` object.""" # @todo: the default settings of the parameters are different from those in the self.compute method. 
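# --- Illustrative sketch (editor's example, not part of the patch) -------------
# The cost-fitting functions in optim_costs.py above all solve the same kind of
# problem: with CONSTANT costs, a GED value is (for the found edit path) the dot
# product of the edit-operation counts with the cost vector, so fitting the costs
# to target distances is a constrained least-squares problem. A self-contained
# cvxpy sketch with made-up counts and targets:
import numpy as np
import cvxpy as cp

A = np.array([[2., 1., 0., 1., 0., 1.],
              [1., 0., 2., 0., 1., 1.],
              [0., 2., 1., 1., 1., 0.]])  # edit-operation counts, one row per graph pair
b = np.array([9., 7., 8.])                # target distances to fit

x = cp.Variable(A.shape[1])
prob = cp.Problem(cp.Minimize(cp.sum_squares(A @ x - b)), [x >= 0.01])
prob.solve()
print(np.round(x.value, 3), 'residual:', round(prob.value, 3))
# optimize_costs() above additionally constrains each substitution cost to be at
# most the corresponding insertion cost plus deletion cost.
# --- end example ----------------------------------------------------------------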
# self._graphs = None @@ -40,6 +46,7 @@ class GraphKernel(BaseEstimator): #, ABC): self.n_jobs = n_jobs self.chunksize = chunksize self.normalize = normalize + self.copy_graphs = copy_graphs self.verbose = verbose # self._run_time = 0 # self._gram_matrix = None @@ -90,7 +97,7 @@ class GraphKernel(BaseEstimator): #, ABC): return self - def transform(self, X): + def transform(self, X=None, load_gm_train=False): """Compute the graph kernel matrix between given and fitted data. Parameters @@ -108,6 +115,12 @@ class GraphKernel(BaseEstimator): #, ABC): None. """ + # If `load_gm_train`, load Gram matrix of training data. + if load_gm_train: + check_is_fitted(self, '_gm_train') + self._is_transformed = True + return self._gm_train # @todo: copy or not? + # Check if method "fit" had been called. check_is_fitted(self, '_graphs') @@ -133,8 +146,7 @@ class GraphKernel(BaseEstimator): #, ABC): return kernel_matrix - - def fit_transform(self, X): + def fit_transform(self, X, save_gm_train=False): """Fit and transform: compute Gram matrix on the same data. Parameters @@ -164,6 +176,9 @@ class GraphKernel(BaseEstimator): #, ABC): finally: np.seterr(**old_settings) + if save_gm_train: + self._gm_train = gram_matrix + return gram_matrix @@ -260,7 +275,9 @@ class GraphKernel(BaseEstimator): #, ABC): kernel_matrix = self._compute_kernel_matrix_imap_unordered(Y) elif self.parallel is None: - kernel_matrix = self._compute_kernel_matrix_series(Y) + Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y) + graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + kernel_matrix = self._compute_kernel_matrix_series(Y_copy, graphs_copy) self._run_time = time.time() - start_time if self.verbose: @@ -270,26 +287,25 @@ class GraphKernel(BaseEstimator): #, ABC): return kernel_matrix - def _compute_kernel_matrix_series(self, Y): - """Compute the kernel matrix between a given target graphs (Y) and - the fitted graphs (X / self._graphs) without parallelization. + def _compute_kernel_matrix_series(self, X, Y): + """Compute the kernel matrix between two sets of graphs (X and Y) without parallelization. Parameters ---------- - Y : list of graphs, optional - The target graphs. + X, Y : list of graphs + The input graphs. Returns ------- - kernel_matrix : numpy array, shape = [n_targets, n_inputs] + kernel_matrix : numpy array, shape = [n_X, n_Y] The computed kernel matrix. """ - kernel_matrix = np.zeros((len(Y), len(self._graphs))) + kernel_matrix = np.zeros((len(X), len(Y))) - for i_y, g_y in enumerate(Y): - for i_x, g_x in enumerate(self._graphs): - kernel_matrix[i_y, i_x] = self.pairwise_kernel(g_y, g_x) + for i_x, g_x in enumerate(X): + for i_y, g_y in enumerate(Y): + kernel_matrix[i_x, i_y] = self.pairwise_kernel(g_x, g_y) return kernel_matrix @@ -335,14 +351,16 @@ class GraphKernel(BaseEstimator): #, ABC): except NotFittedError: # Compute diagonals of X. self._X_diag = np.empty(shape=(len(self._graphs),)) - for i, x in enumerate(self._graphs): + graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + for i, x in enumerate(graphs): self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel? try: # If transform has happened, return both diagonals. check_is_fitted(self, ['_Y']) self._Y_diag = np.empty(shape=(len(self._Y),)) - for (i, y) in enumerate(self._Y): + Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y) + for (i, y) in enumerate(Y): self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel? 
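# --- Illustrative sketch (editor's example, not part of the patch) -------------
# With `normalize=True`, GraphKernel divides each kernel value k(x, y) by
# sqrt(k(x, x) * k(y, y)) using the diagonals computed above, so every normalised
# self-similarity becomes 1. Stand-alone numeric sketch of that step:
import numpy as np

K = np.array([[4., 2.],
              [2., 9.]])                       # toy Gram matrix
diag = np.diagonal(K).copy()
K_norm = K / np.sqrt(np.outer(diag, diag))     # [[1., 1/3], [1/3, 1.]]
print(K_norm)
# --- end example ----------------------------------------------------------------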
return self._X_diag, self._Y_diag @@ -484,7 +502,8 @@ class GraphKernel(BaseEstimator): #, ABC): if self.parallel == 'imap_unordered': gram_matrix = self._compute_gm_imap_unordered() elif self.parallel is None: - gram_matrix = self._compute_gm_series() + graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs) + gram_matrix = self._compute_gm_series(graphs) else: raise Exception('Parallel mode is not set correctly.') @@ -496,11 +515,11 @@ class GraphKernel(BaseEstimator): #, ABC): return gram_matrix - def _compute_gm_series(self): + def _compute_gm_series(self, graphs): pass - def _compute_gm_imap_unordered(self): + def _compute_gm_imap_unordered(self, graphs): pass diff --git a/gklearn/kernels/treelet.py b/gklearn/kernels/treelet.py index e42142b..c981bdf 100644 --- a/gklearn/kernels/treelet.py +++ b/gklearn/kernels/treelet.py @@ -28,16 +28,16 @@ from gklearn.kernels import GraphKernel class Treelet(GraphKernel): - def __init__(self, parallel=None, n_jobs=None, chunksize=None, normalize=True, verbose=2, precompute_canonkeys=True, save_canonkeys=False, **kwargs): + def __init__(self, **kwargs): """Initialise a treelet kernel. """ - super().__init__(parallel=parallel, n_jobs=n_jobs, chunksize=chunksize, normalize=normalize, verbose=verbose) + GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs}) self.node_labels = kwargs.get('node_labels', []) self.edge_labels = kwargs.get('edge_labels', []) self.sub_kernel = kwargs.get('sub_kernel', None) self.ds_infos = kwargs.get('ds_infos', {}) - self.precompute_canonkeys = precompute_canonkeys - self.save_canonkeys = save_canonkeys + self.precompute_canonkeys = kwargs.get('precompute_canonkeys', True) + self.save_canonkeys = kwargs.get('save_canonkeys', True) ########################################################################## @@ -71,7 +71,7 @@ class Treelet(GraphKernel): raise ValueError('Sub-kernel not set.') - def _compute_kernel_matrix_series(self, Y): + def _compute_kernel_matrix_series(self, Y, X=None, load_canonkeys=True): """Compute the kernel matrix between a given target graphs (Y) and the fitted graphs (X / self._graphs) without parallelization. @@ -86,36 +86,45 @@ class Treelet(GraphKernel): The computed kernel matrix. """ + if_comp_X_canonkeys = True + + # if load saved canonkeys of X from the instance: + if load_canonkeys: + # Canonical keys for self._graphs. + try: + check_is_fitted(self, ['_canonkeys']) + canonkeys_list1 = self._canonkeys + if_comp_X_canonkeys = False + except NotFittedError: + import warnings + warnings.warn('The canonkeys of self._graphs are not computed/saved. The keys of `X` is computed instead.') + if_comp_X_canonkeys = True - # self._add_dummy_labels will modify the input in place. - self._add_dummy_labels() # For self._graphs -# Y = [g.copy() for g in Y] # @todo: ? - self._add_dummy_labels(Y) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. - # Canonical keys for self._graphs. - try: - check_is_fitted(self, ['_canonkeys']) - canonkeys_list1 = self._canonkeys - except NotFittedError: + # Compute the canonical keys of X. + if if_comp_X_canonkeys: + if X is None: + raise('X can not be None.') + # self._add_dummy_labels will modify the input in place. 
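# --- Illustrative sketch (editor's example, not part of the patch) -------------
# The updated Treelet constructor above forwards only the base-class keywords out
# of **kwargs and keeps the rest for itself. The same filtering pattern in
# isolation, with a hypothetical base initialiser:
def base_init(parallel=None, n_jobs=None, chunksize=None, normalize=True,
              copy_graphs=True, verbose=2):
    return locals()

kwargs = {'n_jobs': 4, 'sub_kernel': 'gaussian'}  # mix of base and subclass options
base_keys = ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose']
print(base_init(**{k: kwargs[k] for k in base_keys if k in kwargs}))  # only n_jobs forwarded
# --- end example ----------------------------------------------------------------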
+ self._add_dummy_labels(X) # for X canonkeys_list1 = [] - iterator = get_iters(self._graphs, desc='getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2)) + iterator = get_iters(self._graphs, desc='Getting canonkeys for X', file=sys.stdout, verbose=(self.verbose >= 2)) for g in iterator: canonkeys_list1.append(self._get_canonkeys(g)) - if self.save_canonkeys: - self._canonkeys = canonkeys_list1 - # Canonical keys for Y. +# Y = [g.copy() for g in Y] # @todo: ? + self._add_dummy_labels(Y) canonkeys_list2 = [] - iterator = get_iters(Y, desc='getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2)) + iterator = get_iters(Y, desc='Getting canonkeys for Y', file=sys.stdout, verbose=(self.verbose >= 2)) for g in iterator: canonkeys_list2.append(self._get_canonkeys(g)) - if self.save_canonkeys: - self._Y_canonkeys = canonkeys_list2 +# if self.save_canonkeys: +# self._Y_canonkeys = canonkeys_list2 # compute kernel matrix. kernel_matrix = np.zeros((len(Y), len(canonkeys_list1))) @@ -235,13 +244,13 @@ class Treelet(GraphKernel): ########################################################################## - def _compute_gm_series(self): - self._add_dummy_labels(self._graphs) + def _compute_gm_series(self, graphs): + self._add_dummy_labels(graphs) # get all canonical keys of all graphs before computing kernels to save # time, but this may cost a lot of memory for large dataset. canonkeys = [] - iterator = get_iters(self._graphs, desc='getting canonkeys', file=sys.stdout, + iterator = get_iters(graphs, desc='getting canonkeys', file=sys.stdout, verbose=(self.verbose >= 2)) for g in iterator: canonkeys.append(self._get_canonkeys(g)) @@ -250,11 +259,11 @@ class Treelet(GraphKernel): self._canonkeys = canonkeys # compute Gram matrix. - gram_matrix = np.zeros((len(self._graphs), len(self._graphs))) + gram_matrix = np.zeros((len(graphs), len(graphs))) from itertools import combinations_with_replacement - itr = combinations_with_replacement(range(0, len(self._graphs)), 2) - len_itr = int(len(self._graphs) * (len(self._graphs) + 1) / 2) + itr = combinations_with_replacement(range(0, len(graphs)), 2) + len_itr = int(len(graphs) * (len(graphs) + 1) / 2) iterator = get_iters(itr, desc='Computing kernels', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2)) for i, j in iterator: @@ -390,6 +399,9 @@ class Treelet(GraphKernel): Treelet kernel between 2 graphs. """ keys = set(canonkey1.keys()) & set(canonkey2.keys()) # find same canonical keys in both graphs + if len(keys) == 0: # There is nothing in common... + return 0 + vector1 = np.array([(canonkey1[key] if (key in canonkey1.keys()) else 0) for key in keys]) vector2 = np.array([(canonkey2[key] if (key in canonkey2.keys()) else 0) for key in keys]) diff --git a/gklearn/kernels/weisfeiler_lehman.py b/gklearn/kernels/weisfeiler_lehman.py index f02926e..905b31f 100644 --- a/gklearn/kernels/weisfeiler_lehman.py +++ b/gklearn/kernels/weisfeiler_lehman.py @@ -28,7 +28,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. def __init__(self, **kwargs): - GraphKernel.__init__(self) + GraphKernel.__init__(self, **{k: kwargs.get(k) for k in ['parallel', 'n_jobs', 'chunksize', 'normalize', 'copy_graphs', 'verbose'] if k in kwargs}) self.node_labels = kwargs.get('node_labels', []) self.edge_labels = kwargs.get('edge_labels', []) self.height = int(kwargs.get('height', 0)) @@ -50,7 +50,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. 
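# --- Illustrative sketch (editor's example, not part of the patch) -------------
# The treelet pairwise kernel above only compares canonical keys that occur in both
# graphs, and the new early return covers the case where nothing is shared. Sketch
# with toy canonical-key count dictionaries and a Gaussian sub-kernel on the counts:
import numpy as np

def toy_treelet_kernel(canonkey1, canonkey2, gamma=0.5):
    keys = set(canonkey1) & set(canonkey2)
    if len(keys) == 0:                 # no treelet in common
        return 0
    v1 = np.array([canonkey1[k] for k in keys])
    v2 = np.array([canonkey2[k] for k in keys])
    return np.exp(-gamma * np.sum((v1 - v2) ** 2))

print(toy_treelet_kernel({'0': 3, '1': 2}, {'0': 1, '2': 4}))  # only key '0' shared
print(toy_treelet_kernel({'0': 3}, {'2': 4}))                  # nothing shared -> 0
# --- end example ----------------------------------------------------------------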
########################################################################## - def _compute_gm_series(self): + def _compute_gm_series(self, graphs): # if self.verbose >= 2: # import warnings # warnings.warn('A part of the computation is parallelized.') @@ -59,19 +59,19 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. # for WL subtree kernel if self._base_kernel == 'subtree': - gram_matrix = self._subtree_kernel_do(self._graphs) + gram_matrix = self._subtree_kernel_do(graphs) # for WL shortest path kernel elif self._base_kernel == 'sp': - gram_matrix = self._sp_kernel_do(self._graphs) + gram_matrix = self._sp_kernel_do(graphs) # for WL edge kernel elif self._base_kernel == 'edge': - gram_matrix = self._edge_kernel_do(self._graphs) + gram_matrix = self._edge_kernel_do(graphs) # for user defined base kernel else: - gram_matrix = self._user_kernel_do(self._graphs) + gram_matrix = self._user_kernel_do(graphs) return gram_matrix @@ -204,70 +204,13 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. def pairwise_kernel(self, g1, g2): - Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster! - kernel = 0 - - # initial for height = 0 - all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration - - # for each graph - for G in Gn: - # set all labels into a tuple. - for nd, attrs in G.nodes(data=True): # @todo: there may be a better way. - G.nodes[nd]['lt'] = tuple(attrs[name] for name in self.node_labels) - # get the set of original labels - labels_ori = list(nx.get_node_attributes(G, 'lt').values()) - # number of occurence of each label in G - all_num_of_each_label.append(dict(Counter(labels_ori))) - - # Compute subtree kernel with the 0th iteration and add it to the final kernel. - kernel = self._compute_kernel_itr(kernel, all_num_of_each_label) - - # iterate each height - for h in range(1, self.height + 1): - all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration - num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs - # all_labels_ori = set() # all unique orignal labels in all graphs in this iteration - all_num_of_each_label = [] # number of occurence of each label in G - - # @todo: parallel this part. - for G in Gn: - - all_multisets = [] - for node, attrs in G.nodes(data=True): - # Multiset-label determination. - multiset = [G.nodes[neighbors]['lt'] for neighbors in G[node]] - # sorting each multiset - multiset.sort() - multiset = [attrs['lt']] + multiset # add the prefix - all_multisets.append(tuple(multiset)) - - # label compression - set_unique = list(set(all_multisets)) # set of unique multiset labels - # a dictionary mapping original labels to new ones. - set_compressed = {} - # if a label occured before, assign its former compressed label, - # else assign the number of labels occured + 1 as the compressed label. 
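# --- Illustrative sketch (editor's example, not part of the patch) -------------
# One Weisfeiler-Lehman refinement step, as performed by the relabelling code
# above: each node's new label is its old label plus the sorted multiset of its
# neighbours' labels, compressed to a short id. Self-contained sketch on a small
# labelled graph:
import networkx as nx

def wl_refine(G, labels):
    """One WL iteration; `labels` maps node -> current label string."""
    compressed, new_labels = {}, {}
    for v in G.nodes():
        multiset = sorted(labels[u] for u in G[v])
        signature = (labels[v], tuple(multiset))
        if signature not in compressed:        # label compression
            compressed[signature] = str(len(compressed))
        new_labels[v] = compressed[signature]
    return new_labels

G = nx.path_graph(4)                           # 0 - 1 - 2 - 3
labels = {v: 'a' for v in G.nodes()}
print(wl_refine(G, labels))                    # end nodes and middle nodes get split
# --- end example ----------------------------------------------------------------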
- for value in set_unique: - if value in all_set_compressed.keys(): - set_compressed[value] = all_set_compressed[value] - else: - set_compressed[value] = str(num_of_labels_occured + 1) - num_of_labels_occured += 1 - - all_set_compressed.update(set_compressed) - - # relabel nodes - for idx, node in enumerate(G.nodes()): - G.nodes[node]['lt'] = set_compressed[all_multisets[idx]] - - # get the set of compressed labels - labels_comp = list(nx.get_node_attributes(G, 'lt').values()) - # all_labels_ori.update(labels_comp) - all_num_of_each_label.append(dict(Counter(labels_comp))) +# Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster! + Gn = [g1, g2] + # for WL subtree kernel + if self._base_kernel == 'subtree': + kernel = self._subtree_kernel_do(Gn, return_mat=False) - # Compute subtree kernel with h iterations and add it to the final kernel - kernel = self._compute_kernel_itr(kernel, all_num_of_each_label) + # @todo: other subkernels. return kernel @@ -291,7 +234,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. return kernel - def _subtree_kernel_do_nl(self, Gn): + def _subtree_kernel_do_nl(self, Gn, return_mat=True): """Compute Weisfeiler-Lehman kernels between graphs with node labels. Parameters @@ -301,10 +244,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. Return ------ - gram_matrix : Numpy matrix + kernel_matrix : Numpy matrix / float Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. """ - gram_matrix = np.zeros((len(Gn), len(Gn))) + kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0) + gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr) # initial for height = 0 all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration @@ -324,7 +268,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. all_num_of_each_label.append(dict(Counter(labels_ori))) # Compute subtree kernel with the 0th iteration and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) # iterate each height for h in range(1, self.height + 1): @@ -342,12 +286,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) - return gram_matrix + return kernel_matrix - def _subtree_kernel_do_el(self, Gn): + def _subtree_kernel_do_el(self, Gn, return_mat=True): """Compute Weisfeiler-Lehman kernels between graphs with edge labels. Parameters @@ -357,19 +301,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. Return ------ - gram_matrix : Numpy matrix + kernel_matrix : Numpy matrix Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. """ - gram_matrix = np.zeros((len(Gn), len(Gn))) + kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0) + gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr) # initial for height = 0 all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration # Compute subtree kernel with the 0th iteration and add it to the final kernel. 
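# --- Illustrative sketch (editor's example, not part of the patch) -------------
# In the standard WL subtree formulation, each iteration adds to a graph pair's
# kernel the inner product of their label-count histograms; that is what the
# per-iteration accumulation above does for every pair. Tiny numeric sketch:
from collections import Counter

counts_g1 = Counter({'0': 2, '1': 2})           # label counts of graph 1 at this height
counts_g2 = Counter({'0': 1, '1': 3, '2': 1})   # label counts of graph 2 at this height
contribution = sum(counts_g1[lbl] * counts_g2[lbl]
                   for lbl in counts_g1.keys() & counts_g2.keys())
print(contribution)                             # 2*1 + 2*3 = 8
# --- end example ----------------------------------------------------------------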
- iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2) - for i, j in iterator: - gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j]) - gram_matrix[j][i] = gram_matrix[i][j] + iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2) + for i, j in iterator: # @todo: not correct if return_mat == False. + kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j]) + kernel_matrix[j][i] = kernel_matrix[i][j] # if h >= 1. @@ -393,7 +338,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_el(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) # Iterate along heights (>= 2). @@ -407,12 +352,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) - return gram_matrix + return kernel_matrix - def _subtree_kernel_do_labeled(self, Gn): + def _subtree_kernel_do_labeled(self, Gn, return_mat=True): """Compute Weisfeiler-Lehman kernels between graphs with both node and edge labels. @@ -423,10 +368,11 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. Return ------ - gram_matrix : Numpy matrix + kernel_matrix : Numpy matrix Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. """ - gram_matrix = np.zeros((len(Gn), len(Gn))) + kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0) + gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr) # initial for height = 0 all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration @@ -446,10 +392,10 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. all_num_of_each_label.append(dict(Counter(labels_ori))) # Compute subtree kernel with the 0th iteration and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) - # if h >= 1. + # if h >= 1: if self.height > 0: # Set all edge labels into a tuple. # @todo: remove this original labels or not? if self.verbose >= 2: @@ -470,7 +416,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_labeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) # Iterate along heights. @@ -484,12 +430,12 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel. 
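# --- Illustrative sketch (editor's example, not part of the patch) -------------
# At height 0 with no informative node labels, every node carries the same dummy
# label, so the histogram inner product collapses to the product of the node
# counts, which the loop above adds directly:
import networkx as nx

G1, G2 = nx.path_graph(3), nx.cycle_graph(5)
print(nx.number_of_nodes(G1) * nx.number_of_nodes(G2))  # 15 = 3 * 5
# --- end example ----------------------------------------------------------------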
- self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) - return gram_matrix + return kernel_matrix - def _subtree_kernel_do_unlabeled(self, Gn): + def _subtree_kernel_do_unlabeled(self, Gn, return_mat=True): """Compute Weisfeiler-Lehman kernels between graphs without labels. Parameters @@ -499,19 +445,20 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. Return ------ - gram_matrix : Numpy matrix + kernel_matrix : Numpy matrix Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs. """ - gram_matrix = np.zeros((len(Gn), len(Gn))) + kernel_matrix = (np.zeros((len(Gn), len(Gn))) if return_mat else 0) + gram_itr_fun = (self._compute_gram_itr if return_mat else self._compute_kernel_itr) # initial for height = 0 all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration # Compute subtree kernel with the 0th iteration and add it to the final kernel. - iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2) - for i, j in iterator: - gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j]) - gram_matrix[j][i] = gram_matrix[i][j] + iterator = combinations_with_replacement(range(0, len(kernel_matrix)), 2) + for i, j in iterator: # @todo: not correct if return_mat == False. + kernel_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j]) + kernel_matrix[j][i] = kernel_matrix[i][j] # if h >= 1. @@ -526,7 +473,7 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_unlabeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) # Iterate along heights (>= 2). @@ -540,9 +487,9 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured) # Compute subtree kernel with h iterations and add it to the final kernel. - self._compute_gram_itr(gram_matrix, all_num_of_each_label) + kernel_matrix = gram_itr_fun(kernel_matrix, all_num_of_each_label) - return gram_matrix + return kernel_matrix def _subtree_1graph_nl(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured): @@ -717,6 +664,8 @@ class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel. all_num_of_each_label[j]) gram_matrix[j][i] = gram_matrix[i][j] + return gram_matrix + def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2): """Compute the subtree kernel. diff --git a/gklearn/utils/kernels.py b/gklearn/utils/kernels.py index 182668b..1e1ea52 100644 --- a/gklearn/utils/kernels.py +++ b/gklearn/utils/kernels.py @@ -68,6 +68,11 @@ def gaussian_kernel(x, y, gamma=None): return np.exp((np.sum(np.subtract(x, y) ** 2)) * -gamma) +def tanimoto_kernel(x, y): + xy = np.dot(x, y) + return xy / (np.dot(x, x) + np.dot(y, y) - xy) + + def gaussiankernel(x, y, gamma=None): return gaussian_kernel(x, y, gamma=gamma) From 5eb90655a4986b301ccb2a4988ec4d3ddf651bf3 Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 24 Jun 2022 17:45:16 +0200 Subject: [PATCH 09/11] [Features] Add model seletion methods with validation set: , , . 
Required version of scikit-learn is upgraded to 1.1.0, to support the argument of used in . --- gklearn/model_selection/__init__.py | 25 ++++ gklearn/model_selection/_split.py | 285 ++++++++++++++++++++++++++++++++++++ requirements.txt | 2 +- requirements_pypi.txt | 4 +- 4 files changed, 313 insertions(+), 3 deletions(-) create mode 100644 gklearn/model_selection/__init__.py create mode 100644 gklearn/model_selection/_split.py diff --git a/gklearn/model_selection/__init__.py b/gklearn/model_selection/__init__.py new file mode 100644 index 0000000..661478b --- /dev/null +++ b/gklearn/model_selection/__init__.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 24 14:25:57 2022 + +@author: ljia +""" + +from ._split import BaseCrossValidatorWithValid +# from ._split import BaseShuffleSplit +from ._split import KFoldWithValid +# from ._split import GroupKFold +# from ._split import StratifiedKFoldWithValid +# from ._split import TimeSeriesSplit +# from ._split import LeaveOneGroupOut +# from ._split import LeaveOneOut +# from ._split import LeavePGroupsOut +# from ._split import LeavePOut +from ._split import RepeatedKFoldWithValid +# from ._split import RepeatedStratifiedKFold +# from ._split import ShuffleSplit +# from ._split import GroupShuffleSplit +# from ._split import StratifiedShuffleSplit +# from ._split import StratifiedGroupKFold +# from ._split import PredefinedSplit \ No newline at end of file diff --git a/gklearn/model_selection/_split.py b/gklearn/model_selection/_split.py new file mode 100644 index 0000000..a982fec --- /dev/null +++ b/gklearn/model_selection/_split.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 24 11:13:26 2022 + +@author: ljia +""" +from abc import abstractmethod +import numbers +import warnings +import numpy as np +from sklearn.utils import check_random_state, check_array, column_or_1d, indexable +from sklearn.utils.validation import _num_samples +from sklearn.utils.multiclass import type_of_target + + +class BaseCrossValidatorWithValid(object): + """Base class for all cross-validators. + Implementations must define `_iter_valid_test_masks` or `_iter_valid_stest_indices`. + """ + + def split(self, X, y=None, groups=None): + """Generate indices to split data into training, valid, and test set. + + Parameters + ---------- + + X : array-like of shape (n_samples, n_features) + Training data, where `n_samples` is the number of samples + and `n_features` is the number of features. + + y : array-like of shape (n_samples,) + The target variable for supervised learning problems. + + groups : array-like of shape (n_samples,), default=None + Group labels for the samples used while splitting the dataset into + train/test set. + + Yields + ------ + train : ndarray + The training set indices for that split. + + valid : ndarray + The valid set indices for that split. + + test : ndarray + The testing set indices for that split. + """ + X, y, groups = indexable(X, y, groups) + indices = np.arange(_num_samples(X)) + for valid_index, test_index in self._iter_valid_test_masks(X, y, groups): + train_index = indices[np.logical_not(np.logical_or(valid_index, test_index))] + valid_index = indices[valid_index] + test_index = indices[test_index] + yield train_index, valid_index, test_index + + + # Since subclasses must implement either _iter_valid_test_masks or + # _iter_valid_test_indices, neither can be abstract. 
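# --- Illustrative sketch (editor's example, not part of the patch) -------------
# In `split` above, the training indices are simply everything not covered by the
# valid or test masks. The same index arithmetic on plain numpy arrays:
import numpy as np

n_samples = 10
valid_mask = np.zeros(n_samples, dtype=bool)
valid_mask[0:3] = True
test_mask = np.zeros(n_samples, dtype=bool)
test_mask[3:6] = True

indices = np.arange(n_samples)
train_index = indices[np.logical_not(np.logical_or(valid_mask, test_mask))]
print(train_index)   # [6 7 8 9]
# --- end example ----------------------------------------------------------------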
+ def _iter_valid_test_masks(self, X=None, y=None, groups=None): + """Generates boolean masks corresponding to valid and test sets. + By default, delegates to _iter_valid_test_indices(X, y, groups) + """ + for valid_index, test_index in self._iter_valid_test_indices(X, y, groups): + valid_mask = np.zeros(_num_samples(X), dtype=bool) + test_mask = np.zeros(_num_samples(X), dtype=bool) + valid_mask[valid_index] = True + test_mask[test_index] = True + yield valid_mask, test_mask + + + def _iter_valid_test_indices(self, X=None, y=None, groups=None): + """Generates integer indices corresponding to valid and test sets.""" + raise NotImplementedError + + + @abstractmethod + def get_n_splits(self, X=None, y=None, groups=None): + """Returns the number of splitting iterations in the cross-validator""" + + + def __repr__(self): + return _build_repr(self) + + +class _BaseKFoldWithValid(BaseCrossValidatorWithValid): + """Base class for KFold, GroupKFold, and StratifiedKFold""" + + @abstractmethod + def __init__(self, n_splits, *, stratify, shuffle, random_state): + if not isinstance(n_splits, numbers.Integral): + raise ValueError( + 'The number of folds must be of Integral type. ' + '%s of type %s was passed.' % (n_splits, type(n_splits)) + ) + n_splits = int(n_splits) + + if n_splits <= 2: + raise ValueError( + 'k-fold cross-validation requires at least one' + ' train/valid/test split by setting n_splits=3 or more,' + ' got n_splits={0}.'.format(n_splits) + ) + + if not isinstance(shuffle, bool): + raise TypeError('shuffle must be True or False; got {0}'.format(shuffle)) + + if not shuffle and random_state is not None: # None is the default + raise ValueError( + 'Setting a random_state has no effect since shuffle is ' + 'False. You should leave ' + 'random_state to its default (None), or set shuffle=True.', + ) + + self.n_splits = n_splits + self.stratify = stratify + self.shuffle = shuffle + self.random_state = random_state + + + def split(self, X, y=None, groups=None): + """Generate indices to split data into training, valid and test set.""" + X, y, groups = indexable(X, y, groups) + n_samples = _num_samples(X) + if self.n_splits > n_samples: + raise ValueError( + ( + 'Cannot have number of splits n_splits={0} greater' + ' than the number of samples: n_samples={1}.' + ).format(self.n_splits, n_samples) + ) + + for train, valid, test in super().split(X, y, groups): + yield train, valid, test + + +class KFoldWithValid(_BaseKFoldWithValid): + + + def __init__( + self, + n_splits=5, + *, + stratify=False, + shuffle=False, + random_state=None + ): + super().__init__( + n_splits=n_splits, + stratify=stratify, + shuffle=shuffle, + random_state=random_state + ) + + + def _make_valid_test_folds(self, X, y=None): + rng = check_random_state(self.random_state) + y = np.asarray(y) + type_of_target_y = type_of_target(y) + allowed_target_types = ('binary', 'multiclass') + if type_of_target_y not in allowed_target_types: + raise ValueError( + 'Supported target types are: {}. Got {!r} instead.'.format( + allowed_target_types, type_of_target_y + ) + ) + + y = column_or_1d(y) + + _, y_idx, y_inv = np.unique(y, return_index=True, return_inverse=True) + # y_inv encodes y according to lexicographic order. We invert y_idx to + # map the classes so that they are encoded by order of appearance: + # 0 represents the first label appearing in y, 1 the second, etc. 
+ _, class_perm = np.unique(y_idx, return_inverse=True) + y_encoded = class_perm[y_inv] + + n_classes = len(y_idx) + y_counts = np.bincount(y_encoded) + min_groups = np.min(y_counts) + if np.all(self.n_splits > y_counts): + raise ValueError( + "n_splits=%d cannot be greater than the" + " number of members in each class." % (self.n_splits) + ) + if self.n_splits > min_groups: + warnings.warn( + "The least populated class in y has only %d" + " members, which is less than n_splits=%d." + % (min_groups, self.n_splits), + UserWarning, + ) + + # Determine the optimal number of samples from each class in each fold, + # using round robin over the sorted y. (This can be done direct from + # counts, but that code is unreadable.) + y_order = np.sort(y_encoded) + allocation = np.asarray( + [ + np.bincount(y_order[i :: self.n_splits], minlength=n_classes) + for i in range(self.n_splits) + ] + ) + + # To maintain the data order dependencies as best as possible within + # the stratification constraint, we assign samples from each class in + # blocks (and then mess that up when shuffle=True). + test_folds = np.empty(len(y), dtype='i') + for k in range(n_classes): + # since the kth column of allocation stores the number of samples + # of class k in each test set, this generates blocks of fold + # indices corresponding to the allocation for class k. + folds_for_class = np.arange(self.n_splits).repeat(allocation[:, k]) + if self.shuffle: + rng.shuffle(folds_for_class) + test_folds[y_encoded == k] = folds_for_class + return test_folds + + + def _iter_valid_test_masks(self, X, y=None, groups=None): + test_folds = self._make_valid_test_folds(X, y) + for i in range(self.n_splits): + if i + 1 < self.n_splits: + j = i + 1 + else: + j = 0 + yield test_folds == i, test_folds == j + + + def split(self, X, y, groups=None): + y = check_array(y, input_name='y', ensure_2d=False, dtype=None) + return super().split(X, y, groups) + + +class _RepeatedSplitsWithValid(object): + + + def __init__( + self, + cv, + *, + n_repeats=10, + random_state=None, + **cvargs + ): + if not isinstance(n_repeats, int): + raise ValueError('Number of repetitions must be of integer type.') + + if n_repeats <= 0: + raise ValueError('Number of repetitions must be greater than 0.') + + self.cv = cv + self.n_repeats = n_repeats + self.random_state = random_state + self.cvargs = cvargs + + + def split(self, X, y=None, groups=None): + n_repeats = self.n_repeats + rng = check_random_state(self.random_state) + + for idx in range(n_repeats): + cv = self.cv(random_state=rng, shuffle=True, **self.cvargs) + for train_index, valid_index, test_index in cv.split(X, y, groups): + yield train_index, valid_index, test_index + + +class RepeatedKFoldWithValid(_RepeatedSplitsWithValid): + + + def __init__( + self, + *, + n_splits=5, + n_repeats=10, + stratify=False, + random_state=None + ): + super().__init__( + KFoldWithValid, + n_repeats=n_repeats, + stratify=stratify, + random_state=random_state, + n_splits=n_splits, + ) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4b25bb3..da822f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ numpy>=1.16.2 scipy>=1.1.0 matplotlib>=3.1.0 networkx>=2.2 -scikit-learn>=0.20.0 +scikit-learn>=1.1.0 tabulate>=0.8.2 tqdm>=4.26.0 control>=0.8.2 # for generalized random walk kernels only. 
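# --- Illustrative sketch (editor's example, not part of the patch) -------------
# Usage sketch of the validation-set splitters added above: every split yields
# disjoint train / valid / test index arrays, where valid and test are single folds
# and train is the rest. The data below are placeholders.
import numpy as np
from gklearn.model_selection import KFoldWithValid

X = np.arange(12).reshape(-1, 1)
y = np.array([0, 1] * 6)     # a target is required for the fold assignment
cv = KFoldWithValid(n_splits=3, shuffle=True, random_state=0)
for train, valid, test in cv.split(X, y):
    print(len(train), len(valid), len(test))   # 4 4 4 for each of the 3 splits
# --- end example ----------------------------------------------------------------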
diff --git a/requirements_pypi.txt b/requirements_pypi.txt index 3c68618..d1718a0 100644 --- a/requirements_pypi.txt +++ b/requirements_pypi.txt @@ -1,8 +1,8 @@ numpy>=1.16.2 scipy>=1.1.0 -matplotlib>=3.0.0 +matplotlib>=3.1.0 networkx>=2.2 -scikit-learn>=0.20.0 +scikit-learn>=1.1.0 tabulate>=0.8.2 tqdm>=4.26.0 control>=0.8.2 # for generalized random walk kernels only. From b36eaae177f84168fad15ffd105f5cb70d4c5c1d Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 24 Jun 2022 17:46:28 +0200 Subject: [PATCH 10/11] [Features] Add model seletion methods with validation set: KFoldWithValid, RepeatedKFoldWithValid, BaseCrossValidatorWithValid. Required version of scikit-learn is upgraded to 1.1.0, to support the input_name argument of sklearn.utils.check_array used in gklearn.model_selection._split.py. --- gklearn/model_selection/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gklearn/model_selection/__init__.py b/gklearn/model_selection/__init__.py index 661478b..23f5f67 100644 --- a/gklearn/model_selection/__init__.py +++ b/gklearn/model_selection/__init__.py @@ -5,7 +5,6 @@ Created on Fri Jun 24 14:25:57 2022 @author: ljia """ - from ._split import BaseCrossValidatorWithValid # from ._split import BaseShuffleSplit from ._split import KFoldWithValid From ed78e65dd8492c17edbb72f04c93bd0917cd5c9d Mon Sep 17 00:00:00 2001 From: jajupmochi Date: Fri, 24 Jun 2022 17:48:16 +0200 Subject: [PATCH 11/11] [Features] Add model seletion methods with validation set: KFoldWithValid, RepeatedKFoldWithValid, BaseCrossValidatorWithValid. Required version of scikit-learn is upgraded to 1.1.0, to support the input_name argument of sklearn.utils.check_array used in gklearn.model_selection._split.py. --- gklearn/model_selection/_split.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gklearn/model_selection/_split.py b/gklearn/model_selection/_split.py index a982fec..10d5cc8 100644 --- a/gklearn/model_selection/_split.py +++ b/gklearn/model_selection/_split.py @@ -4,6 +4,8 @@ Created on Fri Jun 24 11:13:26 2022 @author: ljia + +Reference: scikit-learn. """ from abc import abstractmethod import numbers @@ -85,7 +87,7 @@ class BaseCrossValidatorWithValid(object): class _BaseKFoldWithValid(BaseCrossValidatorWithValid): - """Base class for KFold, GroupKFold, and StratifiedKFold""" + """Base class for KFoldWithValid, GroupKFoldWithValid, and StratifiedKFoldWithValid""" @abstractmethod def __init__(self, n_splits, *, stratify, shuffle, random_state):