From 5e37d4447f96953ffd7004d0e695dd30d53f9242 Mon Sep 17 00:00:00 2001
From: jajupmochi <jajupmochi@gmail.com>
Date: Fri, 6 May 2022 14:12:31 +0200
Subject: [PATCH] [Major Features] Add GEDModel which is compatibale with .

---
 gklearn/ged/__init__.py          |   1 +
 gklearn/ged/model/distances.py   |  43 +++
 gklearn/ged/model/ged_com.py     |  97 ++++++
 gklearn/ged/model/ged_model.py   | 724 +++++++++++++++++++++++++++++++++++++++
 gklearn/ged/model/optim_costs.py | 149 ++++++++
 5 files changed, 1014 insertions(+)
 create mode 100644 gklearn/ged/model/distances.py
 create mode 100644 gklearn/ged/model/ged_com.py
 create mode 100644 gklearn/ged/model/ged_model.py
 create mode 100644 gklearn/ged/model/optim_costs.py

diff --git a/gklearn/ged/__init__.py b/gklearn/ged/__init__.py
index e69de29..8696f76 100644
--- a/gklearn/ged/__init__.py
+++ b/gklearn/ged/__init__.py
@@ -0,0 +1 @@
+from gklearn.ged.model.ged_model import GEDModel
\ No newline at end of file
diff --git a/gklearn/ged/model/distances.py b/gklearn/ged/model/distances.py
new file mode 100644
index 0000000..3e27eb3
--- /dev/null
+++ b/gklearn/ged/model/distances.py
@@ -0,0 +1,43 @@
+import numpy as np
+
+
+def sum_squares(a, b):
+    """
+    Return the sum of squares of the difference between a and b, aka MSE
+    """
+    return np.sum([(a[i] - b[i])**2 for i in range(len(a))])
+
+
+def euclid_d(x, y):
+    """
+    1D euclidean distance
+    """
+    return np.sqrt((x-y)**2)
+
+
+def man_d(x, y):
+    """
+    1D manhattan distance
+    """
+    return np.abs((x-y))
+
+
+def classif_d(x, y):
+    """ 
+    Function adapted to classification problems
+    """
+    return np.array(0 if x == y else 1)
+
+
+def rmse(pred, ground_truth):
+    import numpy as np
+    return np.sqrt(sum_squares(pred, ground_truth)/len(ground_truth))
+
+
+def accuracy(pred, ground_truth):
+    import numpy as np
+    return np.mean([a == b for a, b in zip(pred, ground_truth)])
+
+
+def rbf_k(D, sigma=1):
+    return np.exp(-(D**2)/sigma)
diff --git a/gklearn/ged/model/ged_com.py b/gklearn/ged/model/ged_com.py
new file mode 100644
index 0000000..9da5f87
--- /dev/null
+++ b/gklearn/ged/model/ged_com.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu May  5 14:02:17 2022
+
+@author: ljia
+"""
+import sys
+from gklearn.ged.model.distances import euclid_d
+from gklearn.ged.util import  pairwise_ged, get_nb_edit_operations
+from gklearn.utils import get_iters
+
+
+def compute_ged(Gi, Gj, edit_cost, method='BIPARTITE', **kwargs):
+	"""
+	Compute GED between two graph according to edit_cost
+	"""
+	ged_options = {'edit_cost': 'CONSTANT',
+				'method': method,
+				'edit_cost_constants': edit_cost}
+	node_labels = kwargs.get('node_labels', [])
+	edge_labels = kwargs.get('edge_labels', [])
+	dis, pi_forward, pi_backward = pairwise_ged(Gi, Gj, ged_options, repeats=10)
+	n_eo_tmp = get_nb_edit_operations(Gi, Gj, pi_forward, pi_backward, edit_cost='CONSTANT', node_labels=node_labels, edge_labels=edge_labels)
+	return dis, n_eo_tmp
+
+
+def compute_ged_all_dataset(Gn, edit_cost, ed_method, **kwargs):
+	N = len(Gn)
+	G_pairs = []
+	for i in range(N):
+		for j in range(i, N):
+			G_pairs.append([i, j])
+	return compute_geds(G_pairs, Gn, edit_cost, ed_method, **kwargs)
+
+
+def compute_geds(G_pairs, Gn, edit_cost, ed_method, verbose=True, **kwargs):
+	"""
+	Compute GED between all indexes in G_pairs given edit_cost
+	:return: ged_vec : the list of computed distances, n_edit_operations : the list of edit operations
+	"""
+	ged_vec = []
+	n_edit_operations = []
+	for k in get_iters(range(len(G_pairs)), desc='Computing GED', file=sys.stdout, length=len(G_pairs), verbose=verbose):
+		[i, j] = G_pairs[k]
+		dis, n_eo_tmp = compute_ged(
+			Gn[i], Gn[j], edit_cost=edit_cost, method=ed_method, **kwargs)
+		ged_vec.append(dis)
+		n_edit_operations.append(n_eo_tmp)
+
+	return ged_vec, n_edit_operations
+
+
+def compute_D(G_app, edit_cost, G_test=None, ed_method='BIPARTITE', **kwargs):
+	import numpy as np
+	N = len(G_app)
+	D_app = np.zeros((N, N))
+
+	for i, G1 in get_iters(enumerate(G_app), desc='Computing D - app', file=sys.stdout, length=N):
+		for j, G2 in enumerate(G_app[i+1:], i+1):
+			D_app[i, j], _ = compute_ged(G1, G2, edit_cost, method=ed_method, **kwargs)
+			D_app[j, i] = D_app[i, j]
+	if (G_test is None):
+		return D_app, edit_cost
+	else:
+		D_test = np.zeros((len(G_test), N))
+		for i, G1 in get_iters(enumerate(G_test), desc='Computing D - test', file=sys.stdout, length=len(G_test)):
+			for j, G2 in enumerate(G_app):
+				D_test[i, j], _ = compute_ged(G1, G2, edit_cost, method=ed_method, **kwargs)
+		return D_app, D_test, edit_cost
+
+
+def compute_D_random(G_app, G_test=None, ed_method='BIPARTITE', **kwargs):
+	import numpy as np
+	edit_costs = np.random.rand(6)
+	return compute_D(G_app, edit_costs, G_test, ed_method=ed_method, **kwargs)
+
+
+def compute_D_expert(G_app, G_test=None, ed_method='BIPARTITE', **kwargs):
+	edit_cost = [3, 3, 1, 3, 3, 1]
+	return compute_D(G_app, edit_cost, G_test, ed_method=ed_method, **kwargs)
+
+
+def compute_D_fitted(G_app, y_app, G_test=None, y_distance=euclid_d,
+					 mode='reg', unlabeled=False, ed_method='BIPARTITE', **kwargs):
+	from gklearn.ged.models.optim_costs import compute_optimal_costs
+
+	costs_optim = compute_optimal_costs(
+		G_app, y_app, y_distance=y_distance,
+		mode=mode, unlabeled=unlabeled, ed_method=ed_method, **kwargs)
+	return compute_D(G_app, costs_optim, G_test, ed_method=ed_method, **kwargs)
+
+
+def compute_D_GH2020(G_app, G_test=None, ed_method='BIPARTITE', **kwargs):
+	from gklearn.ged.optim_costs import get_optimal_costs_GH2020
+	costs_optim = get_optimal_costs_GH2020(**kwargs)
+	return compute_D(G_app, costs_optim, G_test, ed_method=ed_method, **kwargs)
diff --git a/gklearn/ged/model/ged_model.py b/gklearn/ged/model/ged_model.py
new file mode 100644
index 0000000..9bdbc90
--- /dev/null
+++ b/gklearn/ged/model/ged_model.py
@@ -0,0 +1,724 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu May  5 09:42:30 2022
+
+@author: ljia
+"""
+import sys
+import multiprocessing
+import time
+import numpy as np
+import networkx as nx
+
+# from abc import ABC, abstractmethod
+from sklearn.base import BaseEstimator # , TransformerMixin
+from sklearn.utils.validation import check_is_fitted # check_X_y, check_array,
+from sklearn.exceptions import NotFittedError
+
+from gklearn.ged.model.distances import euclid_d
+from gklearn.ged.util import pairwise_ged, get_nb_edit_operations
+# from gklearn.utils import normalize_gram_matrix
+from gklearn.utils import get_iters
+
+
+class GEDModel(BaseEstimator): #, ABC):
+	"""The graph edit distance model class compatible with `scikit-learn`.
+
+	Attributes
+    ----------
+    _graphs : list
+        Stores the input graphs on fit input data.
+        Default format of the list objects is `NetworkX` graphs.
+		**We don't guarantee that the input graphs remain unchanged during the
+		computation.**
+
+	References
+	----------
+	https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
+	"""
+
+	def __init__(self,
+			  ed_method='BIPARTITE',
+			  edit_cost_fun='CONSTANT',
+			  init_edit_cost_constants=[3, 3, 1, 3, 3, 1],
+			  optim_method='init',
+			  optim_options={'y_distance': euclid_d, 'mode': 'reg'},
+			  node_labels=[],
+			  edge_labels=[],
+			  parallel=None,
+			  n_jobs=None,
+			  chunksize=None,
+#			  normalize=True,
+			  copy_graphs=True, # make sure it is a full deep copy. and faster!
+			  verbose=2):
+		"""`__init__` for `GEDModel` object."""
+		# @todo: the default settings of the parameters are different from those in the self.compute method.
+#		self._graphs = None
+		self.ed_method = ed_method
+		self.edit_cost_fun = edit_cost_fun
+		self.init_edit_cost_constants = init_edit_cost_constants
+		self.optim_method=optim_method
+		self.optim_options=optim_options
+		self.node_labels=node_labels
+		self.edge_labels=edge_labels
+		self.parallel = parallel
+		self.n_jobs = n_jobs
+		self.chunksize = chunksize
+#		self.normalize = normalize
+		self.copy_graphs = copy_graphs
+		self.verbose = verbose
+#		self._run_time = 0
+#		self._gram_matrix = None
+#		self._gram_matrix_unnorm = None
+
+
+	##########################################################################
+	# The following is the 1st paradigm to compute GED distance matrix, which is
+	# compatible with `scikit-learn`.
+	##########################################################################
+
+
+	def fit(self, X, y=None):
+		"""Fit a graph dataset for a transformer.
+
+		Parameters
+		----------
+		X : iterable
+			DESCRIPTION.
+
+		y : None, optional
+			There is no need of a target in a transformer, yet the `scikit-learn`
+			pipeline API requires this parameter.
+
+		Returns
+		-------
+		object
+			Returns self.
+
+		"""
+#		self._is_tranformed = False
+
+		# Clear any prior attributes stored on the estimator, # @todo: unless warm_start is used;
+		self.clear_attributes()
+
+		# Validate parameters for the transformer.
+		self.validate_parameters()
+
+		# Validate the input.
+		self._graphs = self.validate_input(X)
+		if y is not None:
+ 			self._targets = y
+ 			# self._targets = self.validate_input(y)
+
+#		self._X = X
+#		self._kernel = self._get_kernel_instance()
+
+		# Return the transformer.
+		return self
+
+
+	def transform(self, X=None, return_dm_train=False):
+		"""Compute the graph kernel matrix between given and fitted data.
+
+		Parameters
+		----------
+		X : TYPE
+			DESCRIPTION.
+
+		Raises
+		------
+		ValueError
+			DESCRIPTION.
+
+		Returns
+		-------
+		None.
+
+		"""
+		# If `return_dm_train`, return the fitted GED distance matrix of training data.
+		if return_dm_train:
+			check_is_fitted(self, '_dm_train')
+			self._is_transformed = True
+			return self._dm_train # @todo: copy or not?
+
+		# Check if method "fit" had been called.
+		check_is_fitted(self, '_graphs')
+
+		# Validate the input.
+		Y = self.validate_input(X)
+
+		# Transform: compute the graph kernel matrix.
+		dis_matrix = self.compute_distance_matrix(Y)
+		self._Y = Y
+
+		# Self transform must appear before the diagonal call on normilization.
+		self._is_transformed = True
+# 		if self.normalize:
+# 			X_diag, Y_diag = self.diagonals()
+# 			old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt.
+# 			try:
+# 				kernel_matrix /= np.sqrt(np.outer(Y_diag, X_diag))
+# 			except:
+# 				raise
+# 			finally:
+# 				np.seterr(**old_settings)
+
+		return dis_matrix
+
+
+	def fit_transform(self, X, y=None, save_dm_train=False):
+		"""Fit and transform: compute GED distance matrix on the same data.
+
+		Parameters
+		----------
+		X : list of graphs
+			Input graphs.
+
+		Returns
+		-------
+		dis_matrix : numpy array, shape = [len(X), len(X)]
+			The distance matrix of X.
+
+		"""
+		self.fit(X, y)
+
+		# Compute edit cost constants.
+		self.compute_edit_costs()
+
+		# Transform: compute Gram matrix.
+		dis_matrix = self.compute_distance_matrix()
+
+#		# Normalize.
+#		if self.normalize:
+#			self._X_diag = np.diagonal(gram_matrix).copy()
+#			old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt.
+#			try:
+#				gram_matrix /= np.sqrt(np.outer(self._X_diag, self._X_diag))
+#			except:
+#				raise
+#			finally:
+#				np.seterr(**old_settings)
+
+		if save_dm_train:
+			self._dm_train = dis_matrix
+
+		return dis_matrix
+
+
+	def get_params(self):
+		pass
+
+
+	def set_params(self):
+		pass
+
+
+	def clear_attributes(self): # @todo: update
+#		if hasattr(self, '_X_diag'):
+#			delattr(self, '_X_diag')
+		if hasattr(self, '_graphs'):
+			delattr(self, '_graphs')
+		if hasattr(self, '_Y'):
+			delattr(self, '_Y')
+		if hasattr(self, '_run_time'):
+			delattr(self, '_run_time')
+
+
+	def validate_parameters(self):
+		"""Validate all parameters for the transformer.
+
+		Returns
+		-------
+		None.
+
+		"""
+		if self.parallel is not None and self.parallel != 'imap_unordered':
+			raise ValueError('Parallel mode is not set correctly.')
+
+		if self.parallel == 'imap_unordered' and self.n_jobs is None:
+			self.n_jobs = multiprocessing.cpu_count()
+
+
+	def validate_input(self, X):
+		"""Validate the given input and raise errors if it is invalid.
+
+		Parameters
+		----------
+		X : list
+			The input to check. Should be a list of graph.
+
+		Raises
+		------
+		ValueError
+			Raise if the input is not correct.
+
+		Returns
+		-------
+		X : list
+			The input. A list of graph.
+
+		"""
+		if X is None:
+			raise ValueError('Please add graphs before computing.')
+		elif not isinstance(X, list):
+			raise ValueError('Cannot detect graphs. The input must be a list.')
+		elif len(X) == 0:
+			raise ValueError('The graph list given is empty. No computation will be performed.')
+
+		return X
+
+
+	def compute_distance_matrix(self, Y=None):
+		"""Compute the distance matrix between a given target graphs (Y) and
+		the fitted graphs (X / self._graphs) or the distance matrix for the fitted
+		graphs (X / self._graphs).
+
+		Parameters
+		----------
+		Y : list of graphs, optional
+			The target graphs. The default is None. If None kernel is computed
+			between X and itself.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		if Y is None:
+			# Compute Gram matrix for self._graphs (X).
+			dis_matrix = self._compute_X_distance_matrix()
+#			self._gram_matrix_unnorm = np.copy(self._gram_matrix)
+
+		else:
+			# Compute kernel matrix between Y and self._graphs (X).
+			start_time = time.time()
+
+			if self.parallel == 'imap_unordered':
+				dis_matrix = self._compute_distance_matrix_imap_unordered(Y)
+
+			elif self.parallel is None:
+				Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y)
+				graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+				dis_matrix = self._compute_distance_matrix_series(Y_copy, graphs_copy)
+
+			self._run_time = time.time() - start_time
+			if self.verbose:
+				print('Distance matrix of size (%d, %d) built in %s seconds.'
+				  % (len(Y), len(self._graphs), self._run_time))
+
+		return dis_matrix
+
+
+	def _compute_distance_matrix_series(self, X, Y):
+		"""Compute the GED distance matrix between two sets of graphs (X and Y)
+		without parallelization.
+
+		Parameters
+		----------
+		X, Y : list of graphs
+			The input graphs.
+
+		Returns
+		-------
+		dis_matrix : numpy array, shape = [n_X, n_Y]
+			The computed distance matrix.
+
+		"""
+		dis_matrix = np.zeros((len(X), len(Y)))
+
+		for i_x, g_x in enumerate(X):
+			for i_y, g_y in enumerate(Y):
+				dis_matrix[i_x, i_y], _ = self.compute_ged(g_x, g_y)
+
+		return dis_matrix
+
+
+	def _compute_kernel_matrix_imap_unordered(self, Y):
+		"""Compute the kernel matrix between a given target graphs (Y) and
+		the fitted graphs (X / self._graphs) using imap unordered parallelization.
+
+		Parameters
+		----------
+		Y : list of graphs, optional
+			The target graphs.
+
+		Returns
+		-------
+		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
+			The computed kernel matrix.
+
+		"""
+		raise Exception('Parallelization for kernel matrix is not implemented.')
+
+
+	def diagonals(self):
+		"""Compute the kernel matrix diagonals of the fit/transformed data.
+
+		Returns
+		-------
+        X_diag : numpy array
+            The diagonal of the kernel matrix between the fitted data.
+            This consists of each element calculated with itself.
+
+        Y_diag : numpy array
+            The diagonal of the kernel matrix, of the transform.
+            This consists of each element calculated with itself.
+
+		"""
+		# Check if method "fit" had been called.
+		check_is_fitted(self, ['_graphs'])
+
+		# Check if the diagonals of X exist.
+		try:
+			check_is_fitted(self, ['_X_diag'])
+		except NotFittedError:
+			# Compute diagonals of X.
+			self._X_diag = np.empty(shape=(len(self._graphs),))
+			graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+			for i, x in enumerate(graphs):
+				self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?
+
+		try:
+            # If transform has happened, return both diagonals.
+			check_is_fitted(self, ['_Y'])
+			self._Y_diag = np.empty(shape=(len(self._Y),))
+			Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y)
+			for (i, y) in enumerate(Y):
+				self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?
+
+			return self._X_diag, self._Y_diag
+		except NotFittedError:
+            # Else just return both X_diag
+			return self._X_diag
+
+
+#	@abstractmethod
+	def pairwise_distance(self, x, y):
+		"""Compute pairwise kernel between two graphs.
+
+		Parameters
+		----------
+		x, y : NetworkX Graph.
+			Graphs bewteen which the kernel is computed.
+
+		Returns
+		-------
+		kernel: float
+			The computed kernel.
+
+#		Notes
+#		-----
+#		This method is abstract and must be implemented by a subclass.
+
+		"""
+		raise NotImplementedError('Pairwise kernel computation is not implemented!')
+
+
+
+	def compute_edit_costs(self, Y=None, Y_targets=None):
+		"""Compute edit cost constants. When optimizing method is `fiited`,
+		apply Jia2021's metric learning method by using a given target graphs (Y)
+		the fitted graphs (X / self._graphs).
+
+		Parameters
+		----------
+		Y : TYPE, optional
+			DESCRIPTION. The default is None.
+
+		Returns
+		-------
+		None.
+
+		"""
+		# Get or compute.
+		if self.optim_method == 'random':
+			self._edit_cost_constants = np.random.rand(6)
+
+		elif self.optim_method == 'init':
+			self._edit_cost_constants = self.init_edit_cost_constants
+
+
+		elif self.optim_method == 'expert':
+			self._edit_cost_constants = [3, 3, 1, 3, 3, 1]
+
+
+		elif self.optim_method == 'fitted': # Jia2021 method
+			# Get proper inputs.
+			if Y is None:
+				check_is_fitted(self, ['_graphs'])
+				check_is_fitted(self, ['_targets'])
+				graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+				targets = self._targets
+			else:
+				graphs = ([g.copy() for g in Y] if self.copy_graphs else Y)
+				targets = Y_targets
+
+			# Get optimization options.
+			node_labels = self.node_labels
+			edge_labels = self.edge_labels
+			unlabeled = (len(node_labels) == 0 and len(edge_labels) == 0)
+			from gklearn.ged.model.optim_costs import compute_optimal_costs
+			self._edit_cost_constants = compute_optimal_costs(
+				graphs, targets,
+				node_labels=node_labels, edge_labels=edge_labels,
+				unlabeled=unlabeled, ed_method=self.ed_method,
+				verbose=(self.verbose >= 2),
+				**self.optim_options)
+
+
+	##########################################################################
+	# The following is the 2nd paradigm to compute kernel matrix. It is
+	# simplified and not compatible with `scikit-learn`.
+	##########################################################################
+
+
+#	def compute(self, *graphs, **kwargs):
+#		self.parallel = kwargs.get('parallel', 'imap_unordered')
+#		self.n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
+#		self.normalize = kwargs.get('normalize', True)
+#		self.verbose = kwargs.get('verbose', 2)
+#		self.copy_graphs = kwargs.get('copy_graphs', True)
+#		self.save_unnormed = kwargs.get('save_unnormed', True)
+#		self.validate_parameters()
+
+#		# If the inputs is a list of graphs.
+#		if len(graphs) == 1:
+#			if not isinstance(graphs[0], list):
+#				raise Exception('Cannot detect graphs.')
+#			elif len(graphs[0]) == 0:
+#				raise Exception('The graph list given is empty. No computation was performed.')
+#			else:
+#				if self.copy_graphs:
+#					self._graphs = [g.copy() for g in graphs[0]] # @todo: might be very slow.
+#				else:
+#					self._graphs = graphs
+#				self._gram_matrix = self._compute_gram_matrix()
+
+#				if self.save_unnormed:
+#					self._gram_matrix_unnorm = np.copy(self._gram_matrix)
+#				if self.normalize:
+#					self._gram_matrix = normalize_gram_matrix(self._gram_matrix)
+#				return self._gram_matrix, self._run_time
+
+#		elif len(graphs) == 2:
+#			# If the inputs are two graphs.
+#			if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
+#				if self.copy_graphs:
+#					G0, G1 = graphs[0].copy(), graphs[1].copy()
+#				else:
+#					G0, G1 = graphs[0], graphs[1]
+#				kernel = self._compute_single_kernel(G0, G1)
+#				return kernel, self._run_time
+
+#			# If the inputs are a graph and a list of graphs.
+#			elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
+#				if self.copy_graphs:
+#					g1 = graphs[0].copy()
+#					g_list = [g.copy() for g in graphs[1]]
+#					kernel_list = self._compute_kernel_list(g1, g_list)
+#				else:
+#					kernel_list = self._compute_kernel_list(graphs[0], graphs[1])
+#				return kernel_list, self._run_time
+
+#			elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
+#				if self.copy_graphs:
+#					g1 = graphs[1].copy()
+#					g_list = [g.copy() for g in graphs[0]]
+#					kernel_list = self._compute_kernel_list(g1, g_list)
+#				else:
+#					kernel_list = self._compute_kernel_list(graphs[1], graphs[0])
+#				return kernel_list, self._run_time
+
+#			else:
+#				raise Exception('Cannot detect graphs.')
+
+#		elif len(graphs) == 0 and self._graphs is None:
+#			raise Exception('Please add graphs before computing.')
+
+#		else:
+#			raise Exception('Cannot detect graphs.')
+
+
+#	def normalize_gm(self, gram_matrix):
+#		import warnings
+#		warnings.warn('gklearn.kernels.graph_kernel.normalize_gm will be deprecated, use gklearn.utils.normalize_gram_matrix instead', DeprecationWarning)
+
+#		diag = gram_matrix.diagonal().copy()
+#		for i in range(len(gram_matrix)):
+#			for j in range(i, len(gram_matrix)):
+#				gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
+#				gram_matrix[j][i] = gram_matrix[i][j]
+#		return gram_matrix
+
+
+#	def compute_distance_matrix(self):
+#		if self._gram_matrix is None:
+#			raise Exception('Please compute the Gram matrix before computing distance matrix.')
+#		dis_mat = np.empty((len(self._gram_matrix), len(self._gram_matrix)))
+#		for i in range(len(self._gram_matrix)):
+#			for j in range(i, len(self._gram_matrix)):
+#				dis = self._gram_matrix[i, i] + self._gram_matrix[j, j] - 2 * self._gram_matrix[i, j]
+#				if dis < 0:
+#					if dis > -1e-10:
+#						dis = 0
+#					else:
+#						raise ValueError('The distance is negative.')
+#				dis_mat[i, j] = np.sqrt(dis)
+#				dis_mat[j, i] = dis_mat[i, j]
+#		dis_max = np.max(np.max(dis_mat))
+#		dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
+#		dis_mean = np.mean(np.mean(dis_mat))
+#		return dis_mat, dis_max, dis_min, dis_mean
+
+
+	def _compute_X_distance_matrix(self):
+		start_time = time.time()
+
+		if self.parallel == 'imap_unordered':
+			dis_matrix = self._compute_X_dm_imap_unordered()
+		elif self.parallel is None:
+			graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
+			dis_matrix = self._compute_X_dm_series(graphs)
+		else:
+			raise Exception('Parallel mode is not set correctly.')
+
+		self._run_time = time.time() - start_time
+		if self.verbose:
+			print('Distance matrix of size %d built in %s seconds.'
+			  % (len(self._graphs), self._run_time))
+
+		return dis_matrix
+
+
+	def _compute_X_dm_series(self, graphs):
+		N = len(graphs)
+		dis_matrix = np.zeros((N, N))
+
+		for i, G1 in get_iters(enumerate(graphs), desc='Computing distance matrix', file=sys.stdout, verbose=(self.verbose >= 2)):
+			for j, G2 in enumerate(graphs[i+1:], i+1):
+				dis_matrix[i, j], _ = self.compute_ged(G1, G2)
+				dis_matrix[j, i] = dis_matrix[i, j]
+		return dis_matrix
+
+
+	def _compute_X_dm_imap_unordered(self, graphs):
+		pass
+
+
+	def compute_ged(self, Gi, Gj, **kwargs):
+		"""
+		Compute GED between two graph according to edit_cost.
+		"""
+		ged_options = {'edit_cost': self.edit_cost_fun,
+				 'method': self.ed_method,
+				 'edit_cost_constants': self._edit_cost_constants}
+		dis, pi_forward, pi_backward = pairwise_ged(Gi, Gj, ged_options, repeats=10)
+		n_eo_tmp = get_nb_edit_operations(Gi, Gj, pi_forward, pi_backward,
+									 edit_cost=self.edit_cost_fun,
+									 node_labels=self.node_labels,
+									 edge_labels=self.edge_labels)
+		return dis, n_eo_tmp
+
+
+# 	def _compute_kernel_list(self, g1, g_list):
+# 		start_time = time.time()
+
+# 		if self.parallel == 'imap_unordered':
+# 			kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
+# 		elif self.parallel is None:
+# 			kernel_list = self._compute_kernel_list_series(g1, g_list)
+# 		else:
+# 			raise Exception('Parallel mode is not set correctly.')
+
+# 		self._run_time = time.time() - start_time
+# 		if self.verbose:
+# 			print('Graph kernel bewteen a graph and a list of %d graphs built in %s seconds.'
+# 			  % (len(g_list), self._run_time))
+
+# 		return kernel_list
+
+
+# 	def _compute_kernel_list_series(self, g1, g_list):
+# 		pass
+
+
+# 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
+# 		pass
+
+
+# 	def _compute_single_kernel(self, g1, g2):
+# 		start_time = time.time()
+
+# 		kernel = self._compute_single_kernel_series(g1, g2)
+
+# 		self._run_time = time.time() - start_time
+# 		if self.verbose:
+# 			print('Graph kernel bewteen two graphs built in %s seconds.' % (self._run_time))
+
+# 		return kernel
+
+
+# 	def _compute_single_kernel_series(self, g1, g2):
+# 		pass
+
+
+	def is_graph(self, graph):
+		if isinstance(graph, nx.Graph):
+			return True
+		if isinstance(graph, nx.DiGraph):
+			return True
+		if isinstance(graph, nx.MultiGraph):
+			return True
+		if isinstance(graph, nx.MultiDiGraph):
+			return True
+		return False
+
+
+	@property
+	def graphs(self):
+		return self._graphs
+
+
+#	@property
+#	def parallel(self):
+#		return self.parallel
+
+
+#	@property
+#	def n_jobs(self):
+#		return self.n_jobs
+
+
+#	@property
+#	def verbose(self):
+#		return self.verbose
+
+
+#	@property
+#	def normalize(self):
+#		return self.normalize
+
+
+	@property
+	def run_time(self):
+		return self._run_time
+
+
+	@property
+	def dis_matrix(self):
+		return self._dis_matrix
+
+	@dis_matrix.setter
+	def dis_matrix(self, value):
+		self._dis_matrix = value
+
+
+# 	@property
+# 	def gram_matrix_unnorm(self):
+# 		return self._gram_matrix_unnorm
+
+# 	@gram_matrix_unnorm.setter
+# 	def gram_matrix_unnorm(self, value):
+# 		self._gram_matrix_unnorm = value
\ No newline at end of file
diff --git a/gklearn/ged/model/optim_costs.py b/gklearn/ged/model/optim_costs.py
new file mode 100644
index 0000000..1e23732
--- /dev/null
+++ b/gklearn/ged/model/optim_costs.py
@@ -0,0 +1,149 @@
+import numpy as np
+
+from gklearn.ged.model.distances import sum_squares, euclid_d
+from gklearn.ged.model.ged_com import compute_geds
+
+
+def optimize_costs_unlabeled(nb_cost_mat, dis_k_vec):
+	"""
+	Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat
+	! take care that nb_cost_mat do not contains 0 lines
+	:param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph
+	:param dis_k_vec: The N distances to fit
+	"""
+	import cvxpy as cp
+	import numpy as np
+	MAX_SAMPLE = 1000
+	nb_cost_mat_m = np.array([[x[0], x[1], x[3], x[4]] for x in nb_cost_mat])
+	dis_k_vec = np.array(dis_k_vec)
+	# dis_k_vec_norm = dis_k_vec/np.max(dis_k_vec)
+
+	# import pickle
+	# pickle.dump([nb_cost_mat, dis_k_vec], open('debug', 'wb'))
+	N = nb_cost_mat_m.shape[0]
+	sub_sample = np.random.permutation(np.arange(N))
+	sub_sample = sub_sample[:MAX_SAMPLE]
+
+	x = cp.Variable(nb_cost_mat_m.shape[1])
+	cost = cp.sum_squares((nb_cost_mat_m[sub_sample, :] @ x) - dis_k_vec[sub_sample])
+	prob = cp.Problem(cp.Minimize(cost), [x >= 0])
+	prob.solve()
+	edit_costs_new = [x.value[0], x.value[1], 0, x.value[2], x.value[3], 0]
+	edit_costs_new = [xi if xi > 0 else 0 for xi in edit_costs_new]
+	residual = prob.value
+	return edit_costs_new, residual
+
+
+def optimize_costs_classif_unlabeled(nb_cost_mat, Y):
+	"""
+	Optimize edit costs to fit dis_k_vec according to edit operations in
+	nb_cost_mat
+	! take care that nb_cost_mat do not contains 0 lines
+	:param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit
+	operations for each pair of graph
+	:param dis_k_vec: {-1,1}^N vector of common classes
+	"""
+	# import cvxpy as cp
+	from ml import reg_log
+	# import pickle
+	# pickle.dump([nb_cost_mat, Y], open('debug', 'wb'))
+	nb_cost_mat_m = np.array([[x[0], x[1], x[3], x[4]]
+							  for x in nb_cost_mat])
+	w, J, _ = reg_log(nb_cost_mat_m, Y, pos_contraint=True)
+	edit_costs_new = [w[0], w[1], 0, w[2], w[3], 0]
+	residual = J[-1]
+
+	return edit_costs_new, residual
+
+
+def optimize_costs_classif(nb_cost_mat, Y):
+	"""
+		Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat
+		! take care that nb_cost_mat do not contains 0 lines
+		:param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph
+		:param dis_k_vec: {-1,1}^N vector of common classes
+	"""
+	#import pickle
+	# pickle.dump([nb_cost_mat, Y], open("test.pickle", "wb"))
+	from ml import reg_log
+	w, J, _ = reg_log(nb_cost_mat, Y, pos_contraint=True)
+	return w, J[-1]
+
+
+def optimize_costs(nb_cost_mat, dis_k_vec):
+	"""
+	Optimize edit costs to fit dis_k_vec according to edit operations in nb_cost_mat
+	! take care that nb_cost_mat do not contains 0 lines
+	:param nb_cost_mat: \in \mathbb{N}^{N x 6} encoding the number of edit operations for each pair of graph
+	:param dis_k_vec: The N distances to fit
+	"""
+	import cvxpy as cp
+	x = cp.Variable(nb_cost_mat.shape[1])
+	cost = cp.sum_squares((nb_cost_mat @ x) - dis_k_vec)
+	constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
+				   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
+				   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
+	prob = cp.Problem(cp.Minimize(cost), constraints)
+	prob.solve()
+	edit_costs_new = x.value
+	residual = prob.value
+
+	return edit_costs_new, residual
+
+
+def compute_optimal_costs(G, y, init_costs=[3, 3, 1, 3, 3, 1],
+						  y_distance=euclid_d,
+						  mode='reg', unlabeled=False,
+						  ed_method='BIPARTITE',
+						  verbose=True,
+						  **kwargs):
+	N = len(y)
+
+	G_pairs = []
+	distances_vec = []
+
+	for i in range(N):
+		for j in range(i+1, N):
+			G_pairs.append([i, j])
+			distances_vec.append(y_distance(y[i], y[j]))
+	ged_vec_init, n_edit_operations = compute_geds(G_pairs, G, init_costs, ed_method,
+												verbose=verbose, **kwargs)
+
+	residual_list = [sum_squares(ged_vec_init, distances_vec)]
+
+	if (mode == 'reg'):
+		if unlabeled:
+			method_optim = optimize_costs_unlabeled
+		else:
+			method_optim = optimize_costs
+
+	elif (mode == 'classif'):
+		if unlabeled:
+			method_optim = optimize_costs_classif_unlabeled
+		else:
+			method_optim = optimize_costs_classif
+
+	ite_max = 5
+	for i in range(ite_max):
+		if verbose:
+			print('ite', i + 1, '/', ite_max, ':')
+		# compute GEDs and numbers of edit operations.
+		edit_costs_new, residual = method_optim(
+			np.array(n_edit_operations), distances_vec)
+		ged_vec, n_edit_operations = compute_geds(G_pairs, G, edit_costs_new, ed_method,
+											verbose=verbose, **kwargs)
+		residual_list.append(sum_squares(ged_vec, distances_vec))
+
+	return edit_costs_new
+
+
+def get_optimal_costs_GH2020(**kwargs):
+	import pickle
+	import os
+	dir_root = 'cj/output/'
+	ds_name = kwargs.get('ds_name')
+	nb_trial = kwargs.get('nb_trial')
+	file_name = os.path.join(dir_root, 'costs.' + ds_name + '.' + str(nb_trial) + '.pkl')
+	with open(file_name, 'rb') as f:
+		edit_costs = pickle.load(f)
+	return edit_costs