#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu May  5 09:42:30 2022

@author: ljia
"""
import sys
import multiprocessing
import time
import numpy as np
import networkx as nx

# from abc import ABC, abstractmethod
from sklearn.base import BaseEstimator # , TransformerMixin
from sklearn.utils.validation import check_is_fitted # check_X_y, check_array,
from sklearn.exceptions import NotFittedError

from gklearn.ged.model.distances import euclid_d
from gklearn.ged.util import pairwise_ged, get_nb_edit_operations
# from gklearn.utils import normalize_gram_matrix
from gklearn.utils import get_iters


class GEDModel(BaseEstimator): #, ABC):
	"""The graph edit distance model class compatible with `scikit-learn`.

	Attributes
    ----------
    _graphs : list
        Stores the input graphs on fit input data.
        Default format of the list objects is `NetworkX` graphs.
		**We don't guarantee that the input graphs remain unchanged during the
		computation.**

	References
	----------
	https://ysig.github.io/GraKeL/0.1a8/_modules/grakel/kernels/kernel.html#Kernel.
	"""

	def __init__(self,
			  ed_method='BIPARTITE',
			  edit_cost_fun='CONSTANT',
			  init_edit_cost_constants=[3, 3, 1, 3, 3, 1],
			  optim_method='init',
			  optim_options={'y_distance': euclid_d, 'mode': 'reg'},
			  node_labels=[],
			  edge_labels=[],
			  parallel=None,
			  n_jobs=None,
			  chunksize=None,
#			  normalize=True,
			  copy_graphs=True, # make sure it is a full deep copy. and faster!
			  verbose=2):
		"""`__init__` for `GEDModel` object."""
		# @todo: the default settings of the parameters are different from those in the self.compute method.
#		self._graphs = None
		self.ed_method = ed_method
		self.edit_cost_fun = edit_cost_fun
		self.init_edit_cost_constants = init_edit_cost_constants
		self.optim_method=optim_method
		self.optim_options=optim_options
		self.node_labels=node_labels
		self.edge_labels=edge_labels
		self.parallel = parallel
		self.n_jobs = n_jobs
		self.chunksize = chunksize
#		self.normalize = normalize
		self.copy_graphs = copy_graphs
		self.verbose = verbose
#		self._run_time = 0
#		self._gram_matrix = None
#		self._gram_matrix_unnorm = None


	##########################################################################
	# The following is the 1st paradigm to compute GED distance matrix, which is
	# compatible with `scikit-learn`.
	##########################################################################


	def fit(self, X, y=None):
		"""Fit a graph dataset for a transformer.

		Parameters
		----------
		X : iterable
			DESCRIPTION.

		y : None, optional
			There is no need of a target in a transformer, yet the `scikit-learn`
			pipeline API requires this parameter.

		Returns
		-------
		object
			Returns self.

		"""
#		self._is_tranformed = False

		# Clear any prior attributes stored on the estimator, # @todo: unless warm_start is used;
		self.clear_attributes()

		# Validate parameters for the transformer.
		self.validate_parameters()

		# Validate the input.
		self._graphs = self.validate_input(X)
		if y is not None:
 			self._targets = y
 			# self._targets = self.validate_input(y)

#		self._X = X
#		self._kernel = self._get_kernel_instance()

		# Return the transformer.
		return self


	def transform(self, X=None, return_dm_train=False):
		"""Compute the graph kernel matrix between given and fitted data.

		Parameters
		----------
		X : TYPE
			DESCRIPTION.

		Raises
		------
		ValueError
			DESCRIPTION.

		Returns
		-------
		None.

		"""
		# If `return_dm_train`, return the fitted GED distance matrix of training data.
		if return_dm_train:
			check_is_fitted(self, '_dm_train')
			self._is_transformed = True
			return self._dm_train # @todo: copy or not?

		# Check if method "fit" had been called.
		check_is_fitted(self, '_graphs')

		# Validate the input.
		Y = self.validate_input(X)

		# Transform: compute the graph kernel matrix.
		dis_matrix = self.compute_distance_matrix(Y)
		self._Y = Y

		# Self transform must appear before the diagonal call on normilization.
		self._is_transformed = True
# 		if self.normalize:
# 			X_diag, Y_diag = self.diagonals()
# 			old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt.
# 			try:
# 				kernel_matrix /= np.sqrt(np.outer(Y_diag, X_diag))
# 			except:
# 				raise
# 			finally:
# 				np.seterr(**old_settings)

		return dis_matrix


	def fit_transform(self, X, y=None, save_dm_train=False):
		"""Fit and transform: compute GED distance matrix on the same data.

		Parameters
		----------
		X : list of graphs
			Input graphs.

		Returns
		-------
		dis_matrix : numpy array, shape = [len(X), len(X)]
			The distance matrix of X.

		"""
		self.fit(X, y)

		# Compute edit cost constants.
		self.compute_edit_costs()

		# Transform: compute Gram matrix.
		dis_matrix = self.compute_distance_matrix()

#		# Normalize.
#		if self.normalize:
#			self._X_diag = np.diagonal(gram_matrix).copy()
#			old_settings = np.seterr(invalid='raise') # Catch FloatingPointError: invalid value encountered in sqrt.
#			try:
#				gram_matrix /= np.sqrt(np.outer(self._X_diag, self._X_diag))
#			except:
#				raise
#			finally:
#				np.seterr(**old_settings)

		if save_dm_train:
			self._dm_train = dis_matrix

		return dis_matrix


	def get_params(self):
		pass


	def set_params(self):
		pass


	def clear_attributes(self): # @todo: update
#		if hasattr(self, '_X_diag'):
#			delattr(self, '_X_diag')
		if hasattr(self, '_graphs'):
			delattr(self, '_graphs')
		if hasattr(self, '_Y'):
			delattr(self, '_Y')
		if hasattr(self, '_run_time'):
			delattr(self, '_run_time')


	def validate_parameters(self):
		"""Validate all parameters for the transformer.

		Returns
		-------
		None.

		"""
		if self.parallel is not None and self.parallel != 'imap_unordered':
			raise ValueError('Parallel mode is not set correctly.')

		if self.parallel == 'imap_unordered' and self.n_jobs is None:
			self.n_jobs = multiprocessing.cpu_count()


	def validate_input(self, X):
		"""Validate the given input and raise errors if it is invalid.

		Parameters
		----------
		X : list
			The input to check. Should be a list of graph.

		Raises
		------
		ValueError
			Raise if the input is not correct.

		Returns
		-------
		X : list
			The input. A list of graph.

		"""
		if X is None:
			raise ValueError('Please add graphs before computing.')
		elif not isinstance(X, list):
			raise ValueError('Cannot detect graphs. The input must be a list.')
		elif len(X) == 0:
			raise ValueError('The graph list given is empty. No computation will be performed.')

		return X


	def compute_distance_matrix(self, Y=None):
		"""Compute the distance matrix between a given target graphs (Y) and
		the fitted graphs (X / self._graphs) or the distance matrix for the fitted
		graphs (X / self._graphs).

		Parameters
		----------
		Y : list of graphs, optional
			The target graphs. The default is None. If None kernel is computed
			between X and itself.

		Returns
		-------
		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
			The computed kernel matrix.

		"""
		if Y is None:
			# Compute Gram matrix for self._graphs (X).
			dis_matrix = self._compute_X_distance_matrix()
#			self._gram_matrix_unnorm = np.copy(self._gram_matrix)

		else:
			# Compute kernel matrix between Y and self._graphs (X).
			start_time = time.time()

			if self.parallel == 'imap_unordered':
				dis_matrix = self._compute_distance_matrix_imap_unordered(Y)

			elif self.parallel is None:
				Y_copy = ([g.copy() for g in Y] if self.copy_graphs else Y)
				graphs_copy = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
				dis_matrix = self._compute_distance_matrix_series(Y_copy, graphs_copy)

			self._run_time = time.time() - start_time
			if self.verbose:
				print('Distance matrix of size (%d, %d) built in %s seconds.'
				  % (len(Y), len(self._graphs), self._run_time))

		return dis_matrix


	def _compute_distance_matrix_series(self, X, Y):
		"""Compute the GED distance matrix between two sets of graphs (X and Y)
		without parallelization.

		Parameters
		----------
		X, Y : list of graphs
			The input graphs.

		Returns
		-------
		dis_matrix : numpy array, shape = [n_X, n_Y]
			The computed distance matrix.

		"""
		dis_matrix = np.zeros((len(X), len(Y)))

		for i_x, g_x in enumerate(X):
			for i_y, g_y in enumerate(Y):
				dis_matrix[i_x, i_y], _ = self.compute_ged(g_x, g_y)

		return dis_matrix


	def _compute_kernel_matrix_imap_unordered(self, Y):
		"""Compute the kernel matrix between a given target graphs (Y) and
		the fitted graphs (X / self._graphs) using imap unordered parallelization.

		Parameters
		----------
		Y : list of graphs, optional
			The target graphs.

		Returns
		-------
		kernel_matrix : numpy array, shape = [n_targets, n_inputs]
			The computed kernel matrix.

		"""
		raise Exception('Parallelization for kernel matrix is not implemented.')


	def diagonals(self):
		"""Compute the kernel matrix diagonals of the fit/transformed data.

		Returns
		-------
        X_diag : numpy array
            The diagonal of the kernel matrix between the fitted data.
            This consists of each element calculated with itself.

        Y_diag : numpy array
            The diagonal of the kernel matrix, of the transform.
            This consists of each element calculated with itself.

		"""
		# Check if method "fit" had been called.
		check_is_fitted(self, ['_graphs'])

		# Check if the diagonals of X exist.
		try:
			check_is_fitted(self, ['_X_diag'])
		except NotFittedError:
			# Compute diagonals of X.
			self._X_diag = np.empty(shape=(len(self._graphs),))
			graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
			for i, x in enumerate(graphs):
				self._X_diag[i] = self.pairwise_kernel(x, x) # @todo: parallel?

		try:
            # If transform has happened, return both diagonals.
			check_is_fitted(self, ['_Y'])
			self._Y_diag = np.empty(shape=(len(self._Y),))
			Y = ([g.copy() for g in self._Y] if self.copy_graphs else self._Y)
			for (i, y) in enumerate(Y):
				self._Y_diag[i] = self.pairwise_kernel(y, y) # @todo: parallel?

			return self._X_diag, self._Y_diag
		except NotFittedError:
            # Else just return both X_diag
			return self._X_diag


#	@abstractmethod
	def pairwise_distance(self, x, y):
		"""Compute pairwise kernel between two graphs.

		Parameters
		----------
		x, y : NetworkX Graph.
			Graphs bewteen which the kernel is computed.

		Returns
		-------
		kernel: float
			The computed kernel.

#		Notes
#		-----
#		This method is abstract and must be implemented by a subclass.

		"""
		raise NotImplementedError('Pairwise kernel computation is not implemented!')


	def compute_edit_costs(self, Y=None, Y_targets=None):
		"""Compute edit cost constants. When optimizing method is `fiited`,
		apply Jia2021's metric learning method by using a given target graphs (Y)
		the fitted graphs (X / self._graphs).

		Parameters
		----------
		Y : TYPE, optional
			DESCRIPTION. The default is None.

		Returns
		-------
		None.

		"""
		# Get or compute.
		if self.optim_method == 'random':
			self._edit_cost_constants = np.random.rand(6)

		elif self.optim_method == 'init':
			self._edit_cost_constants = self.init_edit_cost_constants


		elif self.optim_method == 'expert':
			self._edit_cost_constants = [3, 3, 1, 3, 3, 1]


		elif self.optim_method == 'fitted': # Jia2021 method
			# Get proper inputs.
			if Y is None:
				check_is_fitted(self, ['_graphs'])
				check_is_fitted(self, ['_targets'])
				graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
				targets = self._targets
			else:
				graphs = ([g.copy() for g in Y] if self.copy_graphs else Y)
				targets = Y_targets

			# Get optimization options.
			node_labels = self.node_labels
			edge_labels = self.edge_labels
			unlabeled = (len(node_labels) == 0 and len(edge_labels) == 0)
			from gklearn.ged.model.optim_costs import compute_optimal_costs
			self._edit_cost_constants = compute_optimal_costs(
				graphs, targets,
				node_labels=node_labels, edge_labels=edge_labels,
				unlabeled=unlabeled, ed_method=self.ed_method,
				verbose=(self.verbose >= 2),
				**self.optim_options)


	##########################################################################
	# The following is the 2nd paradigm to compute kernel matrix. It is
	# simplified and not compatible with `scikit-learn`.
	##########################################################################


#	def compute(self, *graphs, **kwargs):
#		self.parallel = kwargs.get('parallel', 'imap_unordered')
#		self.n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
#		self.normalize = kwargs.get('normalize', True)
#		self.verbose = kwargs.get('verbose', 2)
#		self.copy_graphs = kwargs.get('copy_graphs', True)
#		self.save_unnormed = kwargs.get('save_unnormed', True)
#		self.validate_parameters()

#		# If the inputs is a list of graphs.
#		if len(graphs) == 1:
#			if not isinstance(graphs[0], list):
#				raise Exception('Cannot detect graphs.')
#			elif len(graphs[0]) == 0:
#				raise Exception('The graph list given is empty. No computation was performed.')
#			else:
#				if self.copy_graphs:
#					self._graphs = [g.copy() for g in graphs[0]] # @todo: might be very slow.
#				else:
#					self._graphs = graphs
#				self._gram_matrix = self._compute_gram_matrix()

#				if self.save_unnormed:
#					self._gram_matrix_unnorm = np.copy(self._gram_matrix)
#				if self.normalize:
#					self._gram_matrix = normalize_gram_matrix(self._gram_matrix)
#				return self._gram_matrix, self._run_time

#		elif len(graphs) == 2:
#			# If the inputs are two graphs.
#			if self.is_graph(graphs[0]) and self.is_graph(graphs[1]):
#				if self.copy_graphs:
#					G0, G1 = graphs[0].copy(), graphs[1].copy()
#				else:
#					G0, G1 = graphs[0], graphs[1]
#				kernel = self._compute_single_kernel(G0, G1)
#				return kernel, self._run_time

#			# If the inputs are a graph and a list of graphs.
#			elif self.is_graph(graphs[0]) and isinstance(graphs[1], list):
#				if self.copy_graphs:
#					g1 = graphs[0].copy()
#					g_list = [g.copy() for g in graphs[1]]
#					kernel_list = self._compute_kernel_list(g1, g_list)
#				else:
#					kernel_list = self._compute_kernel_list(graphs[0], graphs[1])
#				return kernel_list, self._run_time

#			elif isinstance(graphs[0], list) and self.is_graph(graphs[1]):
#				if self.copy_graphs:
#					g1 = graphs[1].copy()
#					g_list = [g.copy() for g in graphs[0]]
#					kernel_list = self._compute_kernel_list(g1, g_list)
#				else:
#					kernel_list = self._compute_kernel_list(graphs[1], graphs[0])
#				return kernel_list, self._run_time

#			else:
#				raise Exception('Cannot detect graphs.')

#		elif len(graphs) == 0 and self._graphs is None:
#			raise Exception('Please add graphs before computing.')

#		else:
#			raise Exception('Cannot detect graphs.')


#	def normalize_gm(self, gram_matrix):
#		import warnings
#		warnings.warn('gklearn.kernels.graph_kernel.normalize_gm will be deprecated, use gklearn.utils.normalize_gram_matrix instead', DeprecationWarning)

#		diag = gram_matrix.diagonal().copy()
#		for i in range(len(gram_matrix)):
#			for j in range(i, len(gram_matrix)):
#				gram_matrix[i][j] /= np.sqrt(diag[i] * diag[j])
#				gram_matrix[j][i] = gram_matrix[i][j]
#		return gram_matrix


#	def compute_distance_matrix(self):
#		if self._gram_matrix is None:
#			raise Exception('Please compute the Gram matrix before computing distance matrix.')
#		dis_mat = np.empty((len(self._gram_matrix), len(self._gram_matrix)))
#		for i in range(len(self._gram_matrix)):
#			for j in range(i, len(self._gram_matrix)):
#				dis = self._gram_matrix[i, i] + self._gram_matrix[j, j] - 2 * self._gram_matrix[i, j]
#				if dis < 0:
#					if dis > -1e-10:
#						dis = 0
#					else:
#						raise ValueError('The distance is negative.')
#				dis_mat[i, j] = np.sqrt(dis)
#				dis_mat[j, i] = dis_mat[i, j]
#		dis_max = np.max(np.max(dis_mat))
#		dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
#		dis_mean = np.mean(np.mean(dis_mat))
#		return dis_mat, dis_max, dis_min, dis_mean


	def _compute_X_distance_matrix(self):
		start_time = time.time()

		if self.parallel == 'imap_unordered':
			dis_matrix = self._compute_X_dm_imap_unordered()
		elif self.parallel is None:
			graphs = ([g.copy() for g in self._graphs] if self.copy_graphs else self._graphs)
			dis_matrix = self._compute_X_dm_series(graphs)
		else:
			raise Exception('Parallel mode is not set correctly.')

		self._run_time = time.time() - start_time
		if self.verbose:
			print('Distance matrix of size %d built in %s seconds.'
			  % (len(self._graphs), self._run_time))

		return dis_matrix


	def _compute_X_dm_series(self, graphs):
		N = len(graphs)
		dis_matrix = np.zeros((N, N))

		for i, G1 in get_iters(enumerate(graphs), desc='Computing distance matrix', file=sys.stdout, verbose=(self.verbose >= 2)):
			for j, G2 in enumerate(graphs[i+1:], i+1):
				dis_matrix[i, j], _ = self.compute_ged(G1, G2)
				dis_matrix[j, i] = dis_matrix[i, j]
		return dis_matrix


	def _compute_X_dm_imap_unordered(self, graphs):
		pass


	def compute_ged(self, Gi, Gj, **kwargs):
		"""
		Compute GED between two graph according to edit_cost.
		"""
		ged_options = {'edit_cost': self.edit_cost_fun,
				 'method': self.ed_method,
				 'edit_cost_constants': self._edit_cost_constants}
		dis, pi_forward, pi_backward = pairwise_ged(Gi, Gj, ged_options, repeats=10)
		n_eo_tmp = get_nb_edit_operations(Gi, Gj, pi_forward, pi_backward,
									 edit_cost=self.edit_cost_fun,
									 node_labels=self.node_labels,
									 edge_labels=self.edge_labels)
		return dis, n_eo_tmp


# 	def _compute_kernel_list(self, g1, g_list):
# 		start_time = time.time()

# 		if self.parallel == 'imap_unordered':
# 			kernel_list = self._compute_kernel_list_imap_unordered(g1, g_list)
# 		elif self.parallel is None:
# 			kernel_list = self._compute_kernel_list_series(g1, g_list)
# 		else:
# 			raise Exception('Parallel mode is not set correctly.')

# 		self._run_time = time.time() - start_time
# 		if self.verbose:
# 			print('Graph kernel bewteen a graph and a list of %d graphs built in %s seconds.'
# 			  % (len(g_list), self._run_time))

# 		return kernel_list


# 	def _compute_kernel_list_series(self, g1, g_list):
# 		pass


# 	def _compute_kernel_list_imap_unordered(self, g1, g_list):
# 		pass


# 	def _compute_single_kernel(self, g1, g2):
# 		start_time = time.time()

# 		kernel = self._compute_single_kernel_series(g1, g2)

# 		self._run_time = time.time() - start_time
# 		if self.verbose:
# 			print('Graph kernel bewteen two graphs built in %s seconds.' % (self._run_time))

# 		return kernel


# 	def _compute_single_kernel_series(self, g1, g2):
# 		pass


	def is_graph(self, graph):
		if isinstance(graph, nx.Graph):
			return True
		if isinstance(graph, nx.DiGraph):
			return True
		if isinstance(graph, nx.MultiGraph):
			return True
		if isinstance(graph, nx.MultiDiGraph):
			return True
		return False


	@property
	def graphs(self):
		return self._graphs


#	@property
#	def parallel(self):
#		return self.parallel


#	@property
#	def n_jobs(self):
#		return self.n_jobs


#	@property
#	def verbose(self):
#		return self.verbose


#	@property
#	def normalize(self):
#		return self.normalize


	@property
	def run_time(self):
		return self._run_time


	@property
	def dis_matrix(self):
		return self._dis_matrix

	@dis_matrix.setter
	def dis_matrix(self, value):
		self._dis_matrix = value


# 	@property
# 	def gram_matrix_unnorm(self):
# 		return self._gram_matrix_unnorm

# 	@gram_matrix_unnorm.setter
# 	def gram_matrix_unnorm(self, value):
# 		self._gram_matrix_unnorm = value