#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 15:16:34 2020

@author: ljia

@references:

	[1] Shervashidze N, Schweitzer P, Leeuwen EJ, Mehlhorn K, Borgwardt KM.
	Weisfeiler-lehman graph kernels. Journal of Machine Learning Research.
	2011;12(Sep):2539-61.
"""

import numpy as np
import networkx as nx
import sys
from collections import Counter
# from functools import partial
from itertools import combinations_with_replacement
from gklearn.utils import SpecialLabel
from gklearn.utils.parallel import parallel_gm, parallel_me
from gklearn.kernels import GraphKernel
from gklearn.utils.iters import get_iters


class WeisfeilerLehman(GraphKernel): # @todo: sp, edge user kernel.


	def __init__(self, **kwargs):
		GraphKernel.__init__(self)
		self.node_labels = kwargs.get('node_labels', [])
		self.edge_labels = kwargs.get('edge_labels', [])
		self.height = int(kwargs.get('height', 0))
		self._base_kernel = kwargs.get('base_kernel', 'subtree')
		self._ds_infos = kwargs.get('ds_infos', {})


	##########################################################################
	# The following is the 1st paradigm to compute kernel matrix, which is
	# compatible with `scikit-learn`.
	# -------------------------------------------------------------------
	# Special thanks to the "GraKeL" library for providing an excellent template!
	##########################################################################


	##########################################################################
	# The following is the 2nd paradigm to compute kernel matrix. It is
	# simplified and not compatible with `scikit-learn`.
	##########################################################################


	def _compute_gm_series(self):
#		if self.verbose >= 2:
#			import warnings
#			warnings.warn('A part of the computation is parallelized.')

# 		self._add_dummy_node_labels(self._graphs)

		# for WL subtree kernel
		if self._base_kernel == 'subtree':
			gram_matrix = self._subtree_kernel_do(self._graphs)

		# for WL shortest path kernel
		elif self._base_kernel == 'sp':
			gram_matrix = self._sp_kernel_do(self._graphs)

		# for WL edge kernel
		elif self._base_kernel == 'edge':
			gram_matrix = self._edge_kernel_do(self._graphs)

		# for user defined base kernel
		else:
			gram_matrix = self._user_kernel_do(self._graphs)

		return gram_matrix


	def _compute_gm_imap_unordered(self):
# 		self._add_dummy_node_labels(self._graphs)

		if self._base_kernel == 'subtree':
			gram_matrix = np.zeros((len(self._graphs), len(self._graphs)))

#			for i in range(len(self._graphs)):
#				for j in range(i, len(self._graphs)):
#					gram_matrix[i][j] = self.pairwise_kernel(self._graphs[i], self._graphs[j])
#					gram_matrix[j][i] = gram_matrix[i][j]

			def init_worker(gn_toshare):
				global G_gn
				G_gn = gn_toshare
			do_fun = self._wrapper_pairwise
			parallel_gm(do_fun, gram_matrix, self._graphs, init_worker=init_worker,
			  glbv=(self._graphs,), n_jobs=self.n_jobs, verbose=self.verbose)
			return gram_matrix
		else:
			if self.verbose >= 2:
				import warnings
				warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
			return self._compute_gm_series()


	def _compute_kernel_list_series(self, g1, g_list): # @todo: this should be better.
#		if self.verbose >= 2:
#			import warnings
#			warnings.warn('A part of the computation is parallelized.')

		self._add_dummy_node_labels(g_list + [g1])

		# for WL subtree kernel
		if self._base_kernel == 'subtree':
			gram_matrix = self._subtree_kernel_do(g_list + [g1])

		# for WL shortest path kernel
		elif self._base_kernel == 'sp':
			gram_matrix = self._sp_kernel_do(g_list + [g1])

		# for WL edge kernel
		elif self._base_kernel == 'edge':
			gram_matrix = self._edge_kernel_do(g_list + [g1])

		# for user defined base kernel
		else:
			gram_matrix = self._user_kernel_do(g_list + [g1])

		return list(gram_matrix[-1][0:-1])


	def _compute_kernel_list_imap_unordered(self, g1, g_list):
		self._add_dummy_node_labels(g_list + [g1])

		if self._base_kernel == 'subtree':
			kernel_list = [None] * len(g_list)

			def init_worker(g1_toshare, g_list_toshare):
				global G_g1, G_g_list
				G_g1 = g1_toshare
				G_g_list = g_list_toshare
			do_fun = self._wrapper_kernel_list_do
			def func_assign(result, var_to_assign):
				var_to_assign[result[0]] = result[1]
			itr = range(len(g_list))
			len_itr = len(g_list)
			parallel_me(do_fun, func_assign, kernel_list, itr, len_itr=len_itr,
				init_worker=init_worker, glbv=(g1, g_list), method='imap_unordered',
				n_jobs=self.n_jobs, itr_desc='Computing kernels', verbose=self.verbose)
			return kernel_list
		else:
			if self.verbose >= 2:
				import warnings
				warnings.warn('This base kernel is not parallelized. The serial computation is used instead.')
			return self._compute_kernel_list_series(g1, g_list)


	def _wrapper_kernel_list_do(self, itr):
		return itr, self.pairwise_kernel(G_g1, G_g_list[itr])


	def _compute_single_kernel_series(self, g1, g2):  # @todo: this should be better.
		self._add_dummy_node_labels([g1] + [g2])

		# for WL subtree kernel
		if self._base_kernel == 'subtree':
			gram_matrix = self._subtree_kernel_do([g1] + [g2])

		# for WL shortest path kernel
		elif self._base_kernel == 'sp':
			gram_matrix = self._sp_kernel_do([g1] + [g2])

		# for WL edge kernel
		elif self._base_kernel == 'edge':
			gram_matrix = self._edge_kernel_do([g1] + [g2])

		# for user defined base kernel
		else:
			gram_matrix = self._user_kernel_do([g1] + [g2])

		return gram_matrix[0][1]

	##########################################################################
	# The following are the methods used by both diagrams.
	##########################################################################

	def validate_parameters(self):
		"""Validate all parameters for the transformer.

		Returns
		-------
		None.

		"""
		super().validate_parameters()
		if len(self.node_labels) == 0:
			if len(self.edge_labels) == 0:
				self._subtree_kernel_do = self._subtree_kernel_do_unlabeled
			else:
				self._subtree_kernel_do = self._subtree_kernel_do_el
		else:
			if len(self.edge_labels) == 0:
				self._subtree_kernel_do = self._subtree_kernel_do_nl
			else:
				self._subtree_kernel_do = self._subtree_kernel_do_labeled


	def pairwise_kernel(self, g1, g2):
		Gn = [g1.copy(), g2.copy()] # @todo: make sure it is a full deep copy. and faster!
		kernel = 0

		# initial for height = 0
		all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

		# for each graph
		for G in Gn:
			# set all labels into a tuple.
			for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
				G.nodes[nd]['lt'] = tuple(attrs[name] for name in self.node_labels)
			# get the set of original labels
			labels_ori = list(nx.get_node_attributes(G, 'lt').values())
			# number of occurence of each label in G
			all_num_of_each_label.append(dict(Counter(labels_ori)))

		# Compute subtree kernel with the 0th iteration and add it to the final kernel.
		kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

		# iterate each height
		for h in range(1, self.height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
	#		all_labels_ori = set() # all unique orignal labels in all graphs in this iteration
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:

				all_multisets = []
				for node, attrs in G.nodes(data=True):
					# Multiset-label determination.
					multiset = [G.nodes[neighbors]['lt'] for neighbors in G[node]]
					# sorting each multiset
					multiset.sort()
					multiset = [attrs['lt']] + multiset # add the prefix
					all_multisets.append(tuple(multiset))

				# label compression
				set_unique = list(set(all_multisets)) # set of unique multiset labels
				# a dictionary mapping original labels to new ones.
				set_compressed = {}
				# if a label occured before, assign its former compressed label,
				# else assign the number of labels occured + 1 as the compressed label.
				for value in set_unique:
					if value in all_set_compressed.keys():
						set_compressed[value] = all_set_compressed[value]
					else:
						set_compressed[value] = str(num_of_labels_occured + 1)
						num_of_labels_occured += 1

				all_set_compressed.update(set_compressed)

				# relabel nodes
				for idx, node in enumerate(G.nodes()):
					G.nodes[node]['lt'] = set_compressed[all_multisets[idx]]

				# get the set of compressed labels
				labels_comp = list(nx.get_node_attributes(G, 'lt').values())
	#			all_labels_ori.update(labels_comp)
				all_num_of_each_label.append(dict(Counter(labels_comp)))

			# Compute subtree kernel with h iterations and add it to the final kernel
			kernel = self._compute_kernel_itr(kernel, all_num_of_each_label)

		return kernel


	def _wrapper_pairwise(self, itr):
		i = itr[0]
		j = itr[1]
		return i, j, self.pairwise_kernel(G_gn[i], G_gn[j])


	def _compute_kernel_itr(self, kernel, all_num_of_each_label):
		labels = set(list(all_num_of_each_label[0].keys()) +
			   list(all_num_of_each_label[1].keys()))
		vector1 = np.array([(all_num_of_each_label[0][label]
							if (label in all_num_of_each_label[0].keys()) else 0)
							for label in labels])
		vector2 = np.array([(all_num_of_each_label[1][label]
							if (label in all_num_of_each_label[1].keys()) else 0)
							for label in labels])
		kernel += np.dot(vector1, vector2)
		return kernel


	def _subtree_kernel_do_nl(self, Gn):
		"""Compute Weisfeiler-Lehman kernels between graphs with node labels.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		gram_matrix = np.zeros((len(Gn), len(Gn)))

		# initial for height = 0
		all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

		# for each graph
		if self.verbose >= 2:
			iterator = get_iters(Gn, desc='Setting all labels into a tuple')
		else:
			iterator = Gn
		for G in iterator:
			# set all labels into a tuple. # @todo: remove this original labels or not?
			for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
				G.nodes[nd]['lt'] = tuple(attrs[name] for name in self.node_labels)
			# get the set of original labels
			labels_ori = list(nx.get_node_attributes(G, 'lt').values())
			# number of occurence of each label in G
			all_num_of_each_label.append(dict(Counter(labels_ori)))

		# Compute subtree kernel with the 0th iteration and add it to the final kernel.
		self._compute_gram_itr(gram_matrix, all_num_of_each_label)

		# iterate each height
		for h in range(1, self.height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
	#		all_labels_ori = set() # all unique orignal labels in all graphs in this iteration
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
# 			if self.verbose >= 2:
# 				iterator = get_iters(enumerate(Gn), desc='Going through iteration ' + str(h), length=len(Gn))
# 			else:
# 				iterator = enumerate(Gn)
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)

		return gram_matrix


	def _subtree_kernel_do_el(self, Gn):
		"""Compute Weisfeiler-Lehman kernels between graphs with edge labels.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		gram_matrix = np.zeros((len(Gn), len(Gn)))

		# initial for height = 0
		all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

		# Compute subtree kernel with the 0th iteration and add it to the final kernel.
		iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
		for i, j in iterator:
			gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
			gram_matrix[j][i] = gram_matrix[i][j]


		# if h >= 1.
		if self.height > 0:
			# Set all edge labels into a tuple. # @todo: remove this original labels or not?
			if self.verbose >= 2:
				iterator = get_iters(Gn, desc='Setting all labels into a tuple')
			else:
				iterator = Gn
			for G in iterator:
				for n1, n2, attrs in G.edges(data=True): # @todo: there may be a better way.
					G.edges[(n1, n2)]['lt'] = tuple(attrs[name] for name in self.edge_labels)

			# When h == 1, compute the kernel.
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_el(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel.
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)


		# Iterate along heights (>= 2).
		for h in range(2, self.height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel.
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)

		return gram_matrix


	def _subtree_kernel_do_labeled(self, Gn):
		"""Compute Weisfeiler-Lehman kernels between graphs with both node and
		edge labels.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		gram_matrix = np.zeros((len(Gn), len(Gn)))

		# initial for height = 0
		all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

		# Set all node labels into a tuple and get # of occurence of each label.
		if self.verbose >= 2:
			iterator = get_iters(Gn, desc='Setting all node labels into a tuple')
		else:
			iterator = Gn
		for G in iterator:
			# Set all node labels into a tuple. # @todo: remove this original labels or not?
			for nd, attrs in G.nodes(data=True): # @todo: there may be a better way.
				G.nodes[nd]['lt'] = tuple(attrs[name] for name in self.node_labels)
			# Get the set of original labels.
			labels_ori = list(nx.get_node_attributes(G, 'lt').values())
			# number of occurence of each label in G
			all_num_of_each_label.append(dict(Counter(labels_ori)))

		# Compute subtree kernel with the 0th iteration and add it to the final kernel.
		self._compute_gram_itr(gram_matrix, all_num_of_each_label)


		# if h >= 1.
		if self.height > 0:
			# Set all edge labels into a tuple. # @todo: remove this original labels or not?
			if self.verbose >= 2:
				iterator = get_iters(Gn, desc='Setting all edge labels into a tuple')
			else:
				iterator = Gn
			for G in iterator:
				for n1, n2, attrs in G.edges(data=True): # @todo: there may be a better way.
					G.edges[(n1, n2)]['lt'] = tuple(attrs[name] for name in self.edge_labels)

			# When h == 1, compute the kernel.
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_labeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel.
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)


		# Iterate along heights.
		for h in range(2, self.height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel.
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)

		return gram_matrix


	def _subtree_kernel_do_unlabeled(self, Gn):
		"""Compute Weisfeiler-Lehman kernels between graphs without labels.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		gram_matrix = np.zeros((len(Gn), len(Gn)))

		# initial for height = 0
		all_num_of_each_label = [] # number of occurence of each label in each graph in this iteration

		# Compute subtree kernel with the 0th iteration and add it to the final kernel.
		iterator = combinations_with_replacement(range(0, len(gram_matrix)), 2)
		for i, j in iterator:
			gram_matrix[i][j] += nx.number_of_nodes(Gn[i]) * nx.number_of_nodes(Gn[j])
			gram_matrix[j][i] = gram_matrix[i][j]


		# if h >= 1.
		if self.height > 0:
			# When h == 1, compute the kernel.
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_unlabeled(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel.
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)


		# Iterate along heights (>= 2).
		for h in range(2, self.height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			all_num_of_each_label = [] # number of occurence of each label in G

			# @todo: parallel this part.
			for G in Gn:
				num_of_labels_occured = self._subtree_1graph_nl(G, all_set_compressed, all_num_of_each_label, num_of_labels_occured)

			# Compute subtree kernel with h iterations and add it to the final kernel.
			self._compute_gram_itr(gram_matrix, all_num_of_each_label)

		return gram_matrix


	def _subtree_1graph_nl(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
		all_multisets = []
		for node, attrs in G.nodes(data=True):
			# Multiset-label determination.
			multiset = [G.nodes[neighbors]['lt'] for neighbors in G[node]]
			# sorting each multiset
			multiset.sort()
			multiset = [attrs['lt']] + multiset # add the prefix
			all_multisets.append(tuple(multiset))

		# label compression
		set_unique = list(set(all_multisets)) # set of unique multiset labels
		# a dictionary mapping original labels to new ones.
		set_compressed = {}
		# If a label occured before, assign its former compressed label;
		# otherwise assign the number of labels occured + 1 as the
		# compressed label.
		for value in set_unique:
			if value in all_set_compressed.keys(): # @todo: put keys() function out of for loop?
				set_compressed[value] = all_set_compressed[value]
			else:
				set_compressed[value] = str(num_of_labels_occured + 1) # @todo: remove str? and what if num_of_labels_occured is extremely big.
				num_of_labels_occured += 1

		all_set_compressed.update(set_compressed)

		# Relabel nodes.
		for idx, node in enumerate(G.nodes()):
			G.nodes[node]['lt'] = set_compressed[all_multisets[idx]]

		# Get the set of compressed labels.
		labels_comp = list(nx.get_node_attributes(G, 'lt').values())
		all_num_of_each_label.append(dict(Counter(labels_comp)))

		return num_of_labels_occured


	def _subtree_1graph_el(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
		all_multisets = []
# 		for node, attrs in G.nodes(data=True):
		for node in G.nodes():
			# Multiset-label determination.
			multiset = [G.edges[(node, neighbors)]['lt'] for neighbors in G[node]] # @todo: check reference for this.
			# sorting each multiset
			multiset.sort()
# 			multiset = [attrs['lt']] + multiset # add the prefix
			all_multisets.append(tuple(multiset))

		# label compression
		set_unique = list(set(all_multisets)) # set of unique multiset labels
		# a dictionary mapping original labels to new ones.
		set_compressed = {}
		# If a label occured before, assign its former compressed label;
		# otherwise assign the number of labels occured + 1 as the
		# compressed label.
		for value in set_unique:
			if value in all_set_compressed.keys(): # @todo: put keys() function out of for loop?
				set_compressed[value] = all_set_compressed[value]
			else:
				set_compressed[value] = str(num_of_labels_occured + 1) # @todo: remove str?
				num_of_labels_occured += 1

		all_set_compressed.update(set_compressed)

		# Relabel nodes.
		for idx, node in enumerate(G.nodes()):
			G.nodes[node]['lt'] = set_compressed[all_multisets[idx]]

		# Get the set of compressed labels.
		labels_comp = list(nx.get_node_attributes(G, 'lt').values()) # @todo: maybe can be faster.
		all_num_of_each_label.append(dict(Counter(labels_comp)))

		return num_of_labels_occured


	def _subtree_1graph_labeled(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
		all_multisets = []
		for node, attrs in G.nodes(data=True):
			# Multiset-label determination.
			multiset = [tuple((G.edges[(node, neighbors)]['lt'], G.nodes[neighbors]['lt'])) for neighbors in G[node]] # @todo: check reference for this.
			# sorting each multiset
			multiset.sort()
			multiset = [attrs['lt']] + multiset # add the prefix
			all_multisets.append(tuple(multiset))

		# label compression
		set_unique = list(set(all_multisets)) # set of unique multiset labels
		# a dictionary mapping original labels to new ones.
		set_compressed = {}
		# If a label occured before, assign its former compressed label;
		# otherwise assign the number of labels occured + 1 as the
		# compressed label.
		for value in set_unique:
			if value in all_set_compressed.keys(): # @todo: put keys() function out of for loop?
				set_compressed[value] = all_set_compressed[value]
			else:
				set_compressed[value] = str(num_of_labels_occured + 1) # @todo: remove str?
				num_of_labels_occured += 1

		all_set_compressed.update(set_compressed)

		# Relabel nodes.
		for idx, node in enumerate(G.nodes()):
			G.nodes[node]['lt'] = set_compressed[all_multisets[idx]]

		# Get the set of compressed labels.
		labels_comp = list(nx.get_node_attributes(G, 'lt').values())
		all_num_of_each_label.append(dict(Counter(labels_comp)))

		return num_of_labels_occured


	def _subtree_1graph_unlabeled(self, G, all_set_compressed, all_num_of_each_label, num_of_labels_occured):
# 		all_multisets = []
# 		for node, attrs in G.nodes(data=True): # @todo: it can be better.
# 			# Multiset-label determination.
# 			multiset = [0 for neighbors in G[node]]
# 			# sorting each multiset
# 			multiset.sort()
# 			multiset = [0] + multiset # add the prefix
# 			all_multisets.append(tuple(multiset))
		all_multisets = [len(G[node]) for node in G.nodes()]

		# label compression
		set_unique = list(set(all_multisets)) # set of unique multiset labels
		# a dictionary mapping original labels to new ones.
		set_compressed = {}
		# If a label occured before, assign its former compressed label;
		# otherwise assign the number of labels occured + 1 as the
		# compressed label.
		for value in set_unique:
			if value in all_set_compressed.keys(): # @todo: put keys() function out of for loop?
				set_compressed[value] = all_set_compressed[value]
			else:
				set_compressed[value] = str(num_of_labels_occured + 1) # @todo: remove str?
				num_of_labels_occured += 1

		all_set_compressed.update(set_compressed)

		# Relabel nodes.
		for idx, node in enumerate(G.nodes()):
			G.nodes[node]['lt'] = set_compressed[all_multisets[idx]]

		# Get the set of compressed labels.
		labels_comp = list(nx.get_node_attributes(G, 'lt').values())
		all_num_of_each_label.append(dict(Counter(labels_comp)))

		return num_of_labels_occured


	def _compute_gram_itr(self, gram_matrix, all_num_of_each_label):
		"""Compute Gram matrix using the base kernel.
		"""
#		if self.parallel == 'imap_unordered':
#			# compute kernels.
#			def init_worker(alllabels_toshare):
#				global G_alllabels
#				G_alllabels = alllabels_toshare
#			do_partial = partial(self._wrapper_compute_subtree_kernel, gram_matrix)
#			parallel_gm(do_partial, gram_matrix, Gn, init_worker=init_worker,
#						glbv=(all_num_of_each_label,), n_jobs=self.n_jobs, verbose=self.verbose)
#		elif self.parallel is None:
		itr = combinations_with_replacement(range(0, len(gram_matrix)), 2)
		len_itr = int(len(gram_matrix) * (len(gram_matrix) + 1) / 2)
		iterator = get_iters(itr, desc='Computing Gram matrix for this iteration', file=sys.stdout, length=len_itr, verbose=(self.verbose >= 2))
		for i, j in iterator:
# 		for i in iterator:
# 			for j in range(i, len(gram_matrix)):
			gram_matrix[i][j] += self._compute_subtree_kernel(all_num_of_each_label[i],
				   all_num_of_each_label[j])
			gram_matrix[j][i] = gram_matrix[i][j]


	def _compute_subtree_kernel(self, num_of_each_label1, num_of_each_label2):
		"""Compute the subtree kernel.
		"""
		labels = set(list(num_of_each_label1.keys()) + list(num_of_each_label2.keys()))
		vector1 = np.array([(num_of_each_label1[label]
							if (label in num_of_each_label1.keys()) else 0)
							for label in labels])
		vector2 = np.array([(num_of_each_label2[label]
							if (label in num_of_each_label2.keys()) else 0)
							for label in labels])
		kernel = np.dot(vector1, vector2)
		return kernel


#	def _wrapper_compute_subtree_kernel(self, gram_matrix, itr):
#		i = itr[0]
#		j = itr[1]
#		return i, j, self._compute_subtree_kernel(G_alllabels[i], G_alllabels[j], gram_matrix[i][j])


	def _wl_spkernel_do(Gn, node_label, edge_label, height):
		"""Compute Weisfeiler-Lehman shortest path kernels between graphs.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.
		node_label : string
			node attribute used as label.
		edge_label : string
			edge attribute used as label.
		height : int
			subtree height.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		pass
		from gklearn.utils.utils import getSPGraph

		# init.
		height = int(height)
		gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel

		Gn = [ getSPGraph(G, edge_weight = edge_label) for G in Gn ] # get shortest path graphs of Gn

		# initial for height = 0
		for i in range(0, len(Gn)):
			for j in range(i, len(Gn)):
				for e1 in Gn[i].edges(data = True):
					for e2 in Gn[j].edges(data = True):
						if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
							gram_matrix[i][j] += 1
				gram_matrix[j][i] = gram_matrix[i][j]

		# iterate each height
		for h in range(1, height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			for G in Gn: # for each graph
				set_multisets = []
				for node in G.nodes(data = True):
					# Multiset-label determination.
					multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
					# sorting each multiset
					multiset.sort()
					multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
					set_multisets.append(multiset)

				# label compression
				set_unique = list(set(set_multisets)) # set of unique multiset labels
				# a dictionary mapping original labels to new ones.
				set_compressed = {}
				# if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label
				for value in set_unique:
					if value in all_set_compressed.keys():
						set_compressed[value] = all_set_compressed[value]
					else:
						set_compressed[value] = str(num_of_labels_occured + 1)
						num_of_labels_occured += 1

				all_set_compressed.update(set_compressed)

				# relabel nodes
				for node in G.nodes(data = True):
					node[1][node_label] = set_compressed[set_multisets[node[0]]]

			# Compute subtree kernel with h iterations and add it to the final kernel
			for i in range(0, len(Gn)):
				for j in range(i, len(Gn)):
					for e1 in Gn[i].edges(data = True):
						for e2 in Gn[j].edges(data = True):
							if e1[2]['cost'] != 0 and e1[2]['cost'] == e2[2]['cost'] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
								gram_matrix[i][j] += 1
					gram_matrix[j][i] = gram_matrix[i][j]

		return gram_matrix


	def _wl_edgekernel_do(Gn, node_label, edge_label, height):
		"""Compute Weisfeiler-Lehman edge kernels between graphs.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.
		node_label : string
			node attribute used as label.
		edge_label : string
			edge attribute used as label.
		height : int
			subtree height.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		pass
		# init.
		height = int(height)
		gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel

		# initial for height = 0
		for i in range(0, len(Gn)):
			for j in range(i, len(Gn)):
				for e1 in Gn[i].edges(data = True):
					for e2 in Gn[j].edges(data = True):
						if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
							gram_matrix[i][j] += 1
				gram_matrix[j][i] = gram_matrix[i][j]

		# iterate each height
		for h in range(1, height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			for G in Gn: # for each graph
				set_multisets = []
				for node in G.nodes(data = True):
					# Multiset-label determination.
					multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
					# sorting each multiset
					multiset.sort()
					multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
					set_multisets.append(multiset)

				# label compression
				set_unique = list(set(set_multisets)) # set of unique multiset labels
				# a dictionary mapping original labels to new ones.
				set_compressed = {}
				# if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label
				for value in set_unique:
					if value in all_set_compressed.keys():
						set_compressed[value] = all_set_compressed[value]
					else:
						set_compressed[value] = str(num_of_labels_occured + 1)
						num_of_labels_occured += 1

				all_set_compressed.update(set_compressed)

				# relabel nodes
				for node in G.nodes(data = True):
					node[1][node_label] = set_compressed[set_multisets[node[0]]]

			# Compute subtree kernel with h iterations and add it to the final kernel
			for i in range(0, len(Gn)):
				for j in range(i, len(Gn)):
					for e1 in Gn[i].edges(data = True):
						for e2 in Gn[j].edges(data = True):
							if e1[2][edge_label] == e2[2][edge_label] and ((e1[0] == e2[0] and e1[1] == e2[1]) or (e1[0] == e2[1] and e1[1] == e2[0])):
								gram_matrix[i][j] += 1
					gram_matrix[j][i] = gram_matrix[i][j]

		return gram_matrix


	def _wl_userkernel_do(Gn, node_label, edge_label, height, base_kernel):
		"""Compute Weisfeiler-Lehman kernels based on user-defined kernel between graphs.

		Parameters
		----------
		Gn : List of NetworkX graph
			List of graphs between which the kernels are computed.
		node_label : string
			node attribute used as label.
		edge_label : string
			edge attribute used as label.
		height : int
			subtree height.
		base_kernel : string
			Name of the base kernel function used in each iteration of WL kernel. This function returns a Numpy matrix, each element of which is the user-defined Weisfeiler-Lehman kernel between 2 praphs.

		Return
		------
		gram_matrix : Numpy matrix
			Kernel matrix, each element of which is the Weisfeiler-Lehman kernel between 2 praphs.
		"""
		pass
		# init.
		height = int(height)
		gram_matrix = np.zeros((len(Gn), len(Gn))) # init kernel

		# initial for height = 0
		gram_matrix = base_kernel(Gn, node_label, edge_label)

		# iterate each height
		for h in range(1, height + 1):
			all_set_compressed = {} # a dictionary mapping original labels to new ones in all graphs in this iteration
			num_of_labels_occured = 0 # number of the set of letters that occur before as node labels at least once in all graphs
			for G in Gn: # for each graph
				set_multisets = []
				for node in G.nodes(data = True):
					# Multiset-label determination.
					multiset = [ G.node[neighbors][node_label] for neighbors in G[node[0]] ]
					# sorting each multiset
					multiset.sort()
					multiset = node[1][node_label] + ''.join(multiset) # concatenate to a string and add the prefix
					set_multisets.append(multiset)

				# label compression
				set_unique = list(set(set_multisets)) # set of unique multiset labels
				# a dictionary mapping original labels to new ones.
				set_compressed = {}
				# if a label occured before, assign its former compressed label, else assign the number of labels occured + 1 as the compressed label
				for value in set_unique:
					if value in all_set_compressed.keys():
						set_compressed[value] = all_set_compressed[value]
					else:
						set_compressed[value] = str(num_of_labels_occured + 1)
						num_of_labels_occured += 1

				all_set_compressed.update(set_compressed)

				# relabel nodes
				for node in G.nodes(data = True):
					node[1][node_label] = set_compressed[set_multisets[node[0]]]

			# Compute kernel with h iterations and add it to the final kernel
			gram_matrix += base_kernel(Gn, node_label, edge_label)

		return gram_matrix


	def _add_dummy_node_labels(self, Gn):
		if len(self.node_labels) == 0 or (len(self.node_labels) == 1 and self.node_labels[0] == SpecialLabel.DUMMY):
			for i in range(len(Gn)):
				nx.set_node_attributes(Gn[i], '0', SpecialLabel.DUMMY)
			self.node_labels = [SpecialLabel.DUMMY]


class WLSubtree(WeisfeilerLehman):

	def __init__(self, **kwargs):
		kwargs['base_kernel'] = 'subtree'
		super().__init__(**kwargs)