
median_preimage_generator.py 35 kB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 26 18:27:22 2020
@author: ljia
"""
import numpy as np
import time
import random
import multiprocessing
import networkx as nx
import cvxpy as cp
from gklearn.preimage import PreimageGenerator
from gklearn.preimage.utils import compute_k_dis
from gklearn.ged.util import compute_geds, ged_options_to_string
from gklearn.ged.median import MedianGraphEstimator
from gklearn.ged.median import constant_node_costs, mge_options_to_string
from gklearn.gedlib import librariesImport, gedlibpy
from gklearn.utils import Timer
# from gklearn.utils.dataset import Dataset


class MedianPreimageGenerator(PreimageGenerator):

    def __init__(self, dataset=None):
        PreimageGenerator.__init__(self, dataset=dataset)
        # arguments to set.
        self.__mge = None
        self.__ged_options = {}
        self.__mge_options = {}
        self.__fit_method = 'k-graphs'
        self.__init_ecc = None
        self.__parallel = True
        self.__n_jobs = multiprocessing.cpu_count()
        self.__ds_name = None
        self.__time_limit_in_sec = 0
        self.__max_itrs = 100
        self.__max_itrs_without_update = 3
        self.__epsilon_residual = 0.01
        self.__epsilon_ec = 0.1
        # values to compute.
        self.__runtime_optimize_ec = None
        self.__runtime_generate_preimage = None
        self.__runtime_total = None
        self.__set_median = None
        self.__gen_median = None
        self.__best_from_dataset = None
        self.__sod_set_median = None
        self.__sod_gen_median = None
        self.__k_dis_set_median = None
        self.__k_dis_gen_median = None
        self.__k_dis_dataset = None
        self.__itrs = 0
        self.__converged = False
        self.__num_updates_ecc = 0
        # values that can be set or computed.
        self.__edit_cost_constants = []
        self.__gram_matrix_unnorm = None
        self.__runtime_precompute_gm = None
    def set_options(self, **kwargs):
        self._kernel_options = kwargs.get('kernel_options', {})
        self._graph_kernel = kwargs.get('graph_kernel', None)
        self._verbose = kwargs.get('verbose', 2)
        self.__ged_options = kwargs.get('ged_options', {})
        self.__mge_options = kwargs.get('mge_options', {})
        self.__fit_method = kwargs.get('fit_method', 'k-graphs')
        self.__init_ecc = kwargs.get('init_ecc', None)
        self.__edit_cost_constants = kwargs.get('edit_cost_constants', [])
        self.__parallel = kwargs.get('parallel', True)
        self.__n_jobs = kwargs.get('n_jobs', multiprocessing.cpu_count())
        self.__ds_name = kwargs.get('ds_name', None)
        self.__time_limit_in_sec = kwargs.get('time_limit_in_sec', 0)
        self.__max_itrs = kwargs.get('max_itrs', 100)
        self.__max_itrs_without_update = kwargs.get('max_itrs_without_update', 3)
        self.__epsilon_residual = kwargs.get('epsilon_residual', 0.01)
        self.__epsilon_ec = kwargs.get('epsilon_ec', 0.1)
        self.__gram_matrix_unnorm = kwargs.get('gram_matrix_unnorm', None)
        self.__runtime_precompute_gm = kwargs.get('runtime_precompute_gm', None)
    def run(self):
        self.__set_graph_kernel_by_name()

        # record start time.
        start = time.time()

        # 1. precompute gram matrix.
        if self.__gram_matrix_unnorm is None:
            gram_matrix, run_time = self._graph_kernel.compute(self._dataset.graphs, **self._kernel_options)
            self.__gram_matrix_unnorm = self._graph_kernel.gram_matrix_unnorm
            end_precompute_gm = time.time()
            self.__runtime_precompute_gm = end_precompute_gm - start
        else:
            if self.__runtime_precompute_gm is None:
                raise Exception('Parameter "runtime_precompute_gm" must be given when using a pre-computed Gram matrix.')
            self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
            if self._kernel_options['normalize']:
                self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
            else:
                self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
            end_precompute_gm = time.time()
            start -= self.__runtime_precompute_gm
        if self.__fit_method != 'k-graphs' and self.__fit_method != 'whole-dataset':
            start = time.time()
            self.__runtime_precompute_gm = 0
            end_precompute_gm = start

        # 2. optimize edit cost constants.
        self.__optimize_edit_cost_constants()
        end_optimize_ec = time.time()
        self.__runtime_optimize_ec = end_optimize_ec - end_precompute_gm

        # 3. compute set median and gen median using optimized edit costs.
        if self._verbose >= 2:
            print('\nstart computing set median and gen median using optimized edit costs...\n')
        # group_fnames = [Gn[g].graph['filename'] for g in group_min]
        self.__generate_preimage_iam()
        end_generate_preimage = time.time()
        self.__runtime_generate_preimage = end_generate_preimage - end_optimize_ec
        self.__runtime_total = end_generate_preimage - start
        if self._verbose >= 2:
            print('medians computed.')
            print('SOD of the set median: ', self.__sod_set_median)
            print('SOD of the generalized median: ', self.__sod_gen_median)

        # 4. compute kernel distances to the true median.
        if self._verbose >= 2:
            print('\nstart computing distances to true median....\n')
        # Gn_median = [Gn[g].copy() for g in group_min]
        self.__compute_distances_to_true_median()
        # dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min =
        # idx_dis_k_gi_min = group_min[idx_dis_k_gi_min]
        # print('index min dis_k_gi:', idx_dis_k_gi_min)
        # print('sod_sm:', sod_sm)
        # print('sod_gm:', sod_gm)

        # 5. print out results.
        if self._verbose:
            print()
            print('================================================================================')
            print('Finished generation of preimages.')
            print('--------------------------------------------------------------------------------')
            print('The optimized edit cost constants:', self.__edit_cost_constants)
            print('SOD of the set median:', self.__sod_set_median)
            print('SOD of the generalized median:', self.__sod_gen_median)
            print('Distance in kernel space for set median:', self.__k_dis_set_median)
            print('Distance in kernel space for generalized median:', self.__k_dis_gen_median)
            print('Minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
            print('Time to pre-compute Gram matrix:', self.__runtime_precompute_gm)
            print('Time to optimize edit costs:', self.__runtime_optimize_ec)
            print('Time to generate pre-images:', self.__runtime_generate_preimage)
            print('Total time:', self.__runtime_total)
            print('Total number of iterations for optimizing:', self.__itrs)
            print('Total number of updates of edit costs:', self.__num_updates_ecc)
            print('Has the optimization of edit costs converged:', self.__converged)
            print('================================================================================')
            print()

        # collect return values.
        # return (sod_sm, sod_gm), \
        #        (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \
        #        (time_fitting, time_generating)
    def get_results(self):
        results = {}
        results['edit_cost_constants'] = self.__edit_cost_constants
        results['runtime_precompute_gm'] = self.__runtime_precompute_gm
        results['runtime_optimize_ec'] = self.__runtime_optimize_ec
        results['runtime_generate_preimage'] = self.__runtime_generate_preimage
        results['runtime_total'] = self.__runtime_total
        results['sod_set_median'] = self.__sod_set_median
        results['sod_gen_median'] = self.__sod_gen_median
        results['k_dis_set_median'] = self.__k_dis_set_median
        results['k_dis_gen_median'] = self.__k_dis_gen_median
        results['k_dis_dataset'] = self.__k_dis_dataset
        results['itrs'] = self.__itrs
        results['converged'] = self.__converged
        results['num_updates_ecc'] = self.__num_updates_ecc
        return results
    def __optimize_edit_cost_constants(self):
        """Fit edit cost constants.
        """
        if self.__fit_method == 'random':  # random
            if self.__ged_options['edit_cost'] == 'LETTER':
                self.__edit_cost_constants = random.sample(range(1, 10), 3)
                self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants]
            elif self.__ged_options['edit_cost'] == 'LETTER2':
                random.seed(time.time())
                self.__edit_cost_constants = random.sample(range(1, 10), 5)
                # self.__edit_cost_constants = [item * 0.1 for item in self.__edit_cost_constants]
            elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
                self.__edit_cost_constants = random.sample(range(1, 10), 6)
                if self._dataset.node_attrs == []:
                    self.__edit_cost_constants[2] = 0
                if self._dataset.edge_attrs == []:
                    self.__edit_cost_constants[5] = 0
            else:
                self.__edit_cost_constants = random.sample(range(1, 10), 6)
            if self._verbose >= 2:
                print('edit cost constants used:', self.__edit_cost_constants)
        elif self.__fit_method == 'expert':  # expert
            if self.__init_ecc is None:
                if self.__ged_options['edit_cost'] == 'LETTER':
                    self.__edit_cost_constants = [0.9, 1.7, 0.75]
                elif self.__ged_options['edit_cost'] == 'LETTER2':
                    self.__edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
                else:
                    self.__edit_cost_constants = [3, 3, 1, 3, 3, 1]
            else:
                self.__edit_cost_constants = self.__init_ecc
        elif self.__fit_method == 'k-graphs':
            if self.__init_ecc is None:
                if self.__ged_options['edit_cost'] == 'LETTER':
                    self.__init_ecc = [0.9, 1.7, 0.75]
                elif self.__ged_options['edit_cost'] == 'LETTER2':
                    self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
                elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
                    self.__init_ecc = [0, 0, 1, 1, 1, 0]
                    if self._dataset.node_attrs == []:
                        self.__init_ecc[2] = 0
                    if self._dataset.edge_attrs == []:
                        self.__init_ecc[5] = 0
                else:
                    self.__init_ecc = [3, 3, 1, 3, 3, 1]
            # optimize on the k-graph subset.
            self.__optimize_ecc_by_kernel_distances()
        elif self.__fit_method == 'whole-dataset':
            if self.__init_ecc is None:
                if self.__ged_options['edit_cost'] == 'LETTER':
                    self.__init_ecc = [0.9, 1.7, 0.75]
                elif self.__ged_options['edit_cost'] == 'LETTER2':
                    self.__init_ecc = [0.675, 0.675, 0.75, 0.425, 0.425]
                else:
                    self.__init_ecc = [3, 3, 1, 3, 3, 1]
            # optimize on the whole set.
            self.__optimize_ecc_by_kernel_distances()
        elif self.__fit_method == 'precomputed':
            pass
    def __optimize_ecc_by_kernel_distances(self):
        # compute distances in feature space.
        dis_k_mat, _, _, _ = self._graph_kernel.compute_distance_matrix()
        dis_k_vec = []
        for i in range(len(dis_k_mat)):
            # for j in range(i, len(dis_k_mat)):
            for j in range(i + 1, len(dis_k_mat)):
                dis_k_vec.append(dis_k_mat[i, j])
        dis_k_vec = np.array(dis_k_vec)

        # init ged.
        if self._verbose >= 2:
            print('\ninitial:')
        time0 = time.time()
        graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
        self.__edit_cost_constants = self.__init_ecc
        options = self.__ged_options.copy()
        options['edit_cost_constants'] = self.__edit_cost_constants  # @todo
        options['node_labels'] = self._dataset.node_labels
        options['edge_labels'] = self._dataset.edge_labels
        options['node_attrs'] = self._dataset.node_attrs
        options['edge_attrs'] = self._dataset.edge_attrs
        ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
        residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]
        time_list = [time.time() - time0]
        edit_cost_list = [self.__init_ecc]
        nb_cost_mat = np.array(n_edit_operations)
        nb_cost_mat_list = [nb_cost_mat]
        if self._verbose >= 2:
            print('Current edit cost constants:', self.__edit_cost_constants)
            print('Residual list:', residual_list)

        # run iteration from initial edit costs.
        self.__converged = False
        itrs_without_update = 0
        self.__itrs = 0
        self.__num_updates_ecc = 0
        timer = Timer(self.__time_limit_in_sec)
        while not self.__termination_criterion_met(self.__converged, timer, self.__itrs, itrs_without_update):
            if self._verbose >= 2:
                print('\niteration', self.__itrs + 1)
            time0 = time.time()
            # "fit" geds to distances in feature space by tuning edit costs using the Least Squares Method.
            # np.savez('results/xp_fit_method/fit_data_debug' + str(self.__itrs) + '.gm',
            #          nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec,
            #          n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
            #          ged_mat=ged_mat)
            self.__edit_cost_constants, _ = self.__update_ecc(nb_cost_mat, dis_k_vec)
            for i in range(len(self.__edit_cost_constants)):
                if -1e-9 <= self.__edit_cost_constants[i] <= 1e-9:
                    self.__edit_cost_constants[i] = 0
                if self.__edit_cost_constants[i] < 0:
                    raise ValueError('The edit cost is negative.')
            # for i in range(len(self.__edit_cost_constants)):
            #     if self.__edit_cost_constants[i] < 0:
            #         self.__edit_cost_constants[i] = 0

            # compute new GEDs and numbers of edit operations.
            options = self.__ged_options.copy()  # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
            options['edit_cost_constants'] = self.__edit_cost_constants  # @todo
            options['node_labels'] = self._dataset.node_labels
            options['edge_labels'] = self._dataset.edge_labels
            options['node_attrs'] = self._dataset.node_attrs
            options['edge_attrs'] = self._dataset.edge_attrs
            ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
            residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
            time_list.append(time.time() - time0)
            edit_cost_list.append(self.__edit_cost_constants)
            nb_cost_mat = np.array(n_edit_operations)
            nb_cost_mat_list.append(nb_cost_mat)

            # check convergence.
            ec_changed = False
            for i, cost in enumerate(self.__edit_cost_constants):
                if cost == 0:
                    if edit_cost_list[-2][i] > self.__epsilon_ec:
                        ec_changed = True
                        break
                elif abs(cost - edit_cost_list[-2][i]) / cost > self.__epsilon_ec:
                    ec_changed = True
                    break
                # if abs(cost - edit_cost_list[-2][i]) > self.__epsilon_ec:
                #     ec_changed = True
                #     break
            residual_changed = False
            if residual_list[-1] == 0:
                if residual_list[-2] > self.__epsilon_residual:
                    residual_changed = True
            elif abs(residual_list[-1] - residual_list[-2]) / residual_list[-1] > self.__epsilon_residual:
                residual_changed = True
            self.__converged = not (ec_changed or residual_changed)
            if self.__converged:
                itrs_without_update += 1
            else:
                itrs_without_update = 0
                self.__num_updates_ecc += 1

            # print current states.
            if self._verbose >= 2:
                print()
                print('-------------------------------------------------------------------------')
                print('States of iteration', self.__itrs + 1)
                print('-------------------------------------------------------------------------')
                # print('Time spent:', self.__runtime_optimize_ec)
                print('Total number of iterations for optimizing:', self.__itrs + 1)
                print('Total number of updates of edit costs:', self.__num_updates_ecc)
                print('Has the optimization of edit costs converged:', self.__converged)
                print('Did edit costs change:', ec_changed)
                print('Did residual change:', residual_changed)
                print('Iterations without update:', itrs_without_update)
                print('Current edit cost constants:', self.__edit_cost_constants)
                print('Residual list:', residual_list)
                print('-------------------------------------------------------------------------')

            self.__itrs += 1
    def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
        if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
            # if self.__state == AlgorithmState.TERMINATED:
            #     self.__state = AlgorithmState.INITIALIZED
            return True
        return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
    def __update_ecc(self, nb_cost_mat, dis_k_vec, rw_constraints='inequality'):
        # if self.__ds_name == 'Letter-high':
        if self.__ged_options['edit_cost'] == 'LETTER':
            pass
            # # method 1: set alpha automatically, just tune c_vir and c_eir by
            # # LMS using cvxpy.
            # alpha = 0.5
            # coeff = 100  # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
            ## if np.count_nonzero(nb_cost_mat[:,4]) == 0:
            ##     alpha = 0.75
            ## else:
            ##     alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
            ##     alpha = alpha * 0.99
            # param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
            # param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
            # nb_cost_mat_new = np.column_stack((param_vir, param_eir))
            # dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
            #
            # x = cp.Variable(nb_cost_mat_new.shape[1])
            # cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
            # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
            # prob = cp.Problem(cp.Minimize(cost), constraints)
            # prob.solve()
            # edit_costs_new = x.value
            # edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
            # residual = np.sqrt(prob.value)

            # # method 2: tune c_vir, c_eir and alpha by nonlinear programming by
            # # scipy.optimize.minimize.
            # w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
            # w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
            # w2 = nb_cost_mat[:,3]
            # w3 = dis_k_vec
            # func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
            #                              + w2 * x[2] - w3 * x[3]) ** 2)
            # bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
            # res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
            # edit_costs_new = res.x[0:3]
            # residual = res.fun

            # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.

            # # method 4: tune c_vir, c_eir and alpha by QP function
            # # scipy.optimize.least_squares. An initial guess is required.
            # w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
            # w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
            # w2 = nb_cost_mat[:,3]
            # w3 = dis_k_vec
            # func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
            #                   + w2 * x[2] - w3 * x[3]) ** 2
            # res = optimize.root(func, [0.9, 1.7, 0.75, 100])
            # edit_costs_new = res.x
            # residual = None
        elif self.__ged_options['edit_cost'] == 'LETTER2':
            # # 1. if c_vi != c_vr, c_ei != c_er.
            # nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
            # x = cp.Variable(nb_cost_mat_new.shape[1])
            # cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
            ## # 1.1 no constraints.
            ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
            # # 1.2 c_vs <= c_vi + c_vr.
            # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
            #                np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
            ## # 2. if c_vi == c_vr, c_ei == c_er.
            ## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
            ## nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
            ## nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
            ## x = cp.Variable(nb_cost_mat_new.shape[1])
            ## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
            ## # 2.1 no constraints.
            ## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
            ### # 2.2 c_vs <= c_vi + c_vr.
            ### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
            ###                np.array([2.0, -1.0, 0.0]).T@x >= 0.0]
            #
            # prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            # prob.solve()
            # edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
            # edit_costs_new = np.array(edit_costs_new)
            # residual = np.sqrt(prob.value)
            if rw_constraints == 'inequality':
                # c_vs <= c_vi + c_vr.
                nb_cost_mat_new = nb_cost_mat[:, [0, 1, 3, 4, 5]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                self.__execute_cvx(prob)
                edit_costs_new = x.value
                residual = np.sqrt(prob.value)
            elif rw_constraints == '2constraints':
                # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
                nb_cost_mat_new = nb_cost_mat[:, [0, 1, 3, 4, 5]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
                               np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
                               np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                edit_costs_new = x.value
                residual = np.sqrt(prob.value)
            elif rw_constraints == 'no-constraint':
                # no constraint.
                nb_cost_mat_new = nb_cost_mat[:, [0, 1, 3, 4, 5]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                edit_costs_new = x.value
                residual = np.sqrt(prob.value)
            # elif method == 'inequality_modified':
            #     # c_vs <= c_vi + c_vr.
            #     nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
            #     x = cp.Variable(nb_cost_mat_new.shape[1])
            #     cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
            #     constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
            #                    np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
            #     prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            #     prob.solve()
            #     # use the same costs for insertion and removal rather than the fitted costs.
            #     edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
            #     edit_costs_new = np.array(edit_costs_new)
            #     residual = np.sqrt(prob.value)
        elif self.__ged_options['edit_cost'] == 'NON_SYMBOLIC':
            is_n_attr = np.count_nonzero(nb_cost_mat[:, 2])
            is_e_attr = np.count_nonzero(nb_cost_mat[:, 5])
            if self.__ds_name == 'SYNTHETICnew':
                # nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
                nb_cost_mat_new = nb_cost_mat[:, [2, 3, 4]]
                x = cp.Variable(nb_cost_mat_new.shape[1])
                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                # constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
                #                np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
                # constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
                constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
                               np.array([0.0, 1.0, -1.0]).T@x == 0.0]
                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                prob.solve()
                # print(x.value)
                edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value,
                                                 np.array([0.0])))
                residual = np.sqrt(prob.value)
            elif rw_constraints == 'inequality':
                # c_vs <= c_vi + c_vr.
                if is_n_attr and is_e_attr:
                    nb_cost_mat_new = nb_cost_mat[:, [0, 1, 2, 3, 4, 5]]
                    x = cp.Variable(nb_cost_mat_new.shape[1])
                    cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                    constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                                   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                                   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                    self.__execute_cvx(prob)
                    edit_costs_new = x.value
                    residual = np.sqrt(prob.value)
                elif is_n_attr and not is_e_attr:
                    nb_cost_mat_new = nb_cost_mat[:, [0, 1, 2, 3, 4]]
                    x = cp.Variable(nb_cost_mat_new.shape[1])
                    cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                    constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                    self.__execute_cvx(prob)
                    edit_costs_new = np.concatenate((x.value, np.array([0.0])))
                    residual = np.sqrt(prob.value)
                elif not is_n_attr and is_e_attr:
                    nb_cost_mat_new = nb_cost_mat[:, [0, 1, 3, 4, 5]]
                    x = cp.Variable(nb_cost_mat_new.shape[1])
                    cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                    constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
                                   np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
                    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                    self.__execute_cvx(prob)
                    edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
                    residual = np.sqrt(prob.value)
                else:
                    nb_cost_mat_new = nb_cost_mat[:, [0, 1, 3, 4]]
                    x = cp.Variable(nb_cost_mat_new.shape[1])
                    cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
                    constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
                    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
                    self.__execute_cvx(prob)
                    edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]),
                                                     x.value[2:], np.array([0.0])))
                    residual = np.sqrt(prob.value)
        elif self.__ged_options['edit_cost'] == 'CONSTANT':  # @todo: node/edge may not be labeled.
            x = cp.Variable(nb_cost_mat.shape[1])
            cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
            constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
                           np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                           np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            self.__execute_cvx(prob)
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
        else:
            # # method 1: simple least square method.
            # edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
            #                                                  rcond=None)

            # # method 2: least square method with x_i >= 0.
            # edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)

            # method 3: solve as a quadratic program with constraints.
            # P = np.dot(nb_cost_mat.T, nb_cost_mat)
            # q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
            # G = -1 * np.identity(nb_cost_mat.shape[1])
            # h = np.array([0 for i in range(nb_cost_mat.shape[1])])
            # A = np.array([1 for i in range(nb_cost_mat.shape[1])])
            # b = 1
            # x = cp.Variable(nb_cost_mat.shape[1])
            # prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
            #                   [G@x <= h])
            # prob.solve()
            # edit_costs_new = x.value
            # residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)

            # G = -1 * np.identity(nb_cost_mat.shape[1])
            # h = np.array([0 for i in range(nb_cost_mat.shape[1])])
            x = cp.Variable(nb_cost_mat.shape[1])
            cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
            constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
                           # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
                           np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
                           np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
            self.__execute_cvx(prob)
            edit_costs_new = x.value
            residual = np.sqrt(prob.value)
            # method 4:

        return edit_costs_new, residual
    def __execute_cvx(self, prob):
        try:
            prob.solve(verbose=(self._verbose >= 2))
        except MemoryError as error0:
            if self._verbose >= 2:
                print('\nUsing solver "OSQP" caused a memory error.')
                print('The original error message is\n', error0)
                print('solver status: ', prob.status)
                print('trying solver "CVXOPT" instead...\n')
            try:
                prob.solve(solver=cp.CVXOPT, verbose=(self._verbose >= 2))
            except Exception as error1:
                if self._verbose >= 2:
                    print('\nAn error occurred when using solver "CVXOPT".')
                    print('The original error message is\n', error1)
                    print('solver status: ', prob.status)
                    print('trying solver "MOSEK" instead. Note that this solver is commercial and a license is required.\n')
                prob.solve(solver=cp.MOSEK, verbose=(self._verbose >= 2))
            else:
                if self._verbose >= 2:
                    print('solver status: ', prob.status)
        else:
            if self._verbose >= 2:
                print('solver status: ', prob.status)
        if self._verbose >= 2:
            print()
    def __generate_preimage_iam(self):
        # Set up the GED environment.
        ged_env = gedlibpy.GEDEnv()  # @todo: maybe create a ged_env as a private variable.
        # gedlibpy.restart_env()
        ged_env.set_edit_cost(self.__ged_options['edit_cost'], edit_cost_constant=self.__edit_cost_constants)
        graphs = [self.__clean_graph(g) for g in self._dataset.graphs]
        for g in graphs:
            ged_env.add_nx_graph(g, '')
        graph_ids = ged_env.get_all_graph_ids()
        set_median_id = ged_env.add_graph('set_median')
        gen_median_id = ged_env.add_graph('gen_median')
        ged_env.init(init_option=self.__ged_options['init_option'])

        # Set up the median graph estimator.
        mge = MedianGraphEstimator(ged_env, constant_node_costs(self.__ged_options['edit_cost']))
        mge.set_refine_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
        options = self.__mge_options.copy()
        if 'seed' not in options:
            options['seed'] = int(round(time.time() * 1000))  # @todo: may not work correctly for possible parallel usage.

        # Select the GED algorithm.
        mge.set_options(mge_options_to_string(options))
        mge.set_label_names(node_labels=self._dataset.node_labels,
                            edge_labels=self._dataset.edge_labels,
                            node_attrs=self._dataset.node_attrs,
                            edge_attrs=self._dataset.edge_attrs)
        mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
        mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))

        # Run the estimator.
        mge.run(graph_ids, set_median_id, gen_median_id)

        # Get SODs.
        self.__sod_set_median = mge.get_sum_of_distances('initialized')
        self.__sod_gen_median = mge.get_sum_of_distances('converged')

        # Get median graphs.
        self.__set_median = ged_env.get_nx_graph(set_median_id)
        self.__gen_median = ged_env.get_nx_graph(gen_median_id)
    def __compute_distances_to_true_median(self):
        # compute distance in kernel space for set median.
        kernels_to_sm, _ = self._graph_kernel.compute(self.__set_median, self._dataset.graphs, **self._kernel_options)
        kernel_sm, _ = self._graph_kernel.compute(self.__set_median, self.__set_median, **self._kernel_options)
        kernels_to_sm = [kernels_to_sm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_sm) for i in range(len(kernels_to_sm))]  # normalize
        # @todo: not correct kernel value
        gram_with_sm = np.concatenate((np.array([kernels_to_sm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
        gram_with_sm = np.concatenate((np.array([[1] + kernels_to_sm]).T, gram_with_sm), axis=1)
        self.__k_dis_set_median = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)),
                                                [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
                                                gram_with_sm, withterm3=False)
        # print(gen_median.nodes(data=True))
        # print(gen_median.edges(data=True))
        # print(set_median.nodes(data=True))
        # print(set_median.edges(data=True))

        # compute distance in kernel space for generalized median.
        kernels_to_gm, _ = self._graph_kernel.compute(self.__gen_median, self._dataset.graphs, **self._kernel_options)
        kernel_gm, _ = self._graph_kernel.compute(self.__gen_median, self.__gen_median, **self._kernel_options)
        kernels_to_gm = [kernels_to_gm[i] / np.sqrt(self.__gram_matrix_unnorm[i, i] * kernel_gm) for i in range(len(kernels_to_gm))]  # normalize
        gram_with_gm = np.concatenate((np.array([kernels_to_gm]), np.copy(self._graph_kernel.gram_matrix)), axis=0)
        gram_with_gm = np.concatenate((np.array([[1] + kernels_to_gm]).T, gram_with_gm), axis=1)
        self.__k_dis_gen_median = compute_k_dis(0, range(1, 1 + len(self._dataset.graphs)),
                                                [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
                                                gram_with_gm, withterm3=False)

        # compute distance in kernel space for each graph in median set.
        k_dis_median_set = []
        for idx in range(len(self._dataset.graphs)):
            k_dis_median_set.append(compute_k_dis(idx + 1, range(1, 1 + len(self._dataset.graphs)),
                                                  [1 / len(self._dataset.graphs)] * len(self._dataset.graphs),
                                                  gram_with_gm, withterm3=False))
        idx_k_dis_median_set_min = np.argmin(k_dis_median_set)
        self.__k_dis_dataset = k_dis_median_set[idx_k_dis_median_set_min]
        self.__best_from_dataset = self._dataset.graphs[idx_k_dis_median_set_min].copy()

        if self._verbose >= 2:
            print()
            print('distance in kernel space for set median:', self.__k_dis_set_median)
            print('distance in kernel space for generalized median:', self.__k_dis_gen_median)
            print('minimum distance in kernel space for each graph in median set:', self.__k_dis_dataset)
            print('distance in kernel space for each graph in median set:', k_dis_median_set)
    def __set_graph_kernel_by_name(self):
        if self._kernel_options['name'] == 'structuralspkernel':
            from gklearn.kernels import StructuralSP
            self._graph_kernel = StructuralSP(node_labels=self._dataset.node_labels,
                                              edge_labels=self._dataset.edge_labels,
                                              node_attrs=self._dataset.node_attrs,
                                              edge_attrs=self._dataset.edge_attrs,
                                              ds_infos=self._dataset.get_dataset_infos(keys=['directed']),
                                              **self._kernel_options)
    # def __clean_graph(self, G, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
    def __clean_graph(self, G):  # @todo: this may not be needed when datafile is updated.
        """
        Cleans node and edge labels and attributes of the given graph.
        """
        G_new = nx.Graph(**G.graph)
        for nd, attrs in G.nodes(data=True):
            G_new.add_node(str(nd))  # @todo: should we keep this as str()?
            for l_name in self._dataset.node_labels:
                G_new.nodes[str(nd)][l_name] = str(attrs[l_name])
            for a_name in self._dataset.node_attrs:
                G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
        for nd1, nd2, attrs in G.edges(data=True):
            G_new.add_edge(str(nd1), str(nd2))
            for l_name in self._dataset.edge_labels:
                G_new.edges[str(nd1), str(nd2)][l_name] = str(attrs[l_name])
            for a_name in self._dataset.edge_attrs:
                G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
        return G_new
    @property
    def mge(self):
        return self.__mge

    @property
    def ged_options(self):
        return self.__ged_options

    @ged_options.setter
    def ged_options(self, value):
        self.__ged_options = value

    @property
    def mge_options(self):
        return self.__mge_options

    @mge_options.setter
    def mge_options(self, value):
        self.__mge_options = value

    @property
    def fit_method(self):
        return self.__fit_method

    @fit_method.setter
    def fit_method(self, value):
        self.__fit_method = value

    @property
    def init_ecc(self):
        return self.__init_ecc

    @init_ecc.setter
    def init_ecc(self, value):
        self.__init_ecc = value

    @property
    def set_median(self):
        return self.__set_median

    @property
    def gen_median(self):
        return self.__gen_median

    @property
    def best_from_dataset(self):
        return self.__best_from_dataset

    @property
    def gram_matrix_unnorm(self):
        return self.__gram_matrix_unnorm

    @gram_matrix_unnorm.setter
    def gram_matrix_unnorm(self, value):
        self.__gram_matrix_unnorm = value

A Python package for graph kernels, graph edit distances, and the graph pre-image problem.
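
Below is a minimal usage sketch of MedianPreimageGenerator, not part of the repository file above. It assumes `dataset` is a gklearn dataset object of the kind PreimageGenerator expects (exposing graphs, node_labels, edge_labels, node_attrs and edge_attrs), that the class is importable from gklearn.preimage, and that the option values shown are illustrative placeholders rather than the exact settings used in the gklearn experiments.

# Hypothetical driver script; option values below are placeholders chosen for illustration.
import multiprocessing
from gklearn.preimage import MedianPreimageGenerator

# `dataset` is assumed to be loaded elsewhere (a gklearn Dataset-like object).
mpg = MedianPreimageGenerator(dataset=dataset)
mpg.set_options(
    fit_method='k-graphs',  # fit edit costs on the k-graph subset (see __optimize_edit_cost_constants)
    ged_options={'edit_cost': 'LETTER2',  # one of the edit-cost models handled in __update_ecc
                 'method': 'IPFP',
                 'init_option': 'EAGER_WITHOUT_SHUFFLED_COPIES'},
    mge_options={'init_type': 'MEDOID'},
    kernel_options={'name': 'structuralspkernel', 'normalize': True},
    parallel=True,
    n_jobs=multiprocessing.cpu_count(),
    max_itrs=100,
    verbose=2)
mpg.run()
results = mpg.get_results()
print('SOD of the generalized median:', results['sod_gen_median'])
print('Kernel distance of the generalized median:', results['k_dis_gen_median'])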