Merge pull request #11 from jajupmochi/v0.2

V0.2
5 years ago · 8efc673bde
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,7 @@ gklearn/kernels/*_sym.py
 gklearn/preimage/*
 !gklearn/preimage/*.py
 !gklearn/preimage/experiments/*.py
 __pycache__
 ##*#
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,6 +1,10 @@
 language: python
 python:
 - '3.6.9'
 - '3.0'
 - '3.1'
 - '3.2'
 - '3.3'
 - '3.4'
 - '3.5'
 - '3.6'
 - '3.7'
--- a/gklearn/ged/median/median_graph_estimator.py
+++ b/gklearn/ged/median/median_graph_estimator.py
@@ -70,6 +70,7 @@ class MedianGraphEstimator(object):
 		self.__num_increase_order = 0
 		self.__num_converged_descents = 0
 		self.__state = AlgorithmState.TERMINATED
 		self.__label_names = {}
 		if ged_env is None:
 			raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.')
@@ -551,6 +552,7 @@ class MedianGraphEstimator(object):
 		self.__init_type_increase_order = 'K-MEANS++'
 		self.__max_itrs_increase_order = 10
 		self.__print_to_stdout = 2
 		self.__label_names = {}
 	def __construct_initial_medians(self, graph_ids, timer, initial_medians):
@@ -666,7 +668,8 @@ class MedianGraphEstimator(object):
 			# Compute the median label and update the median.
 			if len(node_labels) > 0:
 				median_label = self.__ged_env.get_median_node_label(node_labels)
 # 				median_label = self.__ged_env.get_median_node_label(node_labels)
 				median_label = self.__get_median_node_label(node_labels)
 				if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
 					nx.set_node_attributes(median, {i: median_label})
@@ -701,7 +704,7 @@ class MedianGraphEstimator(object):
 				if median.has_edge(i, j):
 					median_label = median.edges[(i, j)]
 				if self.__labeled_edges and len(edge_labels) > 0:
 					new_median_label = self.__ged_env.median_edge_label(edge_labels)
 					new_median_label = self.__get_median_edge_label(edge_labels)
 					if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon:
 						median_label = new_median_label
 					for edge_label in edge_labels:
@@ -821,4 +824,170 @@ class MedianGraphEstimator(object):
 	def compute_my_cost(g, h, node_map):
 		cost = 0.0
 		for node in g.nodes:
 			cost += 0
 			cost += 0
 	def set_label_names(self, node_labels=[], edge_labels=[], node_attrs=[], edge_attrs=[]):
 		"""Record the names of the node/edge labels and attributes.

 		The stored names are consulted when a median node or edge label is
 		computed: non-empty 'node_labels'/'edge_labels' select the symbolic
 		median, otherwise non-empty 'node_attrs'/'edge_attrs' select the
 		non-symbolic one.

 		Parameters
 		----------
 		node_labels, edge_labels : iterable of label names (symbolic).
 		node_attrs, edge_attrs : iterable of attribute names (non-symbolic).
 		"""
 		# Store copies: the default arguments are shared list objects, and the
 		# caller's lists may be modified later. Neither should leak into (or out
 		# of) this estimator's configuration.
 		self.__label_names = {'node_labels': list(node_labels), 'edge_labels': list(edge_labels),
 						'node_attrs': list(node_attrs), 'edge_attrs': list(edge_attrs)}
 	def __get_median_node_label(self, node_labels):
 		"""Return the median of the given node labels.

 		Dispatches on the configured label names: symbolic node labels take
 		precedence over node attributes.

 		Raises
 		------
 		Exception
 			If neither node label names nor node attribute names are set.
 		"""
 		names = self.__label_names
 		# Symbolic labels win when both kinds of names are configured.
 		if len(names['node_labels']) > 0:
 			return self.__get_median_label_symbolic(node_labels)
 		if len(names['node_attrs']) > 0:
 			return self.__get_median_label_nonsymbolic(node_labels)
 		raise Exception('Node label names are not given.')
 	def __get_median_edge_label(self, edge_labels):
 		"""Return the median of the given edge labels.

 		Dispatches on the configured label names: symbolic edge labels take
 		precedence over edge attributes.

 		Raises
 		------
 		Exception
 			If neither edge label names nor edge attribute names are set.
 		"""
 		names = self.__label_names
 		# Symbolic labels win when both kinds of names are configured.
 		if len(names['edge_labels']) > 0:
 			return self.__get_median_label_symbolic(edge_labels)
 		if len(names['edge_attrs']) > 0:
 			return self.__get_median_label_nonsymbolic(edge_labels)
 		raise Exception('Edge label names are not given.')
 	def __get_median_label_symbolic(self, labels):
 		"""Return the label (a dict) that occurs most often in `labels`.

 		Labels are compared through the tuple of their (key, value) items, so
 		two dicts holding the same items in a different order are counted
 		separately. An empty input yields an empty dict. On a tie the first
 		maximal entry in the histogram's iteration order is returned.
 		"""
 		# Histogram keyed by a hashable snapshot of each label.
 		counts = {}
 		for raw_label in labels:
 			key = tuple(raw_label.items()) # @todo: this may be slow.
 			counts[key] = counts.get(key, 0) + 1

 		if not counts:
 			return {}

 		# max() keeps the first maximal key it meets while iterating the dict.
 		winner = max(counts, key=counts.get)
 		return dict(winner)
 	def __get_median_label_nonsymbolic(self, labels):
 		"""Compute the geometric median of non-symbolic (numeric) labels.

 		Every label is a dict mapping an attribute name to a value convertible
 		with float(); all labels are expected to share the keys of labels[0].
 		The mean label is used as the initial solution and is then refined
 		with Weiszfeld's algorithm until the L1 change of the estimate drops to
 		0.0001, 100 iterations were run, or no label differs from the current
 		estimate.

 		Parameters
 		----------
 		labels : list of dict
 			The labels to summarise.

 		Returns
 		-------
 		dict
 			Attribute name -> median coordinate, converted to str. An empty
 			dict if `labels` is empty.
 		"""
 		if len(labels) == 0:
 			return {} # @todo

 		# Transform the labels into coordinates and compute mean label as initial solution.
 		labels_as_coords = []
 		sums = {key: 0 for key in labels[0]}
 		for label in labels:
 			coords = {}
 			for key, val in label.items():
 				# A dedicated name keeps the outer loop variable `label` intact.
 				coord = float(val)
 				sums[key] += coord
 				coords[key] = coord
 			labels_as_coords.append(coords)
 		median = {key: total / len(labels) for key, total in sums.items()}

 		# Run main loop of Weiszfeld's Algorithm.
 		epsilon = 0.0001
 		delta = 1.0
 		num_itrs = 0
 		all_equal = False
 		while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
 			numerator = {key: 0 for key in sums}
 			denominator = 0
 			for label_as_coord in labels_as_coords:
 				# Euclidean distance between this label and the current estimate.
 				norm = 0
 				for key, val in label_as_coord.items():
 					norm += (val - median[key]) ** 2
 				# Assign (not accumulate): the weight must be 1 / distance.
 				norm = np.sqrt(norm)
 				# Labels coinciding with the estimate are skipped to avoid a
 				# division by zero.
 				if norm > 0:
 					for key, val in label_as_coord.items():
 						numerator[key] += val / norm
 					denominator += 1.0 / norm
 			if denominator == 0:
 				# Every label equals the current estimate; nothing left to refine.
 				all_equal = True
 			else:
 				new_median = {}
 				delta = 0.0
 				for key, val in numerator.items():
 					this_median = val / denominator
 					new_median[key] = this_median
 					delta += np.abs(median[key] - this_median)
 				median = new_median
 			num_itrs += 1

 		# Transform the solution to strings and return it.
 		median_label = {}
 		for key, val in median.items():
 			median_label[key] = str(val)
 		return median_label
 # 	def __get_median_edge_label_symbolic(self, edge_labels):
 # 		pass
 # 	def __get_median_edge_label_nonsymbolic(self, edge_labels):
 # 		if len(edge_labels) == 0:
 # 			return {}
 # 		else:
 # 			# Transform the labels into coordinates and compute mean label as initial solution.
 # 			edge_labels_as_coords = []
 # 			sums = {}
 # 			for key, val in edge_labels[0].items():
 # 				sums[key] = 0
 # 			for edge_label in edge_labels:
 # 				coords = {}
 # 				for key, val in edge_label.items():
 # 					label = float(val)
 # 					sums[key] += label
 # 					coords[key] = label
 # 				edge_labels_as_coords.append(coords)
 # 			median = {}
 # 			for key, val in sums.items():
 # 				median[key] = val / len(edge_labels)
 # 				
 # 			# Run main loop of Weiszfeld's Algorithm.
 # 			epsilon = 0.0001
 # 			delta = 1.0
 # 			num_itrs = 0
 # 			all_equal = False
 # 			while ((delta > epsilon) and (num_itrs < 100) and (not all_equal)):
 # 				numerator = {}
 # 				for key, val in sums.items():
 # 					numerator[key] = 0
 # 				denominator = 0
 # 				for edge_label_as_coord in edge_labels_as_coords:
 # 					norm = 0
 # 					for key, val in edge_label_as_coord.items():
 # 						norm += (val - median[key]) ** 2
 # 					norm += np.sqrt(norm)
 # 					if norm > 0:
 # 						for key, val in edge_label_as_coord.items():
 # 							numerator[key] += val / norm
 # 						denominator += 1.0 / norm
 # 				if denominator == 0:
 # 					all_equal = True
 # 				else:
 # 					new_median = {}
 # 					delta = 0.0
 # 					for key, val in numerator.items():
 # 						this_median = val / denominator
 # 						new_median[key] = this_median
 # 						delta += np.abs(median[key] - this_median)
 # 					median = new_median
 # 					
 # 				num_itrs += 1
 # 				
 # 			# Transform the solution to ged::GXLLabel and return it.
 # 			median_label = {}
 # 			for key, val in median.items():
 # 				median_label[key] = str(val)
 # 			return median_label
--- a/gklearn/ged/median/utils.py
+++ b/gklearn/ged/median/utils.py
@@ -9,6 +9,10 @@ Created on Wed Apr  1 15:12:31 2020
 def constant_node_costs(edit_cost_name):
 	"""Tell whether the named edit cost uses constant node costs.

 	Returns False for "NON_SYMBOLIC", "LETTER" and "LETTER2", True for
 	"CONSTANT", and raises an Exception for any other name.
 	"""
 	if edit_cost_name in ('NON_SYMBOLIC', 'LETTER2', 'LETTER'):
 		return False
 	if edit_cost_name == 'CONSTANT':
 		return True
 	raise Exception('Can not recognize the given edit cost. Possible edit costs include: "NON_SYMBOLIC", "LETTER", "LETTER2", "CONSTANT".')
 #	 elif edit_cost_name != '':
 # # 		throw ged::Error("Invalid dataset " + dataset + ". Usage: ./median_tests <AIDS|Mutagenicity|Letter-high|Letter-med|Letter-low|monoterpenoides|SYNTHETICnew|Fingerprint|COIL-DEL>");
 #		 return False
--- a/gklearn/ged/util/util.py
+++ b/gklearn/ged/util/util.py
@@ -57,7 +57,10 @@ def compute_geds(graphs, options={}, parallel=False):
 	ged_env.set_method(options['method'], ged_options_to_string(options))
 	ged_env.init_method()
 	# compute ged.	
 	# compute ged.
 	neo_options = {'edit_cost': options['edit_cost'],
 				'node_labels': options['node_labels'], 'edge_labels': options['edge_labels'], 
 				'node_attrs': options['node_attrs'], 'edge_attrs': options['edge_attrs']}
 	ged_mat = np.zeros((len(graphs), len(graphs)))
 	if parallel:
 		len_itr = int(len(graphs) * (len(graphs) - 1) / 2)
@@ -74,7 +77,7 @@ def compute_geds(graphs, options={}, parallel=False):
 			G_graphs = graphs_toshare
 			G_ged_env = ged_env_toshare
 			G_listID = listID_toshare
 		do_partial = partial(_wrapper_compute_ged_parallel, options)
 		do_partial = partial(_wrapper_compute_ged_parallel, neo_options)
 		pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(graphs, ged_env, listID))
 		iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
 						desc='computing GEDs', file=sys.stdout)
@@ -100,7 +103,7 @@ def compute_geds(graphs, options={}, parallel=False):
 				ged_vec.append(dis)
 				ged_mat[i][j] = dis
 				ged_mat[j][i] = dis
 				n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, edit_cost=options['edit_cost'])
 				n_eo_tmp = get_nb_edit_operations(graphs[i], graphs[j], pi_forward, pi_backward, **neo_options)
 				n_edit_operations.append(n_eo_tmp)
 	return ged_vec, ged_mat, n_edit_operations
@@ -115,7 +118,7 @@ def _wrapper_compute_ged_parallel(options, itr):
 def _compute_ged_parallel(env, gid1, gid2, g1, g2, options):
 	dis, pi_forward, pi_backward = _compute_ged(env, gid1, gid2, g1, g2)
 	n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, edit_cost=options['edit_cost']) # [0,0,0,0,0,0]
 	n_eo_tmp = get_nb_edit_operations(g1, g2, pi_forward, pi_backward, **options) # [0,0,0,0,0,0]
 	return dis, n_eo_tmp
@@ -137,17 +140,26 @@ def _compute_ged(env, gid1, gid2, g1, g2):
 	return dis, pi_forward, pi_backward
 def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None):
 def get_nb_edit_operations(g1, g2, forward_map, backward_map, edit_cost=None, **kwargs):
 	if edit_cost == 'LETTER' or edit_cost == 'LETTER2':
 		return get_nb_edit_operations_letter(g1, g2, forward_map, backward_map)
 	elif edit_cost == 'NON_SYMBOLIC':
 		return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map)
 		node_attrs = kwargs.get('node_attrs', [])
 		edge_attrs = kwargs.get('edge_attrs', [])
 		return get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map, 
 											node_attrs=node_attrs, edge_attrs=edge_attrs)
 	elif edit_cost == 'CONSTANT':
 		node_labels = kwargs.get('node_labels', [])
 		edge_labels = kwargs.get('edge_labels', [])
 		return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map, 
 										 node_labels=node_labels, edge_labels=edge_labels)
 	else: 
 		return get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map)
 def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
 	"""Compute the number of each edit operations.
 def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map,
 									node_labels=[], edge_labels=[]):
 	"""Compute the number of each edit operations for symbolic-labeled graphs.
 	"""
 	n_vi = 0
 	n_vr = 0
@@ -160,8 +172,13 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
 	for i, map_i in enumerate(forward_map):
 		if map_i == np.inf:
 			n_vr += 1
 		elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']:
 			n_vs += 1
 		else:
 			for nl in node_labels:
 				label1 = g1.nodes[nodes1[i]][nl]
 				label2 = g2.nodes[map_i][nl]
 				if label1 != label2:
 					n_vs += 1
 					break
 	for map_i in backward_map:
 		if map_i == np.inf:
 			n_vi += 1
@@ -180,15 +197,21 @@ def get_nb_edit_operations_symbolic(g1, g2, forward_map, backward_map):
 		elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
 			nb_edges2_cnted += 1
 			# edge labels are different.
 			if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \
 				!= g1.edges[(n1, n2)]['bond_type']:
 			for el in edge_labels:
 				label1 = g2.edges[((forward_map[idx1], forward_map[idx2]))][el]
 				label2 = g1.edges[(n1, n2)][el]
 				if label1 != label2:
 					n_es += 1
 					break
 		elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
 			nb_edges2_cnted += 1
 			# edge labels are different.
 			if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \
 				!= g1.edges[(n1, n2)]['bond_type']:
 					n_es += 1				
 			for el in edge_labels:
 				label1 = g2.edges[((forward_map[idx2], forward_map[idx1]))][el]
 				label2 = g1.edges[(n1, n2)][el]
 				if label1 != label2:
 					n_es += 1
 					break
 		# corresponding nodes are in g2, however the edge is removed.
 		else:
 			n_er += 1
@@ -242,7 +265,8 @@ def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
 	return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
 def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
 def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map,
 									   node_attrs=[], edge_attrs=[]):
 	"""Compute the number of each edit operations.
 	"""
 	n_vi = 0
@@ -261,7 +285,7 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
 		else:
 			n_vs += 1
 			sum_squares = 0
 			for a_name in g1.graph['node_attrs']:
 			for a_name in node_attrs:
 				diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
 				sum_squares += np.square(diff)
 			sod_vs += np.sqrt(sum_squares)
@@ -284,15 +308,15 @@ def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
 		elif (n1_g2, n2_g2) in g2.edges():
 			n_es += 1
 			sum_squares = 0
 			for a_name in g1.graph['edge_attrs']:
 				diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name])
 			for a_name in edge_attrs:
 				diff = float(g1.edges[n1, n2][a_name]) - float(g2.edges[n1_g2, n2_g2][a_name])
 				sum_squares += np.square(diff)
 			sod_es += np.sqrt(sum_squares)
 		elif (n2_g2, n1_g2) in g2.edges():
 			n_es += 1
 			sum_squares = 0
 			for a_name in g1.graph['edge_attrs']:
 				diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name])
 			for a_name in edge_attrs:
 				diff = float(g1.edges[n2, n1][a_name]) - float(g2.edges[n2_g2, n1_g2][a_name])
 				sum_squares += np.square(diff)
 			sod_es += np.sqrt(sum_squares)
 		# corresponding nodes are in g2, however the edge is removed.
--- a/gklearn/preimage/median_preimage_generator.py
+++ b/gklearn/preimage/median_preimage_generator.py
@@ -96,7 +96,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 			if self.__runtime_precompute_gm is None:
 				raise Exception('Parameter "runtime_precompute_gm" must be given when using pre-computed Gram matrix.')
 			self._graph_kernel.gram_matrix_unnorm = self.__gram_matrix_unnorm
 			self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
 			if self._kernel_options['normalize']:
 				self._graph_kernel.gram_matrix = self._graph_kernel.normalize_gm(np.copy(self.__gram_matrix_unnorm))
 			else:
 				self._graph_kernel.gram_matrix = np.copy(self.__gram_matrix_unnorm)
 			end_precompute_gm = time.time()
 			start -= self.__runtime_precompute_gm
@@ -259,6 +262,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 		self.__edit_cost_constants = self.__init_ecc
 		options = self.__ged_options.copy()
 		options['edit_cost_constants'] = self.__edit_cost_constants # @todo
 		options['node_labels'] = self._dataset.node_labels
 		options['edge_labels'] = self._dataset.edge_labels
 		options['node_attrs'] = self._dataset.node_attrs
 		options['edge_attrs'] = self._dataset.edge_attrs
 		ged_vec_init, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
 		residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]	
 		time_list = [time.time() - time0]
@@ -297,6 +304,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 			# compute new GEDs and numbers of edit operations.
 			options = self.__ged_options.copy() # np.array([self.__edit_cost_constants[0], self.__edit_cost_constants[1], 0.75])
 			options['edit_cost_constants'] = self.__edit_cost_constants # @todo
 			options['node_labels'] = self._dataset.node_labels
 			options['edge_labels'] = self._dataset.edge_labels
 			options['node_attrs'] = self._dataset.node_attrs
 			options['edge_attrs'] = self._dataset.edge_attrs
 			ged_vec, ged_mat, n_edit_operations = compute_geds(graphs, options=options, parallel=self.__parallel)
 			residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
 			time_list.append(time.time() - time0)
@@ -444,34 +455,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 				nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
 				x = cp.Variable(nb_cost_mat_new.shape[1])
 				cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
 				constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
 				constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
 							   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
 				prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 				try:
 					prob.solve(verbose=True)
 				except MemoryError as error0:
 					if self._verbose >= 2:
 						print('\nUsing solver "OSQP" caused a memory error.')
 						print('the original error message is\n', error0)
 						print('solver status: ', prob.status)
 						print('trying solver "CVXOPT" instead...\n')
 					try:
 						prob.solve(solver=cp.CVXOPT, verbose=True)
 					except Exception as error1:
 						if self._verbose >= 2:
 							print('\nAn error occured when using solver "CVXOPT".')
 							print('the original error message is\n', error1)
 							print('solver status: ', prob.status)
 							print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
 						prob.solve(solver=cp.MOSEK, verbose=True)
 					else:
 						if self._verbose >= 2:
 							print('solver status: ', prob.status)					
 				else:
 					if self._verbose >= 2:
 						print('solver status: ', prob.status)
 				if self._verbose >= 2:				
 					print()
 				self.__execute_cvx(prob)
 				edit_costs_new = x.value
 				residual = np.sqrt(prob.value)
 			elif rw_constraints == '2constraints':
@@ -541,19 +528,17 @@ class MedianPreimageGenerator(PreimageGenerator):
 								   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
 								   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 					prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 					prob.solve()
 					self.__execute_cvx(prob)
 					edit_costs_new = x.value
 					residual = np.sqrt(prob.value)
 				elif is_n_attr and not is_e_attr:
 					nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
 					x = cp.Variable(nb_cost_mat_new.shape[1])
 					cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
 					constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
 					constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
 								   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
 					prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 					prob.solve()
 					if self._verbose >= 2:
 						print(x.value)
 					self.__execute_cvx(prob)
 					edit_costs_new = np.concatenate((x.value, np.array([0.0])))
 					residual = np.sqrt(prob.value)
 				elif not is_n_attr and is_e_attr:
@@ -563,7 +548,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 					constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
 								   np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 					prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 					prob.solve()
 					self.__execute_cvx(prob)
 					edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
 					residual = np.sqrt(prob.value)
 				else:
@@ -572,10 +557,20 @@ class MedianPreimageGenerator(PreimageGenerator):
 					cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
 					constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
 					prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 					prob.solve()
 					self.__execute_cvx(prob)
 					edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), 
 													 x.value[2:], np.array([0.0])))
 					residual = np.sqrt(prob.value)
 		elif self.__ged_options['edit_cost'] == 'CONSTANT': # @todo: node/edge may not labeled.
 			x = cp.Variable(nb_cost_mat.shape[1])
 			cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
 			constraints = [x >= [0.01 for i in range(nb_cost_mat.shape[1])],
 						   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
 						   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 			self.__execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
 		else:
 	#	# method 1: simple least square method.
 	#	edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
@@ -607,7 +602,7 @@ class MedianPreimageGenerator(PreimageGenerator):
 						   np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
 						   np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
 			prob = cp.Problem(cp.Minimize(cost_fun), constraints)
 			prob.solve()
 			self.__execute_cvx(prob)
 			edit_costs_new = x.value
 			residual = np.sqrt(prob.value)
@@ -616,6 +611,34 @@ class MedianPreimageGenerator(PreimageGenerator):
 		return edit_costs_new, residual
 	def __execute_cvx(self, prob):
 		"""Solve `prob`, falling back to other solvers on failure.

 		The problem is first solved with the default solver. If that raises a
 		MemoryError, CVXOPT is tried; if CVXOPT raises any Exception, MOSEK is
 		tried and its errors propagate to the caller. Progress messages are
 		printed only when self._verbose >= 2.
 		"""
 		chatty = self._verbose >= 2
 		# The status line is printed after the default or the CVXOPT solve
 		# succeeds, but not after the MOSEK fallback.
 		report_status = True
 		try:
 			prob.solve(verbose=chatty)
 		except MemoryError as error0:
 			if chatty:
 				print('\nUsing solver "OSQP" caused a memory error.')
 				print('the original error message is\n', error0)
 				print('solver status: ', prob.status)
 				print('trying solver "CVXOPT" instead...\n')
 			try:
 				prob.solve(solver=cp.CVXOPT, verbose=chatty)
 			except Exception as error1:
 				report_status = False
 				if chatty:
 					print('\nAn error occured when using solver "CVXOPT".')
 					print('the original error message is\n', error1)
 					print('solver status: ', prob.status)
 					print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
 				prob.solve(solver=cp.MOSEK, verbose=chatty)
 		if chatty:
 			if report_status:
 				print('solver status: ', prob.status)
 			print()
 	def __generate_preimage_iam(self):
 		# Set up the ged environment.
 		ged_env = gedlibpy.GEDEnv() # @todo: maybe create a ged_env as a private varible.
@@ -638,6 +661,10 @@ class MedianPreimageGenerator(PreimageGenerator):
 		# Select the GED algorithm.
 		mge.set_options(mge_options_to_string(options))
 		mge.set_label_names(node_labels=self._dataset.node_labels, 
 					  edge_labels=self._dataset.edge_labels, 
 					  node_attrs=self._dataset.node_attrs, 
 					  edge_attrs=self._dataset.edge_attrs)
 		mge.set_init_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
 		mge.set_descent_method(self.__ged_options['method'], ged_options_to_string(self.__ged_options))
--- a/gklearn/preimage/utils.py
+++ b/gklearn/preimage/utils.py
@@ -37,7 +37,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 	dataset_all.trim_dataset(edge_required=edge_required)
 	if irrelevant_labels is not None:
 		dataset_all.remove_labels(**irrelevant_labels)
 # 	dataset_all.cut_graphs(range(0, 100))
 # 	dataset_all.cut_graphs(range(0, 10))
 	datasets = split_dataset_by_target(dataset_all)
 	if save_results:
@@ -67,8 +67,8 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 		gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options['name'] + '.gm.npz'
 		gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
 		if gmfile_exist:
 			gmfile = np.load(gm_fname)
 			gram_matrix_unnorm_list = gmfile['gram_matrix_unnorm_list']
 			gmfile = np.load(gm_fname, allow_pickle=True) # @todo: may not be safe.
 			gram_matrix_unnorm_list = [item for item in gmfile['gram_matrix_unnorm_list']]
 			time_precompute_gm_list = gmfile['run_time_list'].tolist()
 		else:
 			gram_matrix_unnorm_list = []
@@ -87,6 +87,7 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 	print('start generating preimage for each class of target...')
 	idx_offset = 0
 	for idx, dataset in enumerate(datasets):
 		target = dataset.targets[0]
 		print('\ntarget =', target, '\n')
@@ -96,14 +97,15 @@ def generate_median_preimages_by_class(ds_name, mpg_options, kernel_options, ged
 		num_graphs = len(dataset.graphs)
 		if num_graphs < 2:
 			print('\nnumber of graphs = ', num_graphs, ', skip.\n')
 			idx_offset += 1
 			continue
 		# 2. set parameters.
 		print('2. initializing mpg and setting parameters...')
 		if load_gm:
 			if gmfile_exist:
 				mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx]
 				mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx]
 				mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[idx - idx_offset]
 				mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[idx - idx_offset]
 		mpg = MedianPreimageGenerator()
 		mpg.dataset = dataset
 		mpg.set_options(**mpg_options.copy())
--- a/gklearn/utils/dataset.py
+++ b/gklearn/utils/dataset.py
@@ -67,18 +67,35 @@ class Dataset(object):
 	def load_predefined_dataset(self, ds_name):
 		current_path = os.path.dirname(os.path.realpath(__file__)) + '/'
 		if ds_name == 'Letter-high': # node non-symb
 			ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
 		if ds_name == 'acyclic':
 			pass
 		elif ds_name == 'COIL-DEL':
 			ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Letter-med': # node non-symb
 			ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt'
 		elif ds_name == 'COIL-RAG':
 			ds_file = current_path + '../../datasets/COIL-RAG/COIL-RAG_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Letter-low': # node non-symb
 			ds_file = current_path + '../../datasets/Letter-high/Letter-low_A.txt'
 		elif ds_name == 'COLORS-3':
 			ds_file = current_path + '../../datasets/COLORS-3/COLORS-3_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Fingerprint':
 			ds_file = current_path + '../../datasets/Fingerprint/Fingerprint_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'FRANKENSTEIN':
 			ds_file = current_path + '../../datasets/FRANKENSTEIN/FRANKENSTEIN_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Letter-high': # node non-symb
 			ds_file = current_path + '../../datasets/Letter-high/Letter-high_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Letter-low': # node non-symb
 			ds_file = current_path + '../../datasets/Letter-high/Letter-low_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Letter-med': # node non-symb
 			ds_file = current_path + '../../datasets/Letter-high/Letter-med_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'MUTAG':
 			ds_file = current_path + '../../datasets/MUTAG/MUTAG_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'SYNTHETIC':
 			pass
 		elif ds_name == 'SYNTHETICnew':
@@ -86,15 +103,6 @@ class Dataset(object):
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'Synthie':
 			pass
 		elif ds_name == 'COIL-DEL':
 			ds_file = current_path + '../../datasets/COIL-DEL/COIL-DEL_A.txt'
 			self.__graphs, self.__targets, label_names = load_dataset(ds_file)
 		elif ds_name == 'COIL-RAG':
 			pass
 		elif ds_name == 'COLORS-3':
 			pass
 		elif ds_name == 'FRANKENSTEIN':
 			pass
 		self.__node_labels = label_names['node_labels']
 		self.__node_attrs = label_names['node_attrs']
--- a/gklearn/utils/graph_files.py
+++ b/gklearn/utils/graph_files.py
@@ -474,6 +474,7 @@ def load_tud(filename):
 		label_names = {'node_labels': [], 'node_attrs': [], 
 					   'edge_labels': [], 'edge_attrs': []}
 		class_label_map = None
 		class_label_map_strings = []
 		content_rm = open(frm).read().splitlines()
 		i = 0
@@ -538,20 +539,32 @@ def load_tud(filename):
 	else:
 		label_names = {'node_labels': [], 'node_attrs': [], 
 					   'edge_labels': [], 'edge_attrs': []}
 		class_label_map = None
 	content_gi = open(fgi).read().splitlines()  # graph indicator
 	content_am = open(fam).read().splitlines()  # adjacency matrix
 	content_gl = open(fgl).read().splitlines()  # graph labels
 	# load targets.
 	if 'fgl' in locals():
 		content_targets = open(fgl).read().splitlines()  # targets (classification)
 		targets = [float(i) for i in content_targets]
 	elif 'fga' in locals():
 		content_targets = open(fga).read().splitlines()  # targets (regression)
 		targets = [int(i) for i in content_targets]
 		if class_label_map is not None:
 			targets = [class_label_map[t] for t in targets]
 	else:
 		raise Exception('Can not find targets file. Please make sure there is a "', ds_name, '_graph_labels.txt" or "', ds_name, '_graph_attributes.txt"', 'file in your dataset folder.')
 	# create graphs and add nodes
 	data = [nx.Graph(name=str(i)) for i in range(0, len(content_gl))]
 	data = [nx.Graph(name=str(i)) for i in range(0, len(content_targets))]
 	if 'fnl' in locals():
 		content_nl = open(fnl).read().splitlines()  # node labels
 		for idx, line in enumerate(content_gi):
 			# transfer to int first in case of unexpected blanks
 			data[int(line) - 1].add_node(idx)
 			labels = [l.strip() for l in content_nl[idx].split(',')]
 			if label_names['node_labels'] == []:
 			if label_names['node_labels'] == []: # @todo: need fix bug.
 				for i, label in enumerate(labels):
 					l_name = 'label_' + str(i)
 					data[int(line) - 1].nodes[idx][l_name] = label
@@ -619,11 +632,6 @@ def load_tud(filename):
 				for i, a_name in enumerate(label_names['edge_attrs']):
 					data[g].edges[n[0], n[1]][a_name] = attrs[i]
 	# load targets.
 	targets = [int(i) for i in content_gl]
 	if 'class_label_map' in locals():
 		targets = [class_label_map[t] for t in targets]
 	return data, targets, label_names