From 29903787ed15370631665e071e9995f006d11891 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Fri, 27 Mar 2020 09:33:27 +0100
Subject: [PATCH] update preimage.

---
 gklearn/preimage/common_types.py                |  17 +
 gklearn/preimage/cpp2python.py                  | 134 ++++
 gklearn/preimage/fitDistance.py                 |  23 +-
 gklearn/preimage/ged.py                         |  63 +-
 gklearn/preimage/median_graph_estimator.py      | 826 ++++++++++++++++++++++++
 gklearn/preimage/median_preimage_generator.py   |  15 +
 gklearn/preimage/misc.py                        | 108 ++++
 gklearn/preimage/preimage_generator.py          |  12 +
 gklearn/preimage/python_code.py                 | 122 ++++
 gklearn/preimage/test_median_graph_estimator.py |  91 +++
 gklearn/preimage/timer.py                       |  40 ++
 gklearn/preimage/xp_fit_method.py               | 196 ++++--
 12 files changed, 1572 insertions(+), 75 deletions(-)
 create mode 100644 gklearn/preimage/common_types.py
 create mode 100644 gklearn/preimage/cpp2python.py
 create mode 100644 gklearn/preimage/median_graph_estimator.py
 create mode 100644 gklearn/preimage/median_preimage_generator.py
 create mode 100644 gklearn/preimage/misc.py
 create mode 100644 gklearn/preimage/preimage_generator.py
 create mode 100644 gklearn/preimage/python_code.py
 create mode 100644 gklearn/preimage/test_median_graph_estimator.py
 create mode 100644 gklearn/preimage/timer.py

diff --git a/gklearn/preimage/common_types.py b/gklearn/preimage/common_types.py
new file mode 100644
index 0000000..2face25
--- /dev/null
+++ b/gklearn/preimage/common_types.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Mar 19 18:17:38 2020
+
+@author: ljia
+"""
+
+from enum import Enum, auto
+
+class AlgorithmState(Enum):
+    """Can be used to specify the state of an algorithm.
+    """
+    CALLED = auto()  # The algorithm has been called.
+    INITIALIZED = auto()  # The algorithm has been initialized.
+    CONVERGED = auto()  # The algorithm has converged.
+    TERMINATED = auto()  # The algorithm has terminated.
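Aside (not part of the patch): with auto() actually called, each member of AlgorithmState receives a distinct value, so the estimator's state can be compared and reported unambiguously. A minimal illustrative sketch:

from gklearn.preimage.common_types import AlgorithmState

state = AlgorithmState.INITIALIZED
# Distinct members make equality checks unambiguous.
assert state == AlgorithmState.INITIALIZED
assert state != AlgorithmState.CONVERGED
print(state.name)  # -> INITIALIZED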
\ No newline at end of file diff --git a/gklearn/preimage/cpp2python.py b/gklearn/preimage/cpp2python.py new file mode 100644 index 0000000..9d63026 --- /dev/null +++ b/gklearn/preimage/cpp2python.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 20 11:09:04 2020 + +@author: ljia +""" +import re + +def convert_function(cpp_code): +# f_cpp = open('cpp_code.cpp', 'r') +# # f_cpp = open('cpp_ext/src/median_graph_estimator.ipp', 'r') +# cpp_code = f_cpp.read() + python_code = cpp_code.replace('else if (', 'elif ') + python_code = python_code.replace('if (', 'if ') + python_code = python_code.replace('else {', 'else:') + python_code = python_code.replace(') {', ':') + python_code = python_code.replace(';\n', '\n') + python_code = re.sub('\n(.*)}\n', '\n\n', python_code) + # python_code = python_code.replace('}\n', '') + python_code = python_code.replace('throw', 'raise') + python_code = python_code.replace('error', 'Exception') + python_code = python_code.replace('"', '\'') + python_code = python_code.replace('\\\'', '"') + python_code = python_code.replace('try {', 'try:') + python_code = python_code.replace('true', 'True') + python_code = python_code.replace('false', 'False') + python_code = python_code.replace('catch (...', 'except') + # python_code = re.sub('std::string\(\'(.*)\'\)', '$1', python_code) + + return python_code + + + +# # python_code = python_code.replace('}\n', '') + + + + +# python_code = python_code.replace('option.first', 'opt_name') +# python_code = python_code.replace('option.second', 'opt_val') +# python_code = python_code.replace('ged::Error', 'Exception') +# python_code = python_code.replace('std::string(\'Invalid argument "\')', '\'Invalid argument "\'') + + +# f_cpp.close() +# f_python = open('python_code.py', 'w') +# f_python.write(python_code) +# f_python.close() + + +def convert_function_comment(cpp_fun_cmt, param_types): + cpp_fun_cmt = cpp_fun_cmt.replace('\t', '') + cpp_fun_cmt = cpp_fun_cmt.replace('\n * ', ' ') + # split the input comment according to key words. + param_split = None + note = None + cmt_split = cpp_fun_cmt.split('@brief')[1] + brief = cmt_split + if '@param' in cmt_split: + cmt_split = cmt_split.split('@param') + brief = cmt_split[0] + param_split = cmt_split[1:] + if '@note' in cmt_split[-1]: + note_split = cmt_split[-1].split('@note') + if param_split is not None: + param_split.pop() + param_split.append(note_split[0]) + else: + brief = note_split[0] + note = note_split[1] + + # get parameters. + if param_split is not None: + for idx, param in enumerate(param_split): + _, param_name, param_desc = param.split(' ', 2) + param_name = function_comment_strip(param_name, ' *\n\t/') + param_desc = function_comment_strip(param_desc, ' *\n\t/') + param_split[idx] = (param_name, param_desc) + + # strip comments. + brief = function_comment_strip(brief, ' *\n\t/') + if note is not None: + note = function_comment_strip(note, ' *\n\t/') + + # construct the Python function comment. 
+ python_fun_cmt = '"""' + python_fun_cmt += brief + '\n' + if param_split is not None and len(param_split) > 0: + python_fun_cmt += '\nParameters\n----------' + for idx, param in enumerate(param_split): + python_fun_cmt += '\n' + param[0] + ' : ' + param_types[idx] + python_fun_cmt += '\n\t' + param[1] + '\n' + if note is not None: + python_fun_cmt += '\nNote\n----\n' + note + '\n' + python_fun_cmt += '"""' + + return python_fun_cmt + + +def function_comment_strip(comment, bad_chars): + head_removed, tail_removed = False, False + while not head_removed or not tail_removed: + if comment[0] in bad_chars: + comment = comment[1:] + head_removed = False + else: + head_removed = True + if comment[-1] in bad_chars: + comment = comment[:-1] + tail_removed = False + else: + tail_removed = True + + return comment + + +if __name__ == '__main__': +# python_code = convert_function(""" +# if (print_to_stdout_ == 2) { +# std::cout << "\n===========================================================\n"; +# std::cout << "Block gradient descent for initial median " << median_pos + 1 << " of " << medians.size() << ".\n"; +# std::cout << "-----------------------------------------------------------\n"; +# } +# """) + + + python_fun_cmt = convert_function_comment(""" + /*! + * @brief Returns the sum of distances. + * @param[in] state The state of the estimator. + * @return The sum of distances of the median when the estimator was in the state @p state during the last call to run(). + */ + """, ['string', 'string']) \ No newline at end of file diff --git a/gklearn/preimage/fitDistance.py b/gklearn/preimage/fitDistance.py index 3a885f3..234f7fc 100644 --- a/gklearn/preimage/fitDistance.py +++ b/gklearn/preimage/fitDistance.py @@ -260,10 +260,29 @@ def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides', nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] x = cp.Variable(nb_cost_mat_new.shape[1]) cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) - constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], + constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] prob = cp.Problem(cp.Minimize(cost_fun), constraints) - prob.solve() + try: + prob.solve(verbose=True) + except MemoryError as error0: + print('\nUsing solver "OSQP" caused a memory error.') + print('the original error message is\n', error0) + print('solver status: ', prob.status) + print('trying solver "CVXOPT" instead...\n') + try: + prob.solve(solver=cp.CVXOPT, verbose=True) + except Exception as error1: + print('\nAn error occured when using solver "CVXOPT".') + print('the original error message is\n', error1) + print('solver status: ', prob.status) + print('trying solver "MOSEK" instead. 
Notice this solver is commercial and a lisence is required.\n') + prob.solve(solver=cp.MOSEK, verbose=True) + else: + print('solver status: ', prob.status) + else: + print('solver status: ', prob.status) + print() edit_costs_new = x.value residual = np.sqrt(prob.value) elif rw_constraints == '2constraints': diff --git a/gklearn/preimage/ged.py b/gklearn/preimage/ged.py index 91fc2d2..a66baaf 100644 --- a/gklearn/preimage/ged.py +++ b/gklearn/preimage/ged.py @@ -14,42 +14,13 @@ from multiprocessing import Pool from functools import partial #from gedlibpy_linlin import librariesImport, gedlibpy -from libs import * +from gklearn.gedlib import librariesImport, gedlibpy def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP', edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50): """ Compute GED for 2 graphs. - """ - def convertGraph(G, cost): - """Convert a graph to the proper NetworkX format that can be - recognized by library gedlibpy. - """ - G_new = nx.Graph() - if cost == 'LETTER' or cost == 'LETTER2': - for nd, attrs in G.nodes(data=True): - G_new.add_node(str(nd), x=str(attrs['attributes'][0]), - y=str(attrs['attributes'][1])) - for nd1, nd2, attrs in G.edges(data=True): - G_new.add_edge(str(nd1), str(nd2)) - elif cost == 'NON_SYMBOLIC': - for nd, attrs in G.nodes(data=True): - G_new.add_node(str(nd)) - for a_name in G.graph['node_attrs']: - G_new.nodes[str(nd)][a_name] = str(attrs[a_name]) - for nd1, nd2, attrs in G.edges(data=True): - G_new.add_edge(str(nd1), str(nd2)) - for a_name in G.graph['edge_attrs']: - G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name]) - else: - for nd, attrs in G.nodes(data=True): - G_new.add_node(str(nd), chem=attrs['atom']) - for nd1, nd2, attrs in G.edges(data=True): - G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) -# G_new.add_edge(str(nd1), str(nd2)) - - return G_new - + """ # dataset = dataset.lower() @@ -178,6 +149,36 @@ def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method return dis, pi_forward, pi_backward +def convertGraph(G, cost): + """Convert a graph to the proper NetworkX format that can be + recognized by library gedlibpy. 
+ """ + G_new = nx.Graph() + if cost == 'LETTER' or cost == 'LETTER2': + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), x=str(attrs['attributes'][0]), + y=str(attrs['attributes'][1])) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2)) + elif cost == 'NON_SYMBOLIC': + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd)) + for a_name in G.graph['node_attrs']: + G_new.nodes[str(nd)][a_name] = str(attrs[a_name]) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2)) + for a_name in G.graph['edge_attrs']: + G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name]) + else: + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) +# G_new.add_edge(str(nd1), str(nd2)) + + return G_new + + def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', edit_cost_constant=[], stabilizer='min', repeat=50): """ diff --git a/gklearn/preimage/median_graph_estimator.py b/gklearn/preimage/median_graph_estimator.py new file mode 100644 index 0000000..b70cc61 --- /dev/null +++ b/gklearn/preimage/median_graph_estimator.py @@ -0,0 +1,826 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Mar 16 18:04:55 2020 + +@author: ljia +""" +import numpy as np +from gklearn.preimage.common_types import AlgorithmState +from gklearn.preimage import misc +from gklearn.preimage.timer import Timer +from gklearn.utils.utils import graph_isIdentical +import time +from tqdm import tqdm +import sys +import networkx as nx + + +class MedianGraphEstimator(object): + + def __init__(self, ged_env, constant_node_costs): + """Constructor. + + Parameters + ---------- + ged_env : gklearn.gedlib.gedlibpy.GEDEnv + Initialized GED environment. The edit costs must be set by the user. + + constant_node_costs : Boolean + Set to True if the node relabeling costs are constant. + """ + self.__ged_env = ged_env + self.__init_method = 'BRANCH_FAST' + self.__init_options = '' + self.__descent_method = 'BRANCH_FAST' + self.__descent_options = '' + self.__refine_method = 'IPFP' + self.__refine_options = '' + self.__constant_node_costs = constant_node_costs + self.__labeled_nodes = (ged_env.get_num_node_labels() > 1) + self.__node_del_cost = ged_env.get_node_del_cost(ged_env.get_node_label(1)) + self.__node_ins_cost = ged_env.get_node_ins_cost(ged_env.get_node_label(1)) + self.__labeled_edges = (ged_env.get_num_edge_labels() > 1) + self.__edge_del_cost = ged_env.get_edge_del_cost(ged_env.get_edge_label(1)) + self.__edge_ins_cost = ged_env.get_edge_ins_cost(ged_env.get_edge_label(1)) + self.__init_type = 'RANDOM' + self.__num_random_inits = 10 + self.__desired_num_random_inits = 10 + self.__use_real_randomness = True + self.__seed = 0 + self.__refine = True + self.__time_limit_in_sec = 0 + self.__epsilon = 0.0001 + self.__max_itrs = 100 + self.__max_itrs_without_update = 3 + self.__num_inits_increase_order = 10 + self.__init_type_increase_order = 'K-MEANS++' + self.__max_itrs_increase_order = 10 + self.__print_to_stdout = 2 + self.__median_id = np.inf # @todo: check + self.__median_node_id_prefix = '' # @todo: check + self.__node_maps_from_median = {} + self.__sum_of_distances = 0 + self.__best_init_sum_of_distances = np.inf + self.__converged_sum_of_distances = np.inf + self.__runtime = None + self.__runtime_initialized = None + self.__runtime_converged = None + self.__itrs = [] # @todo: check: {} ? 
+ self.__num_decrease_order = 0 + self.__num_increase_order = 0 + self.__num_converged_descents = 0 + self.__state = AlgorithmState.TERMINATED + + if ged_env is None: + raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.') + elif not ged_env.is_initialized(): + raise Exception('The GED environment is uninitialized. Call gedlibpy.GEDEnv.init() before passing it to the constructor of MedianGraphEstimator.') + + + def set_options(self, options): + """Sets the options of the estimator. + + Parameters + ---------- + options : string + String that specifies with which options to run the estimator. + """ + self.__set_default_options() + options_map = misc.options_string_to_options_map(options) + for opt_name, opt_val in options_map.items(): + if opt_name == 'init-type': + self.__init_type = opt_val + if opt_val != 'MEDOID' and opt_val != 'RANDOM' and opt_val != 'MIN' and opt_val != 'MAX' and opt_val != 'MEAN': + raise Exception('Invalid argument ' + opt_val + ' for option init-type. Usage: options = "[--init-type RANDOM|MEDOID|EMPTY|MIN|MAX|MEAN] [...]"') + elif opt_name == 'random-inits': + try: + self.__num_random_inits = int(opt_val) + self.__desired_num_random_inits = self.__num_random_inits + except: + raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits ]"') + + if self.__num_random_inits <= 0: + raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits ]"') + + elif opt_name == 'randomness': + if opt_val == 'PSEUDO': + self.__use_real_randomness = False + + elif opt_val == 'REAL': + self.__use_real_randomness = True + + else: + raise Exception('Invalid argument "' + opt_val + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"') + + elif opt_name == 'stdout': + if opt_val == '0': + self.__print_to_stdout = 0 + + elif opt_val == '1': + self.__print_to_stdout = 1 + + elif opt_val == '2': + self.__print_to_stdout = 2 + + else: + raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"') + + elif opt_name == 'refine': + if opt_val == 'TRUE': + self.__refine = True + + elif opt_val == 'FALSE': + self.__refine = False + + else: + raise Exception('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"') + + elif opt_name == 'time-limit': + try: + self.__time_limit_in_sec = float(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit ] [...]') + + elif opt_name == 'max-itrs': + try: + self.__max_itrs = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs ] [...]') + + elif opt_name == 'max-itrs-without-update': + try: + self.__max_itrs_without_update = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update ] [...]') + + elif opt_name == 'seed': + try: + self.__seed = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed ] [...]') + + elif opt_name == 'epsilon': + try: + self.__epsilon = float(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option epsilon. 
Usage: options = "[--epsilon ] [...]') + + if self.__epsilon <= 0: + raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon ] [...]') + + elif opt_name == 'inits-increase-order': + try: + self.__num_inits_increase_order = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order ]"') + + if self.__num_inits_increase_order <= 0: + raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order ]"') + + elif opt_name == 'init-type-increase-order': + self.__init_type_increase_order = opt_val + if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++': + raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"') + + elif opt_name == 'max-itrs-increase-order': + try: + self.__max_itrs_increase_order = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order ] [...]') + + else: + valid_options = '[--init-type ] [--random-inits ] [--randomness ] [--seed ] [--stdout ] ' + valid_options += '[--time-limit ] [--max-itrs ] [--epsilon ] ' + valid_options += '[--inits-increase-order ] [--init-type-increase-order ] [--max-itrs-increase-order ]' + raise Exception('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"') + + + def set_init_method(self, init_method, init_options=''): + """Selects method to be used for computing the initial medoid graph. + + Parameters + ---------- + init_method : string + The selected method. Default: ged::Options::GEDMethod::BRANCH_UNIFORM. + + init_options : string + The options for the selected method. Default: "". + + Notes + ----- + Has no effect unless "--init-type MEDOID" is passed to set_options(). + """ + self.__init_method = init_method; + self.__init_options = init_options; + + + def set_descent_method(self, descent_method, descent_options=''): + """Selects method to be used for block gradient descent.. + + Parameters + ---------- + descent_method : string + The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST. + + descent_options : string + The options for the selected method. Default: "". + + Notes + ----- + Has no effect unless "--init-type MEDOID" is passed to set_options(). + """ + self.__descent_method = descent_method; + self.__descent_options = descent_options; + + + def set_refine_method(self, refine_method, refine_options): + """Selects method to be used for improving the sum of distances and the node maps for the converged median. + + Parameters + ---------- + refine_method : string + The selected method. Default: "IPFP". + + refine_options : string + The options for the selected method. Default: "". + + Notes + ----- + Has no effect if "--refine FALSE" is passed to set_options(). + """ + self.__refine_method = refine_method + self.__refine_options = refine_options + + + def run(self, graph_ids, set_median_id, gen_median_id): + """Computes a generalized median graph. + + Parameters + ---------- + graph_ids : list[integer] + The IDs of the graphs for which the median should be computed. Must have been added to the environment passed to the constructor. + + set_median_id : integer + The ID of the computed set-median. A dummy graph with this ID must have been added to the environment passed to the constructor. 
Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph(). + + + gen_median_id : integer + The ID of the computed generalized median. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph(). + """ + # Sanity checks. + if len(graph_ids) == 0: + raise Exception('Empty vector of graph IDs, unable to compute median.') + all_graphs_empty = True + for graph_id in graph_ids: + if self.__ged_env.get_graph_num_nodes(graph_id) > 0: + self.__median_node_id_prefix = self.__ged_env.get_original_node_ids(graph_id)[0] + all_graphs_empty = False + break + if all_graphs_empty: + raise Exception('All graphs in the collection are empty.') + + # Start timer and record start time. + start = time.time() + timer = Timer(self.__time_limit_in_sec) + self.__median_id = gen_median_id + self.__state = AlgorithmState.TERMINATED + + # Get ExchangeGraph representations of the input graphs. + graphs = {} + for graph_id in graph_ids: + # @todo: get_nx_graph() function may need to be modified according to the coming code. + graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False) +# print(self.__ged_env.get_graph_internal_id(0)) +# print(graphs[0].graph) +# print(graphs[0].nodes(data=True)) +# print(graphs[0].edges(data=True)) +# print(nx.adjacency_matrix(graphs[0])) + + + # Construct initial medians. + medians = [] + self.__construct_initial_medians(graph_ids, timer, medians) + end_init = time.time() + self.__runtime_initialized = end_init - start +# print(medians[0].graph) +# print(medians[0].nodes(data=True)) +# print(medians[0].edges(data=True)) +# print(nx.adjacency_matrix(medians[0])) + + # Reset information about iterations and number of times the median decreases and increases. + self.__itrs = [0] * len(medians) + self.__num_decrease_order = 0 + self.__num_increase_order = 0 + self.__num_converged_descents = 0 + + # Initialize the best median. + best_sum_of_distances = np.inf + self.__best_init_sum_of_distances = np.inf + node_maps_from_best_median = {} + + # Run block gradient descent from all initial medians. + self.__ged_env.set_method(self.__descent_method, self.__descent_options) + for median_pos in range(0, len(medians)): + + # Terminate if the timer has expired and at least one SOD has been computed. + if timer.expired() and median_pos > 0: + break + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n===========================================================') + print('Block gradient descent for initial median', str(median_pos + 1), 'of', str(len(medians)), '.') + print('-----------------------------------------------------------') + + # Get reference to the median. + median = medians[median_pos] + + # Load initial median into the environment. + self.__ged_env.load_nx_graph(median, gen_median_id) + self.__ged_env.init(self.__ged_env.get_init_type()) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout) + + # Compute node maps and sum of distances for initial median. 
+ self.__sum_of_distances = 0 + self.__node_maps_from_median.clear() # @todo + for graph_id in graph_ids: + self.__ged_env.run_method(gen_median_id, graph_id) + self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id) +# print(self.__node_maps_from_median[graph_id]) + self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. +# print(self.__sum_of_distances) + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress.update(1) + + self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances) + self.__ged_env.load_nx_graph(median, set_median_id) +# print(self.__best_init_sum_of_distances) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n') + + # Run block gradient descent from initial median. + converged = False + itrs_without_update = 0 + while not self.__termination_criterion_met(converged, timer, self.__itrs[median_pos], itrs_without_update): + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n===========================================================') + print('Iteration', str(self.__itrs[median_pos] + 1), 'for initial median', str(median_pos + 1), 'of', str(len(medians)), '.') + print('-----------------------------------------------------------') + + # Initialize flags that tell us what happened in the iteration. + median_modified = False + node_maps_modified = False + decreased_order = False + increased_order = False + + # Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!! + median_modified = self.__update_median(graphs, median) + if not median_modified or self.__itrs[median_pos] == 0: + decreased_order = False + if not decreased_order or self.__itrs[median_pos] == 0: + increased_order = False + + # Update the number of iterations without update of the median. + if median_modified or decreased_order or increased_order: + itrs_without_update = 0 + else: + itrs_without_update += 1 + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Loading median to environment: ... ', end='') + + # Load the median into the environment. + # @todo: should this function use the original node label? + self.__ged_env.load_nx_graph(median, gen_median_id) + self.__ged_env.init(self.__ged_env.get_init_type()) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('done.') + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Updating induced costs: ... ', end='') + + # Compute induced costs of the old node maps w.r.t. the updated median. + for graph_id in graph_ids: +# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id)) + # @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + self.__ged_env.compute_induced_cost(gen_median_id, graph_id) +# print('---------------------------------------') +# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id)) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('done.') + + # Update the node maps. + node_maps_modified = self.__update_node_maps() # @todo + + # Update the order of the median if no improvement can be found with the current order. 
+ + # Update the sum of distances. + old_sum_of_distances = self.__sum_of_distances + self.__sum_of_distances = 0 + for graph_id in self.__node_maps_from_median: + self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: see above. + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Old local SOD: ', old_sum_of_distances) + print('New local SOD: ', self.__sum_of_distances) + print('Best converged SOD: ', best_sum_of_distances) + print('Modified median: ', median_modified) + print('Modified node maps: ', node_maps_modified) + print('Decreased order: ', decreased_order) + print('Increased order: ', increased_order) + print('===========================================================\n') + + converged = not (median_modified or node_maps_modified or decreased_order or increased_order) + + self.__itrs[median_pos] += 1 + + # Update the best median. + if self.__sum_of_distances < self.__best_init_sum_of_distances: + best_sum_of_distances = self.__sum_of_distances + node_maps_from_best_median = self.__node_maps_from_median + best_median = median + + # Update the number of converged descents. + if converged: + self.__num_converged_descents += 1 + + # Store the best encountered median. + self.__sum_of_distances = best_sum_of_distances + self.__node_maps_from_median = node_maps_from_best_median + self.__ged_env.load_nx_graph(best_median, gen_median_id) + self.__ged_env.init(self.__ged_env.get_init_type()) + end_descent = time.time() + self.__runtime_converged = end_descent - start + + # Refine the sum of distances and the node maps for the converged median. + self.__converged_sum_of_distances = self.__sum_of_distances + if self.__refine: + self.__improve_sum_of_distances(timer) # @todo + + # Record end time, set runtime and reset the number of initial medians. + end = time.time() + self.__runtime = end - start + self.__num_random_inits = self.__desired_num_random_inits + + # Print global information. + if self.__print_to_stdout != 0: + print('\n===========================================================') + print('Finished computation of generalized median graph.') + print('-----------------------------------------------------------') + print('Best SOD after initialization: ', self.__best_init_sum_of_distances) + print('Converged SOD: ', self.__converged_sum_of_distances) + if self.__refine: + print('Refined SOD: ', self.__sum_of_distances) + print('Overall runtime: ', self.__runtime) + print('Runtime of initialization: ', self.__runtime_initialized) + print('Runtime of block gradient descent: ', self.__runtime_converged - self.__runtime_initialized) + if self.__refine: + print('Runtime of refinement: ', self.__runtime - self.__runtime_converged) + print('Number of initial medians: ', len(medians)) + total_itr = 0 + num_started_descents = 0 + for itr in self.__itrs: + total_itr += itr + if itr > 0: + num_started_descents += 1 + print('Size of graph collection: ', len(graph_ids)) + print('Number of started descents: ', num_started_descents) + print('Number of converged descents: ', self.__num_converged_descents) + print('Overall number of iterations: ', total_itr) + print('Overall number of times the order decreased: ', self.__num_decrease_order) + print('Overall number of times the order increased: ', self.__num_increase_order) + print('===========================================================\n') + + + def get_sum_of_distances(self, state=''): + """Returns the sum of distances. 
+ + Parameters + ---------- + state : string + The state of the estimator. Can be 'initialized' or 'converged'. Default: "" + + Returns + ------- + float + The sum of distances (SOD) of the median when the estimator was in the state `state` during the last call to run(). If `state` is not given, the converged SOD (without refinement) or refined SOD (with refinement) is returned. + """ + if not self.__median_available(): + raise Exception('No median has been computed. Call run() before calling get_sum_of_distances().') + if state == 'initialized': + return self.__best_init_sum_of_distances + if state == 'converged': + return self.__converged_sum_of_distances + return self.__sum_of_distances + + + def __set_default_options(self): + self.__init_type = 'RANDOM' + self.__num_random_inits = 10 + self.__desired_num_random_inits = 10 + self.__use_real_randomness = True + self.__seed = 0 + self.__refine = True + self.__time_limit_in_sec = 0 + self.__epsilon = 0.0001 + self.__max_itrs = 100 + self.__max_itrs_without_update = 3 + self.__num_inits_increase_order = 10 + self.__init_type_increase_order = 'K-MEANS++' + self.__max_itrs_increase_order = 10 + self.__print_to_stdout = 2 + + + def __construct_initial_medians(self, graph_ids, timer, initial_medians): + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n===========================================================') + print('Constructing initial median(s).') + print('-----------------------------------------------------------') + + # Compute or sample the initial median(s). + initial_medians.clear() + if self.__init_type == 'MEDOID': + self.__compute_medoid(graph_ids, timer, initial_medians) + elif self.__init_type == 'MAX': + pass # @todo +# compute_max_order_graph_(graph_ids, initial_medians) + elif self.__init_type == 'MIN': + pass # @todo +# compute_min_order_graph_(graph_ids, initial_medians) + elif self.__init_type == 'MEAN': + pass # @todo +# compute_mean_order_graph_(graph_ids, initial_medians) + else: + pass # @todo +# sample_initial_medians_(graph_ids, initial_medians) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('===========================================================') + + + def __compute_medoid(self, graph_ids, timer, initial_medians): + # Use method selected for initialization phase. + self.__ged_env.set_method(self.__init_method, self.__init_options) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout) + + # Compute the medoid. + medoid_id = graph_ids[0] + best_sum_of_distances = np.inf + for g_id in graph_ids: + if timer.expired(): + self.__state = AlgorithmState.CALLED + break + sum_of_distances = 0 + for h_id in graph_ids: + self.__ged_env.run_method(g_id, h_id) + sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id) + if sum_of_distances < best_sum_of_distances: + best_sum_of_distances = sum_of_distances + medoid_id = g_id + + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress.update(1) + initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo + + # Print information about current iteration. 
+ if self.__print_to_stdout == 2: + print('\n') + + + def __termination_criterion_met(self, converged, timer, itr, itrs_without_update): + if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): + if self.__state == AlgorithmState.TERMINATED: + self.__state = AlgorithmState.INITIALIZED + return True + return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) + + + def __update_median(self, graphs, median): + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Updating median: ', end='') + + # Store copy of the old median. + old_median = median.copy() # @todo: this is just a shallow copy. + + # Update the node labels. + if self.__labeled_nodes: + self.__update_node_labels(graphs, median) + + # Update the edges and their labels. + self.__update_edges(graphs, median) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('done.') + + return not self.__are_graphs_equal(median, old_median) + + + def __update_node_labels(self, graphs, median): + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('nodes ... ', end='') + + # Iterate through all nodes of the median. + for i in range(0, nx.number_of_nodes(median)): +# print('i: ', i) + # Collect the labels of the substituted nodes. + node_labels = [] + for graph_id, graph in graphs.items(): +# print('graph_id: ', graph_id) +# print(self.__node_maps_from_median[graph_id]) + k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i) +# print('k: ', k) + if k != np.inf: + node_labels.append(graph.nodes[k]) + + # Compute the median label and update the median. + if len(node_labels) > 0: + median_label = self.__ged_env.get_median_node_label(node_labels) + if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon: + nx.set_node_attributes(median, {i: median_label}) + + + def __update_edges(self, graphs, median): + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('edges ... ', end='') + + # Clear the adjacency lists of the median and reset number of edges to 0. + median_edges = list(median.edges) + for (head, tail) in median_edges: + median.remove_edge(head, tail) + + # @todo: what if edge is not labeled? + # Iterate through all possible edges (i,j) of the median. + for i in range(0, nx.number_of_nodes(median)): + for j in range(i + 1, nx.number_of_nodes(median)): + + # Collect the labels of the edges to which (i,j) is mapped by the node maps. + edge_labels = [] + for graph_id, graph in graphs.items(): + k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i) + l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j) + if k != np.inf and l != np.inf: + if graph.has_edge(k, l): + edge_labels.append(graph.edges[(k, l)]) + + # Compute the median edge label and the overall edge relabeling cost. + rel_cost = 0 + median_label = self.__ged_env.get_edge_label(1) + if median.has_edge(i, j): + median_label = median.edges[(i, j)] + if self.__labeled_edges and len(edge_labels) > 0: + new_median_label = self.__ged_env.median_edge_label(edge_labels) + if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon: + median_label = new_median_label + for edge_label in edge_labels: + rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label) + + # Update the median. 
+ if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs): + median.add_edge(i, j, **median_label) + else: + if median.has_edge(i, j): + median.remove_edge(i, j) + + + def __update_node_maps(self): + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout) + + # Update the node maps. + node_maps_were_modified = False + for graph_id in self.__node_maps_from_median: + self.__ged_env.run_method(self.__median_id, graph_id) + if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon: # @todo: see above. + self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) # @todo: node_map may not assigned. + node_maps_were_modified = True + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress.update(1) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n') + + # Return true if the node maps were modified. + return node_maps_were_modified + + + def __improve_sum_of_distances(self, timer): + pass + + + def __median_available(self): + return self.__median_id != np.inf + + + def __get_node_image_from_map(self, node_map, node): + """ + Return ID of the node mapping of `node` in `node_map`. + + Parameters + ---------- + node_map : list[tuple(int, int)] + List of node maps where the mapping node is found. + + node : int + The mapping node of this node is returned + + Raises + ------ + Exception + If the node with ID `node` is not contained in the source nodes of the node map. + + Returns + ------- + int + ID of the mapping of `node`. + + Notes + ----- + This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function. + """ + if node < len(node_map): + return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf + else: + raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.') + return np.inf + + + def __are_graphs_equal(self, g1, g2): + """ + Check if the two graphs are equal. + + Parameters + ---------- + g1 : NetworkX graph object + Graph 1 to be compared. + + g2 : NetworkX graph object + Graph 2 to be compared. + + Returns + ------- + bool + True if the two graph are equal. + + Notes + ----- + This is not an identical check. Here the two graphs are equal if and only if their original_node_ids, nodes, all node labels, edges and all edge labels are equal. This function is specifically designed for class `MedianGraphEstimator` and should not be used elsewhere. + """ + # check original node ids. + if not g1.graph['original_node_ids'] == g2.graph['original_node_ids']: + return False + # check nodes. + nlist1 = [n for n in g1.nodes(data=True)] + nlist2 = [n for n in g2.nodes(data=True)] + if not nlist1 == nlist2: + return False + # check edges. 
+ elist1 = [n for n in g1.edges(data=True)] + elist2 = [n for n in g2.edges(data=True)] + if not elist1 == elist2: + return False + + return True + + + def compute_my_cost(g, h, node_map): + cost = 0.0 + for node in g.nodes: + cost += 0 + \ No newline at end of file diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py new file mode 100644 index 0000000..dfbaef2 --- /dev/null +++ b/gklearn/preimage/median_preimage_generator.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 26 18:27:22 2020 + +@author: ljia +""" +from gklearn.preimage.preimage_generator import PreimageGenerator +# from gklearn.utils.dataset import Dataset + +class MedianPreimageGenerator(PreimageGenerator): + + def __init__(self, mge, dataset): + self.__mge = mge + self.__dataset = dataset \ No newline at end of file diff --git a/gklearn/preimage/misc.py b/gklearn/preimage/misc.py new file mode 100644 index 0000000..18682c8 --- /dev/null +++ b/gklearn/preimage/misc.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 19 18:13:56 2020 + +@author: ljia +""" + +def options_string_to_options_map(options_string): + """Transforms an options string into an options map. + + Parameters + ---------- + options_string : string + Options string of the form "[--
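For orientation, the intended call sequence for the new MedianGraphEstimator class, pieced together from the docstrings above, is roughly the following sketch (not part of the patch). The gedlibpy calls (GEDEnv, set_edit_cost, add_nx_graph, init, get_nx_graph) are assumed to exist as referenced in those docstrings, and the toy graphs, attribute names, edit-cost name and option values are purely illustrative.

import networkx as nx
from gklearn.gedlib import gedlibpy
from gklearn.preimage.median_graph_estimator import MedianGraphEstimator

# Toy labeled graphs; the 'chem'/'valence' attribute names follow the
# convention used by convertGraph() in ged.py above (illustrative only).
def toy_graph(node_labels):
    g = nx.Graph()
    for i, label in enumerate(node_labels):
        g.add_node(str(i), chem=label)
    for i in range(len(node_labels) - 1):
        g.add_edge(str(i), str(i + 1), valence='1')
    return g

dataset = [toy_graph(['C', 'O']), toy_graph(['C', 'C', 'O']), toy_graph(['C', 'N', 'O'])]

# Set up the GED environment (method names assumed from the gedlibpy bindings
# referenced in the constructor and run() docstrings above).
ged_env = gedlibpy.GEDEnv()
ged_env.set_edit_cost('CONSTANT')
graph_ids = [ged_env.add_nx_graph(g, '') for g in dataset]
# Dummy graphs reserving the IDs of the set-median and the generalized median.
set_median_id = ged_env.add_nx_graph(nx.Graph(), '')
gen_median_id = ged_env.add_nx_graph(nx.Graph(), '')
# The environment must be initialized before the estimator is constructed.
ged_env.init()

# Run the estimator with an options string in the format parsed by set_options().
mge = MedianGraphEstimator(ged_env, constant_node_costs=True)
mge.set_options('--init-type MEDOID --max-itrs 50 --stdout 1')
mge.set_init_method('BRANCH_FAST')
mge.set_descent_method('IPFP')
mge.run(graph_ids, set_median_id, gen_median_id)

print('SOD after initialization:', mge.get_sum_of_distances('initialized'))
print('converged SOD:', mge.get_sum_of_distances('converged'))
# Retrieve the generalized median, mirroring the get_nx_graph() call used in run().
gen_median = ged_env.get_nx_graph(gen_median_id, True, True, False)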