From dcff21cda7827a46f9e5c15b71364cb240d56598 Mon Sep 17 00:00:00 2001
From: jajupmochi
Date: Fri, 27 Mar 2020 10:46:00 +0100
Subject: [PATCH] Revert "clear repo: remove preimage."

This reverts commit 5fe81a932b96b647d773939175784bce5f703413.
---
 gklearn/preimage/common_types.py                |  17 +
 gklearn/preimage/cpp2python.py                  | 134 ++++
 gklearn/preimage/find_best_k.py                 | 170 +++++
 gklearn/preimage/fitDistance.py                 | 430 +++++++++++
 gklearn/preimage/ged.py                         | 467 ++++++++++++
 gklearn/preimage/iam.py                         | 775 +++++++++++++++++++
 gklearn/preimage/knn.py                         | 114 +++
 gklearn/preimage/libs.py                        |   6 +
 gklearn/preimage/median.py                      | 218 ++++++
 gklearn/preimage/median_benoit.py               | 201 +++++
 gklearn/preimage/median_graph_estimator.py      | 826 ++++++++++++++++++++
 gklearn/preimage/median_linlin.py               | 215 ++++++
 gklearn/preimage/median_preimage_generator.py   |  15 +
 gklearn/preimage/misc.py                        | 108 +++
 gklearn/preimage/pathfrequency.py               | 201 +++++
 gklearn/preimage/preimage_generator.py          |  12 +
 gklearn/preimage/preimage_iam.py                | 705 +++++++++++++++++
 gklearn/preimage/preimage_random.py             | 309 ++++++++
 gklearn/preimage/python_code.py                 | 122 +++
 gklearn/preimage/test.py                        |  83 ++
 gklearn/preimage/test_fitDistance.py            | 648 ++++++++++++++++
 gklearn/preimage/test_ged.py                    | 520 +++++++++++++
 gklearn/preimage/test_iam.py                    | 964 ++++++++++++++++++++++++
 gklearn/preimage/test_k_closest_graphs.py       | 462 ++++++++++++
 gklearn/preimage/test_median_graph_estimator.py |  91 +++
 gklearn/preimage/test_others.py                 | 686 +++++++++++++++++
 gklearn/preimage/test_preimage_iam.py           | 620 +++++++++++++++
 gklearn/preimage/test_preimage_mix.py           | 539 +++++++++++++
 gklearn/preimage/test_preimage_random.py        | 398 ++++++++++
 gklearn/preimage/timer.py                       |  40 +
 gklearn/preimage/utils.py                       | 151 ++++
 gklearn/preimage/visualization.py               | 585 ++++++++++++++
 gklearn/preimage/xp_fit_method.py               | 935 +++++++++++++++++++++++
 gklearn/preimage/xp_letter_h.py                 | 476 ++++++++++++
 gklearn/preimage/xp_monoterpenoides.py          | 249 ++++++
 35 files changed, 12492 insertions(+)
 create mode 100644 gklearn/preimage/common_types.py
 create mode 100644 gklearn/preimage/cpp2python.py
 create mode 100644 gklearn/preimage/find_best_k.py
 create mode 100644 gklearn/preimage/fitDistance.py
 create mode 100644 gklearn/preimage/ged.py
 create mode 100644 gklearn/preimage/iam.py
 create mode 100644 gklearn/preimage/knn.py
 create mode 100644 gklearn/preimage/libs.py
 create mode 100644 gklearn/preimage/median.py
 create mode 100644 gklearn/preimage/median_benoit.py
 create mode 100644 gklearn/preimage/median_graph_estimator.py
 create mode 100644 gklearn/preimage/median_linlin.py
 create mode 100644 gklearn/preimage/median_preimage_generator.py
 create mode 100644 gklearn/preimage/misc.py
 create mode 100644 gklearn/preimage/pathfrequency.py
 create mode 100644 gklearn/preimage/preimage_generator.py
 create mode 100644 gklearn/preimage/preimage_iam.py
 create mode 100644 gklearn/preimage/preimage_random.py
 create mode 100644 gklearn/preimage/python_code.py
 create mode 100644 gklearn/preimage/test.py
 create mode 100644 gklearn/preimage/test_fitDistance.py
 create mode 100644 gklearn/preimage/test_ged.py
 create mode 100644 gklearn/preimage/test_iam.py
 create mode 100644 gklearn/preimage/test_k_closest_graphs.py
 create mode 100644 gklearn/preimage/test_median_graph_estimator.py
 create mode 100644 gklearn/preimage/test_others.py
 create mode 100644 gklearn/preimage/test_preimage_iam.py
 create mode 100644 gklearn/preimage/test_preimage_mix.py
 create mode 100644 gklearn/preimage/test_preimage_random.py
 create mode 100644
gklearn/preimage/timer.py create mode 100644 gklearn/preimage/utils.py create mode 100644 gklearn/preimage/visualization.py create mode 100644 gklearn/preimage/xp_fit_method.py create mode 100644 gklearn/preimage/xp_letter_h.py create mode 100644 gklearn/preimage/xp_monoterpenoides.py diff --git a/gklearn/preimage/common_types.py b/gklearn/preimage/common_types.py new file mode 100644 index 0000000..2face25 --- /dev/null +++ b/gklearn/preimage/common_types.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 19 18:17:38 2020 + +@author: ljia +""" + +from enum import Enum, auto + +class AlgorithmState(Enum): + """can be used to specify the state of an algorithm. + """ + CALLED = auto # The algorithm has been called. + INITIALIZED = auto # The algorithm has been initialized. + CONVERGED = auto # The algorithm has converged. + TERMINATED = auto # The algorithm has terminated. \ No newline at end of file diff --git a/gklearn/preimage/cpp2python.py b/gklearn/preimage/cpp2python.py new file mode 100644 index 0000000..9d63026 --- /dev/null +++ b/gklearn/preimage/cpp2python.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Mar 20 11:09:04 2020 + +@author: ljia +""" +import re + +def convert_function(cpp_code): +# f_cpp = open('cpp_code.cpp', 'r') +# # f_cpp = open('cpp_ext/src/median_graph_estimator.ipp', 'r') +# cpp_code = f_cpp.read() + python_code = cpp_code.replace('else if (', 'elif ') + python_code = python_code.replace('if (', 'if ') + python_code = python_code.replace('else {', 'else:') + python_code = python_code.replace(') {', ':') + python_code = python_code.replace(';\n', '\n') + python_code = re.sub('\n(.*)}\n', '\n\n', python_code) + # python_code = python_code.replace('}\n', '') + python_code = python_code.replace('throw', 'raise') + python_code = python_code.replace('error', 'Exception') + python_code = python_code.replace('"', '\'') + python_code = python_code.replace('\\\'', '"') + python_code = python_code.replace('try {', 'try:') + python_code = python_code.replace('true', 'True') + python_code = python_code.replace('false', 'False') + python_code = python_code.replace('catch (...', 'except') + # python_code = re.sub('std::string\(\'(.*)\'\)', '$1', python_code) + + return python_code + + + +# # python_code = python_code.replace('}\n', '') + + + + +# python_code = python_code.replace('option.first', 'opt_name') +# python_code = python_code.replace('option.second', 'opt_val') +# python_code = python_code.replace('ged::Error', 'Exception') +# python_code = python_code.replace('std::string(\'Invalid argument "\')', '\'Invalid argument "\'') + + +# f_cpp.close() +# f_python = open('python_code.py', 'w') +# f_python.write(python_code) +# f_python.close() + + +def convert_function_comment(cpp_fun_cmt, param_types): + cpp_fun_cmt = cpp_fun_cmt.replace('\t', '') + cpp_fun_cmt = cpp_fun_cmt.replace('\n * ', ' ') + # split the input comment according to key words. + param_split = None + note = None + cmt_split = cpp_fun_cmt.split('@brief')[1] + brief = cmt_split + if '@param' in cmt_split: + cmt_split = cmt_split.split('@param') + brief = cmt_split[0] + param_split = cmt_split[1:] + if '@note' in cmt_split[-1]: + note_split = cmt_split[-1].split('@note') + if param_split is not None: + param_split.pop() + param_split.append(note_split[0]) + else: + brief = note_split[0] + note = note_split[1] + + # get parameters. 
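# A minimal, self-contained sketch (not from the patch) of the replace-chain
# idea behind convert_function() above: chained textual substitutions rather
# than a real parser, so the output is only approximately valid Python.
import re
snippet = 'if (a == b) {\n    return true;\n}\n'
out = snippet.replace('if (', 'if ').replace(') {', ':')
out = out.replace('true', 'True').replace(';\n', '\n')
out = re.sub('\n(.*)}\n', '\n\n', out)   # drops the dangling closing brace line
print(out)   # prints "if a == b:" and "    return True"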
+ if param_split is not None: + for idx, param in enumerate(param_split): + _, param_name, param_desc = param.split(' ', 2) + param_name = function_comment_strip(param_name, ' *\n\t/') + param_desc = function_comment_strip(param_desc, ' *\n\t/') + param_split[idx] = (param_name, param_desc) + + # strip comments. + brief = function_comment_strip(brief, ' *\n\t/') + if note is not None: + note = function_comment_strip(note, ' *\n\t/') + + # construct the Python function comment. + python_fun_cmt = '"""' + python_fun_cmt += brief + '\n' + if param_split is not None and len(param_split) > 0: + python_fun_cmt += '\nParameters\n----------' + for idx, param in enumerate(param_split): + python_fun_cmt += '\n' + param[0] + ' : ' + param_types[idx] + python_fun_cmt += '\n\t' + param[1] + '\n' + if note is not None: + python_fun_cmt += '\nNote\n----\n' + note + '\n' + python_fun_cmt += '"""' + + return python_fun_cmt + + +def function_comment_strip(comment, bad_chars): + head_removed, tail_removed = False, False + while not head_removed or not tail_removed: + if comment[0] in bad_chars: + comment = comment[1:] + head_removed = False + else: + head_removed = True + if comment[-1] in bad_chars: + comment = comment[:-1] + tail_removed = False + else: + tail_removed = True + + return comment + + +if __name__ == '__main__': +# python_code = convert_function(""" +# if (print_to_stdout_ == 2) { +# std::cout << "\n===========================================================\n"; +# std::cout << "Block gradient descent for initial median " << median_pos + 1 << " of " << medians.size() << ".\n"; +# std::cout << "-----------------------------------------------------------\n"; +# } +# """) + + + python_fun_cmt = convert_function_comment(""" + /*! + * @brief Returns the sum of distances. + * @param[in] state The state of the estimator. + * @return The sum of distances of the median when the estimator was in the state @p state during the last call to run(). + */ + """, ['string', 'string']) \ No newline at end of file diff --git a/gklearn/preimage/find_best_k.py b/gklearn/preimage/find_best_k.py new file mode 100644 index 0000000..df38d32 --- /dev/null +++ b/gklearn/preimage/find_best_k.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Jan 9 11:54:32 2020 + +@author: ljia +""" +import numpy as np +import random +import csv + +from gklearn.utils.graphfiles import loadDataset +from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs + +def find_best_k(): + ds = {'name': 'monoterpenoides', + 'dataset': '../datasets/monoterpenoides/dataset_10+.ds'} # node/edge symb + Gn, y_all = loadDataset(ds['dataset']) +# Gn = Gn[0:50] + gkernel = 'treeletkernel' + node_label = 'atom' + edge_label = 'bond_type' + ds_name = 'mono' + dir_output = 'results/test_find_best_k/' + + repeats = 50 + k_list = range(2, 11) + fit_method = 'k-graphs' + # fitted on the whole dataset - treelet - mono + edit_costs = [0.1268873773592978, 0.004084633224249829, 0.0897581955378986, 0.15328856114451297, 0.3109956881625734, 0.0] + + # create result files. + fn_output_detail = 'results_detail.' 
+ fit_method + '.csv' + f_detail = open(dir_output + fn_output_detail, 'a') + csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', + 'repeat', 'median set', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', + 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', + 'dis_k gi -> GM']) + f_detail.close() + fn_output_summary = 'results_summary.csv' + f_summary = open(dir_output + fn_output_summary, 'a') + csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', + 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM', + 'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', + 'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', + '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', + 'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', + 'repeats better dis_k gi -> GM']) + f_summary.close() + + random.seed(1) + rdn_seed_list = random.sample(range(0, repeats * 100), repeats) + + for k in k_list: + print('\n--------- k =', k, '----------') + + sod_sm_list = [] + sod_gm_list = [] + dis_k_sm_list = [] + dis_k_gm_list = [] + dis_k_gi_min_list = [] + nb_sod_sm2gm = [0, 0, 0] + nb_dis_k_sm2gm = [0, 0, 0] + nb_dis_k_gi2sm = [0, 0, 0] + nb_dis_k_gi2gm = [0, 0, 0] + repeats_better_sod_sm2gm = [] + repeats_better_dis_k_sm2gm = [] + repeats_better_dis_k_gi2sm = [] + repeats_better_dis_k_gi2gm = [] + + + for repeat in range(repeats): + print('\nrepeat =', repeat) + random.seed(rdn_seed_list[repeat]) + median_set_idx = random.sample(range(0, len(Gn)), k) + print('median set: ', median_set_idx) + + sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \ + = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, + fit_method='k-graphs', + edit_costs=edit_costs, + group_min=median_set_idx, + parallel=False) + + # write result detail. + sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm)) + dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm)) + dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min)) + dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min)) + f_detail = open(dir_output + fn_output_detail, 'a') + csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, repeat, + median_set_idx, sod_sm, sod_gm, dis_k_sm, dis_k_gm, + dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm, + dis_k_gi2gm]) + f_detail.close() + + # compute result summary. + sod_sm_list.append(sod_sm) + sod_gm_list.append(sod_gm) + dis_k_sm_list.append(dis_k_sm) + dis_k_gm_list.append(dis_k_gm) + dis_k_gi_min_list.append(dis_k_gi_min) + # # SOD SM -> GM + if sod_sm > sod_gm: + nb_sod_sm2gm[0] += 1 + repeats_better_sod_sm2gm.append(repeat) + elif sod_sm == sod_gm: + nb_sod_sm2gm[1] += 1 + elif sod_sm < sod_gm: + nb_sod_sm2gm[2] += 1 + # # dis_k SM -> GM + if dis_k_sm > dis_k_gm: + nb_dis_k_sm2gm[0] += 1 + repeats_better_dis_k_sm2gm.append(repeat) + elif dis_k_sm == dis_k_gm: + nb_dis_k_sm2gm[1] += 1 + elif dis_k_sm < dis_k_gm: + nb_dis_k_sm2gm[2] += 1 + # # dis_k gi -> SM + if dis_k_gi_min > dis_k_sm: + nb_dis_k_gi2sm[0] += 1 + repeats_better_dis_k_gi2sm.append(repeat) + elif dis_k_gi_min == dis_k_sm: + nb_dis_k_gi2sm[1] += 1 + elif dis_k_gi_min < dis_k_sm: + nb_dis_k_gi2sm[2] += 1 + # # dis_k gi -> GM + if dis_k_gi_min > dis_k_gm: + nb_dis_k_gi2gm[0] += 1 + repeats_better_dis_k_gi2gm.append(repeat) + elif dis_k_gi_min == dis_k_gm: + nb_dis_k_gi2gm[1] += 1 + elif dis_k_gi_min < dis_k_gm: + nb_dis_k_gi2gm[2] += 1 + + # write result summary. 
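# How the sign-based verdicts in these result rows are read (illustrative
# sketch): each comparison column comes from getRelations(np.sign(new - old)),
# defined at the bottom of this file, so a smaller new value reads as 'better'.
import numpy as np
verdict = {-1: 'better', 0: 'same', 1: 'worse'}
sod_sm, sod_gm = 3.2, 2.8                          # made-up SOD values
print(verdict[int(np.sign(sod_gm - sod_sm))])      # -> 'better'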
+ sod_sm_mean = np.mean(sod_sm_list) + sod_gm_mean = np.mean(sod_gm_list) + dis_k_sm_mean = np.mean(dis_k_sm_list) + dis_k_gm_mean = np.mean(dis_k_gm_list) + dis_k_gi_min_mean = np.mean(dis_k_gi_min_list) + sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean)) + dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean)) + dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean)) + dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean)) + f_summary = open(dir_output + fn_output_summary, 'a') + csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, + sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean, + dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, + dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, + nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, + repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, + repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm]) + f_summary.close() + + print('\ncomplete.') + return + + +def getRelations(sign): + if sign == -1: + return 'better' + elif sign == 0: + return 'same' + elif sign == 1: + return 'worse' + + +if __name__ == '__main__': + find_best_k() \ No newline at end of file diff --git a/gklearn/preimage/fitDistance.py b/gklearn/preimage/fitDistance.py new file mode 100644 index 0000000..234f7fc --- /dev/null +++ b/gklearn/preimage/fitDistance.py @@ -0,0 +1,430 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Wed Oct 16 14:20:06 2019 + +@author: ljia +""" +import numpy as np +from tqdm import tqdm +from itertools import combinations_with_replacement, combinations +import multiprocessing +from multiprocessing import Pool +from functools import partial +import time +import random +import sys + +from scipy import optimize +from scipy.optimize import minimize +import cvxpy as cp + +from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic +from gklearn.preimage.utils import kernel_distance_matrix + +def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max, + params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', + 'method': 'IPFP', 'stabilizer': None}, + init_costs=[3, 3, 1, 3, 3, 1], + dataset='monoterpenoides', Kmatrix=None, + parallel=True): +# dataset = dataset.lower() + + # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them. +# random.seed(1) +# cost_rdm = random.sample(range(1, 10), 6) +# init_costs = cost_rdm + [0] +# init_costs = cost_rdm +# init_costs = [3, 3, 1, 3, 3, 1] +# init_costs = [i * 0.01 for i in cost_rdm] + [0] +# init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0] +# init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0] +# init_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0] +# idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0] + + # compute distances in feature space. + dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, + Kmatrix=Kmatrix, gkernel=gkernel) + dis_k_vec = [] + for i in range(len(dis_k_mat)): +# for j in range(i, len(dis_k_mat)): + for j in range(i + 1, len(dis_k_mat)): + dis_k_vec.append(dis_k_mat[i, j]) + dis_k_vec = np.array(dis_k_vec) + + # init ged. 
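# The double loop above keeps only the strict upper triangle of the symmetric
# kernel-distance matrix; the same condensed vector with plain numpy
# (illustrative sketch):
import numpy as np
D = np.array([[0., 1., 2.],
              [1., 0., 3.],
              [2., 3., 0.]])
dis_k_vec_demo = D[np.triu_indices_from(D, k=1)]   # array([1., 2., 3.]): pairs (0,1), (0,2), (1,2)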
+ print('\ninitial:') + time0 = time.time() + params_ged['dataset'] = dataset + params_ged['edit_cost_constant'] = init_costs + ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged, + parallel=parallel) + residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))] + time_list = [time.time() - time0] + edit_cost_list = [init_costs] + nb_cost_mat = np.array(n_edit_operations) + nb_cost_mat_list = [nb_cost_mat] + print('edit_costs:', init_costs) + print('residual_list:', residual_list) + + for itr in range(itr_max): + print('\niteration', itr) + time0 = time.time() + # "fit" geds to distances in feature space by tuning edit costs using the + # Least Squares Method. + np.savez('results/xp_fit_method/fit_data_debug' + str(itr) + '.gm', + nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec, + n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init, + ged_mat=ged_mat) + edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec, + dataset=dataset, cost=params_ged['cost']) + for i in range(len(edit_costs_new)): + if -1e-9 <= edit_costs_new[i] <= 1e-9: + edit_costs_new[i] = 0 + if edit_costs_new[i] < 0: + raise ValueError('The edit cost is negative.') +# for i in range(len(edit_costs_new)): +# if edit_costs_new[i] < 0: +# edit_costs_new[i] = 0 + + # compute new GEDs and numbers of edit operations. + params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75]) + ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged, + parallel=parallel) + residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec)))) + time_list.append(time.time() - time0) + edit_cost_list.append(edit_costs_new) + nb_cost_mat = np.array(n_edit_operations) + nb_cost_mat_list.append(nb_cost_mat) + print('edit_costs:', edit_costs_new) + print('residual_list:', residual_list) + + return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \ + time_list, nb_cost_mat_list + + +def compute_geds(Gn, params_ged, parallel=False): + edit_cost_name = params_ged['cost'] + if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2': + get_nb_eo = get_nb_edit_operations_letter + elif edit_cost_name == 'NON_SYMBOLIC': + get_nb_eo = get_nb_edit_operations_nonsymbolic + else: + get_nb_eo = get_nb_edit_operations + ged_mat = np.zeros((len(Gn), len(Gn))) + if parallel: +# print('parallel') +# len_itr = int(len(Gn) * (len(Gn) + 1) / 2) + len_itr = int(len(Gn) * (len(Gn) - 1) / 2) + ged_vec = [0 for i in range(len_itr)] + n_edit_operations = [0 for i in range(len_itr)] +# itr = combinations_with_replacement(range(0, len(Gn)), 2) + itr = combinations(range(0, len(Gn)), 2) + n_jobs = multiprocessing.cpu_count() + if len_itr < 100 * n_jobs: + chunksize = int(len_itr / n_jobs) + 1 + else: + chunksize = 100 + def init_worker(gn_toshare): + global G_gn + G_gn = gn_toshare + do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo) + pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,)) + iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize), + desc='computing GEDs', file=sys.stdout) +# iterator = pool.imap_unordered(do_partial, itr, chunksize) + for i, j, dis, n_eo_tmp in iterator: + idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2) + ged_vec[idx_itr] = dis + ged_mat[i][j] = dis + ged_mat[j][i] = dis + n_edit_operations[idx_itr] = n_eo_tmp +# print('\n-------------------------------------------') +# print(i, j, idx_itr, dis) + pool.close() + pool.join() + + else: + ged_vec = [] + 
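# Quick check (sketch) of the condensed-index formula used in the parallel
# branch above: pair (i, j) with i < j lands at n*i + j - (i+1)*(i+2)/2, which
# enumerates exactly the n*(n-1)/2 strict upper-triangle pairs in order.
n = 5
flat = 0
for i in range(n):
    for j in range(i + 1, n):
        assert int(n * i + j - (i + 1) * (i + 2) / 2) == flat
        flat += 1
assert flat == n * (n - 1) // 2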
n_edit_operations = [] + for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout): +# for i in range(len(Gn)): + for j in range(i + 1, len(Gn)): + dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged) + ged_vec.append(dis) + ged_mat[i][j] = dis + ged_mat[j][i] = dis + n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward) + n_edit_operations.append(n_eo_tmp) + + return ged_vec, ged_mat, n_edit_operations + + +def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr): + i = itr[0] + j = itr[1] + dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo) + return i, j, dis, n_eo_tmp + + +def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo): + dis, pi_forward, pi_backward = GED(g1, g2, **params_ged) + n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0] + return dis, n_eo_tmp + + +def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides', + cost='CONSTANT', rw_constraints='inequality'): +# if dataset == 'Letter-high': + if cost == 'LETTER': + pass +# # method 1: set alpha automatically, just tune c_vir and c_eir by +# # LMS using cvxpy. +# alpha = 0.5 +# coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec) +## if np.count_nonzero(nb_cost_mat[:,4]) == 0: +## alpha = 0.75 +## else: +## alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0]) +## alpha = alpha * 0.99 +# param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1]) +# param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5]) +# nb_cost_mat_new = np.column_stack((param_vir, param_eir)) +# dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3] +# +# x = cp.Variable(nb_cost_mat_new.shape[1]) +# cost = cp.sum_squares(nb_cost_mat_new * x - dis_new) +# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] +# prob = cp.Problem(cp.Minimize(cost), constraints) +# prob.solve() +# edit_costs_new = x.value +# edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha]) +# residual = np.sqrt(prob.value) + +# # method 2: tune c_vir, c_eir and alpha by nonlinear programming by +# # scipy.optimize.minimize. +# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1] +# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5] +# w2 = nb_cost_mat[:,3] +# w3 = dis_k_vec +# func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \ +# + w2 * x[2] - w3 * x[3]) ** 2) +# bounds = ((0, None), (0., None), (0.5, 0.5), (0, None)) +# res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds) +# edit_costs_new = res.x[0:3] +# residual = res.fun + + # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy. + + +# # method 4: tune c_vir, c_eir and alpha by QP function +# # scipy.optimize.least_squares. An initial guess is required. +# w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1] +# w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5] +# w2 = nb_cost_mat[:,3] +# w3 = dis_k_vec +# func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \ +# + w2 * x[2] - w3 * x[3]) ** 2 +# res = optimize.root(func, [0.9, 1.7, 0.75, 100]) +# edit_costs_new = res.x +# residual = None + elif cost == 'LETTER2': +# # 1. if c_vi != c_vr, c_ei != c_er. +# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] +# x = cp.Variable(nb_cost_mat_new.shape[1]) +# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) +## # 1.1 no constraints. +## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] +# # 1.2 c_vs <= c_vi + c_vr. +# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], +# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] +## # 2. 
if c_vi == c_vr, c_ei == c_er. +## nb_cost_mat_new = nb_cost_mat[:,[0,3,4]] +## nb_cost_mat_new[:,0] += nb_cost_mat[:,1] +## nb_cost_mat_new[:,2] += nb_cost_mat[:,5] +## x = cp.Variable(nb_cost_mat_new.shape[1]) +## cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) +## # 2.1 no constraints. +## constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]] +### # 2.2 c_vs <= c_vi + c_vr. +### constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], +### np.array([2.0, -1.0, 0.0]).T@x >= 0.0] +# +# prob = cp.Problem(cp.Minimize(cost_fun), constraints) +# prob.solve() +# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] +# edit_costs_new = np.array(edit_costs_new) +# residual = np.sqrt(prob.value) + if rw_constraints == 'inequality': + # c_vs <= c_vi + c_vr. + nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], + np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + try: + prob.solve(verbose=True) + except MemoryError as error0: + print('\nUsing solver "OSQP" caused a memory error.') + print('the original error message is\n', error0) + print('solver status: ', prob.status) + print('trying solver "CVXOPT" instead...\n') + try: + prob.solve(solver=cp.CVXOPT, verbose=True) + except Exception as error1: + print('\nAn error occured when using solver "CVXOPT".') + print('the original error message is\n', error1) + print('solver status: ', prob.status) + print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n') + prob.solve(solver=cp.MOSEK, verbose=True) + else: + print('solver status: ', prob.status) + else: + print('solver status: ', prob.status) + print() + edit_costs_new = x.value + residual = np.sqrt(prob.value) + elif rw_constraints == '2constraints': + # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er. + nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], + np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0, + np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0, + np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + edit_costs_new = x.value + residual = np.sqrt(prob.value) + elif rw_constraints == 'no-constraint': + # no constraint. + nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + edit_costs_new = x.value + residual = np.sqrt(prob.value) +# elif method == 'inequality_modified': +# # c_vs <= c_vi + c_vr. +# nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] +# x = cp.Variable(nb_cost_mat_new.shape[1]) +# cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) +# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], +# np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] +# prob = cp.Problem(cp.Minimize(cost_fun), constraints) +# prob.solve() +# # use same costs for insertion and removal rather than the fitted costs. 
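# The active fitting step above, reduced to a self-contained sketch with
# made-up operation counts: a least-squares fit of the five LETTER2 costs
# (c_vi, c_vr, c_vs, c_ei, c_er) under nonnegativity and c_vs <= c_vi + c_vr.
# Note the `@` operator; `nb_cost_mat_new * x` is the older cvxpy spelling.
import numpy as np
import cvxpy as cp
N = np.array([[2., 1., 1., 0., 1.],
              [1., 0., 2., 1., 0.],
              [0., 2., 1., 1., 1.]])     # rows: graph pairs; columns: op counts
d = np.array([4.0, 3.5, 5.0])            # target kernel distances
x = cp.Variable(N.shape[1])
prob = cp.Problem(cp.Minimize(cp.sum_squares(N @ x - d)),
                  [x >= 0.01, np.array([1., 1., -1., 0., 0.]) @ x >= 0.0])
prob.solve()
edit_costs_demo, residual_demo = x.value, np.sqrt(prob.value)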
+# edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]] +# edit_costs_new = np.array(edit_costs_new) +# residual = np.sqrt(prob.value) + elif cost == 'NON_SYMBOLIC': + is_n_attr = np.count_nonzero(nb_cost_mat[:,2]) + is_e_attr = np.count_nonzero(nb_cost_mat[:,5]) + + if dataset == 'SYNTHETICnew': +# nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] + nb_cost_mat_new = nb_cost_mat[:,[2,3,4]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) +# constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])], +# np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0] +# constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]] + constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])], + np.array([0.0, 1.0, -1.0]).T@x == 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() +# print(x.value) + edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value, + np.array([0.0]))) + residual = np.sqrt(prob.value) + + elif rw_constraints == 'inequality': + # c_vs <= c_vi + c_vr. + if is_n_attr and is_e_attr: + nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], + np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, + np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + edit_costs_new = x.value + residual = np.sqrt(prob.value) + elif is_n_attr and not is_e_attr: + nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])], + np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + print(x.value) + edit_costs_new = np.concatenate((x.value, np.array([0.0]))) + residual = np.sqrt(prob.value) + elif not is_n_attr and is_e_attr: + nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])], + np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:])) + residual = np.sqrt(prob.value) + else: + nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]] + x = cp.Variable(nb_cost_mat_new.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec) + constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), + x.value[2:], np.array([0.0]))) + residual = np.sqrt(prob.value) + else: +# # method 1: simple least square method. +# edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec, +# rcond=None) + +# # method 2: least square method with x_i >= 0. +# edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec) + + # method 3: solve as a quadratic program with constraints. 
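# Zero-padding sketch: when a dataset has no node (or edge) attributes, the
# corresponding substitution column is dropped before fitting and a zero cost
# is re-inserted afterwards, the same np.concatenate pattern used in the
# branches above (values here are illustrative only).
import numpy as np
fitted = np.array([0.8, 0.7, 0.3, 0.2])   # made-up c_vi, c_vr, c_ei, c_er
full_costs = np.concatenate((fitted[0:2], np.array([0.0]), fitted[2:], np.array([0.0])))
# -> [c_vi, c_vr, c_vs = 0, c_ei, c_er, c_es = 0]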
+# P = np.dot(nb_cost_mat.T, nb_cost_mat) +# q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat) +# G = -1 * np.identity(nb_cost_mat.shape[1]) +# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) +# A = np.array([1 for i in range(nb_cost_mat.shape[1])]) +# b = 1 +# x = cp.Variable(nb_cost_mat.shape[1]) +# prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x), +# [G@x <= h]) +# prob.solve() +# edit_costs_new = x.value +# residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec) + +# G = -1 * np.identity(nb_cost_mat.shape[1]) +# h = np.array([0 for i in range(nb_cost_mat.shape[1])]) + x = cp.Variable(nb_cost_mat.shape[1]) + cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec) + constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])], + # np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0] + np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0, + np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0] + prob = cp.Problem(cp.Minimize(cost_fun), constraints) + prob.solve() + edit_costs_new = x.value + residual = np.sqrt(prob.value) + + # method 4: + + return edit_costs_new, residual + + +if __name__ == '__main__': + print('check test_fitDistance.py') \ No newline at end of file diff --git a/gklearn/preimage/ged.py b/gklearn/preimage/ged.py new file mode 100644 index 0000000..a66baaf --- /dev/null +++ b/gklearn/preimage/ged.py @@ -0,0 +1,467 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Oct 17 18:44:59 2019 + +@author: ljia +""" +import numpy as np +import networkx as nx +from tqdm import tqdm +import sys +import multiprocessing +from multiprocessing import Pool +from functools import partial + +#from gedlibpy_linlin import librariesImport, gedlibpy +from gklearn.gedlib import librariesImport, gedlibpy + +def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP', + edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50): + """ + Compute GED for 2 graphs. + """ + +# dataset = dataset.lower() + + if lib == 'gedlibpy': + gedlibpy.restart_env() + gedlibpy.add_nx_graph(convertGraph(g1, cost), "") + gedlibpy.add_nx_graph(convertGraph(g2, cost), "") + + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) + gedlibpy.init() + gedlibpy.set_method(method, algo_options) + gedlibpy.init_method() + + g = listID[0] + h = listID[1] + if stabilizer is None: + gedlibpy.run_method(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + upper = gedlibpy.get_upper_bound(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'mean': + # @todo: to be finished... 
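# The gedlibpy call sequence used throughout GED(), condensed into one helper
# (a sketch assuming the same gedlibpy API as the code above; the stabilizer
# branches only differ in how often this is run and which bound is kept):
def ged_upper_bound_once(g1, g2, cost='CHEM_1', method='IPFP', edit_cost_constant=[]):
    gedlibpy.restart_env()
    gedlibpy.add_nx_graph(convertGraph(g1, cost), "")
    gedlibpy.add_nx_graph(convertGraph(g2, cost), "")
    g, h = gedlibpy.get_all_graph_ids()
    gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
    gedlibpy.init()
    gedlibpy.set_method(method, '')
    gedlibpy.init_method()
    gedlibpy.run_method(g, h)
    return gedlibpy.get_upper_bound(g, h)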
+ upper_list = [np.inf] * repeat + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_list[itr] = gedlibpy.get_upper_bound(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + upper = np.mean(upper_list) + elif stabilizer == 'median': + if repeat % 2 == 0: + repeat += 1 + upper_list = [np.inf] * repeat + pi_forward_list = [0] * repeat + pi_backward_list = [0] * repeat + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_list[itr] = gedlibpy.get_upper_bound(g, h) + pi_forward_list[itr] = gedlibpy.get_forward_map(g, h) + pi_backward_list[itr] = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + upper = np.median(upper_list) + idx_median = upper_list.index(upper) + pi_forward = pi_forward_list[idx_median] + pi_backward = pi_backward_list[idx_median] + elif stabilizer == 'min': + upper = np.inf + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp < upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + if upper == 0: + break + elif stabilizer == 'max': + upper = 0 + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp > upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'gaussian': + pass + + dis = upper + + elif lib == 'gedlib-bash': + import time + import random + import os + from gklearn.utils.graphfiles import saveDataset + + tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' + if not os.path.exists(tmp_dir): + os.makedirs(tmp_dir) + fn_collection = tmp_dir + 'collection.' 
+ str(time.time()) + str(random.randint(0, 1e9)) + xparams = {'method': 'gedlib', 'graph_dir': fn_collection} + saveDataset([g1, g2], ['dummy', 'dummy'], gformat='gxl', group='xml', + filename=fn_collection, xparams=xparams) + + command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n' + command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' + command += 'export LD_LIBRARY_PATH\n' + command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' + command += './ged_for_python_bash monoterpenoides ' + fn_collection \ + + ' \'' + algo_options + '\' ' + for ec in edit_cost_constant: + command += str(ec) + ' ' +# output = os.system(command) + stream = os.popen(command) + output = stream.readlines() +# print(output) + + dis = float(output[0].strip()) + runtime = float(output[1].strip()) + size_forward = int(output[2].strip()) + pi_forward = [int(item.strip()) for item in output[3:3+size_forward]] + pi_backward = [int(item.strip()) for item in output[3+size_forward:]] + +# print(dis) +# print(runtime) +# print(size_forward) +# print(pi_forward) +# print(pi_backward) + + + # make the map label correct (label remove map as np.inf) + nodes1 = [n for n in g1.nodes()] + nodes2 = [n for n in g2.nodes()] + nb1 = nx.number_of_nodes(g1) + nb2 = nx.number_of_nodes(g2) + pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] + pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] +# print(pi_forward) + + + return dis, pi_forward, pi_backward + + +def convertGraph(G, cost): + """Convert a graph to the proper NetworkX format that can be + recognized by library gedlibpy. + """ + G_new = nx.Graph() + if cost == 'LETTER' or cost == 'LETTER2': + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), x=str(attrs['attributes'][0]), + y=str(attrs['attributes'][1])) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2)) + elif cost == 'NON_SYMBOLIC': + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd)) + for a_name in G.graph['node_attrs']: + G_new.nodes[str(nd)][a_name] = str(attrs[a_name]) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2)) + for a_name in G.graph['edge_attrs']: + G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name]) + else: + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) +# G_new.add_edge(str(nd1), str(nd2)) + + return G_new + + +def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', + edit_cost_constant=[], stabilizer='min', repeat=50): + """ + Compute GEDs for a group of graphs. + """ + if lib == 'gedlibpy': + def convertGraph(G): + """Convert a graph to the proper NetworkX format that can be + recognized by library gedlibpy. 
+ """ + G_new = nx.Graph() + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) + for nd1, nd2, attrs in G.edges(data=True): +# G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) + G_new.add_edge(str(nd1), str(nd2)) + + return G_new + + gedlibpy.restart_env() + gedlibpy.add_nx_graph(convertGraph(g1), "") + gedlibpy.add_nx_graph(convertGraph(g2), "") + + listID = gedlibpy.get_all_graph_ids() + gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant) + gedlibpy.init() + gedlibpy.set_method(method, "") + gedlibpy.init_method() + + g = listID[0] + h = listID[1] + if stabilizer is None: + gedlibpy.run_method(g, h) + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + upper = gedlibpy.get_upper_bound(g, h) + lower = gedlibpy.get_lower_bound(g, h) + elif stabilizer == 'min': + upper = np.inf + for itr in range(repeat): + gedlibpy.run_method(g, h) + upper_tmp = gedlibpy.get_upper_bound(g, h) + if upper_tmp < upper: + upper = upper_tmp + pi_forward = gedlibpy.get_forward_map(g, h) + pi_backward = gedlibpy.get_backward_map(g, h) + lower = gedlibpy.get_lower_bound(g, h) + if upper == 0: + break + + dis = upper + + # make the map label correct (label remove map as np.inf) + nodes1 = [n for n in g1.nodes()] + nodes2 = [n for n in g2.nodes()] + nb1 = nx.number_of_nodes(g1) + nb2 = nx.number_of_nodes(g2) + pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward] + pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward] + + return dis, pi_forward, pi_backward + + +def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy', + 'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [], + 'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1', + 'stabilizer': None}, parallel=False): + if parallel: + len_itr = int(len(Gn)) + pi_forward_list = [[] for i in range(len_itr)] + dis_list = [0 for i in range(len_itr)] + + itr = range(0, len_itr) + n_jobs = multiprocessing.cpu_count() + if len_itr < 100 * n_jobs: + chunksize = int(len_itr / n_jobs) + 1 + else: + chunksize = 100 + def init_worker(gn_toshare, gn_median_toshare): + global G_gn, G_gn_median + G_gn = gn_toshare + G_gn_median = gn_median_toshare + do_partial = partial(_compute_ged_median, params_ged) + pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn, Gn_median)) + if verbose: + iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize), + desc='computing GEDs', file=sys.stdout) + else: + iterator = pool.imap_unordered(do_partial, itr, chunksize) + for i, dis_sum, pi_forward in iterator: + pi_forward_list[i] = pi_forward + dis_list[i] = dis_sum +# print('\n-------------------------------------------') +# print(i, j, idx_itr, dis) + pool.close() + pool.join() + + else: + dis_list = [] + pi_forward_list = [] + for idx, G in tqdm(enumerate(Gn), desc='computing median distances', + file=sys.stdout) if verbose else enumerate(Gn): + dis_sum = 0 + pi_forward_list.append([]) + for G_p in Gn_median: + dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, + **params_ged) + pi_forward_list[idx].append(pi_tmp_forward) + dis_sum += dis_tmp + dis_list.append(dis_sum) + + return dis_list, pi_forward_list + + +def _compute_ged_median(params_ged, itr): +# print(itr) + dis_sum = 0 + pi_forward = [] + for G_p in G_gn_median: + dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_gn[itr], G_p, + **params_ged) + pi_forward.append(pi_tmp_forward) + dis_sum += dis_tmp + + return 
itr, dis_sum, pi_forward + + +def get_nb_edit_operations(g1, g2, forward_map, backward_map): + """Compute the number of each edit operations. + """ + n_vi = 0 + n_vr = 0 + n_vs = 0 + n_ei = 0 + n_er = 0 + n_es = 0 + + nodes1 = [n for n in g1.nodes()] + for i, map_i in enumerate(forward_map): + if map_i == np.inf: + n_vr += 1 + elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']: + n_vs += 1 + for map_i in backward_map: + if map_i == np.inf: + n_vi += 1 + +# idx_nodes1 = range(0, len(node1)) + + edges1 = [e for e in g1.edges()] + nb_edges2_cnted = 0 + for n1, n2 in edges1: + idx1 = nodes1.index(n1) + idx2 = nodes1.index(n2) + # one of the nodes is removed, thus the edge is removed. + if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: + n_er += 1 + # corresponding edge is in g2. + elif (forward_map[idx1], forward_map[idx2]) in g2.edges(): + nb_edges2_cnted += 1 + # edge labels are different. + if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \ + != g1.edges[(n1, n2)]['bond_type']: + n_es += 1 + elif (forward_map[idx2], forward_map[idx1]) in g2.edges(): + nb_edges2_cnted += 1 + # edge labels are different. + if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \ + != g1.edges[(n1, n2)]['bond_type']: + n_es += 1 + # corresponding nodes are in g2, however the edge is removed. + else: + n_er += 1 + n_ei = nx.number_of_edges(g2) - nb_edges2_cnted + + return n_vi, n_vr, n_vs, n_ei, n_er, n_es + + +def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map): + """Compute the number of each edit operations. + """ + n_vi = 0 + n_vr = 0 + n_vs = 0 + sod_vs = 0 + n_ei = 0 + n_er = 0 + + nodes1 = [n for n in g1.nodes()] + for i, map_i in enumerate(forward_map): + if map_i == np.inf: + n_vr += 1 + else: + n_vs += 1 + diff_x = float(g1.nodes[nodes1[i]]['x']) - float(g2.nodes[map_i]['x']) + diff_y = float(g1.nodes[nodes1[i]]['y']) - float(g2.nodes[map_i]['y']) + sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y)) + for map_i in backward_map: + if map_i == np.inf: + n_vi += 1 + +# idx_nodes1 = range(0, len(node1)) + + edges1 = [e for e in g1.edges()] + nb_edges2_cnted = 0 + for n1, n2 in edges1: + idx1 = nodes1.index(n1) + idx2 = nodes1.index(n2) + # one of the nodes is removed, thus the edge is removed. + if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf: + n_er += 1 + # corresponding edge is in g2. Edge label is not considered. + elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \ + (forward_map[idx2], forward_map[idx1]) in g2.edges(): + nb_edges2_cnted += 1 + # corresponding nodes are in g2, however the edge is removed. + else: + n_er += 1 + n_ei = nx.number_of_edges(g2) - nb_edges2_cnted + + return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er + + +def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map): + """Compute the number of each edit operations. 
+ """ + n_vi = 0 + n_vr = 0 + n_vs = 0 + sod_vs = 0 + n_ei = 0 + n_er = 0 + n_es = 0 + sod_es = 0 + + nodes1 = [n for n in g1.nodes()] + for i, map_i in enumerate(forward_map): + if map_i == np.inf: + n_vr += 1 + else: + n_vs += 1 + sum_squares = 0 + for a_name in g1.graph['node_attrs']: + diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name]) + sum_squares += np.square(diff) + sod_vs += np.sqrt(sum_squares) + for map_i in backward_map: + if map_i == np.inf: + n_vi += 1 + +# idx_nodes1 = range(0, len(node1)) + + edges1 = [e for e in g1.edges()] + for n1, n2 in edges1: + idx1 = nodes1.index(n1) + idx2 = nodes1.index(n2) + n1_g2 = forward_map[idx1] + n2_g2 = forward_map[idx2] + # one of the nodes is removed, thus the edge is removed. + if n1_g2 == np.inf or n2_g2 == np.inf: + n_er += 1 + # corresponding edge is in g2. + elif (n1_g2, n2_g2) in g2.edges(): + n_es += 1 + sum_squares = 0 + for a_name in g1.graph['edge_attrs']: + diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name]) + sum_squares += np.square(diff) + sod_es += np.sqrt(sum_squares) + elif (n2_g2, n1_g2) in g2.edges(): + n_es += 1 + sum_squares = 0 + for a_name in g1.graph['edge_attrs']: + diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name]) + sum_squares += np.square(diff) + sod_es += np.sqrt(sum_squares) + # corresponding nodes are in g2, however the edge is removed. + else: + n_er += 1 + n_ei = nx.number_of_edges(g2) - n_es + + return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es + + +if __name__ == '__main__': + print('check test_ged.py') \ No newline at end of file diff --git a/gklearn/preimage/iam.py b/gklearn/preimage/iam.py new file mode 100644 index 0000000..f3e2165 --- /dev/null +++ b/gklearn/preimage/iam.py @@ -0,0 +1,775 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Fri Apr 26 11:49:12 2019 + +Iterative alternate minimizations using GED. +@author: ljia +""" +import numpy as np +import random +import networkx as nx +from tqdm import tqdm + +from gklearn.utils.graphdataset import get_dataset_attributes +from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels +from gklearn.preimage.ged import GED, ged_median + + +def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, + epsilon=0.001, node_label='atom', edge_label='bond_type', + connected=False, removeNodes=True, allBestInit=False, allBestNodes=False, + allBestEdges=False, allBestOutput=False, + params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP', + 'edit_cost_constant': [], 'stabilizer': None, + 'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'}): + """See my name, then you know what I do. + """ +# Gn_median = Gn_median[0:10] +# Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median] + node_ir = np.inf # corresponding to the node remove and insertion. + label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable. + ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate, + attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'], + edge_label=edge_label) + node_label_set = get_node_labels(Gn_median, node_label) + edge_label_set = get_edge_labels(Gn_median, edge_label) + + + def generate_graph(G, pi_p_forward): + G_new_list = [G.copy()] # all "best" graphs generated in this iteration. +# nx.draw_networkx(G) +# import matplotlib.pyplot as plt +# plt.show() +# print(pi_p_forward) + + # update vertex labels. 
+ # pre-compute h_i0 for each label. +# for label in get_node_labels(Gn, node_label): +# print(label) +# for nd in G.nodes(data=True): +# pass + if not ds_attrs['node_attr_dim']: # labels are symbolic + for ndi, (nd, _) in enumerate(G.nodes(data=True)): + h_i0_list = [] + label_list = [] + for label in node_label_set: + h_i0 = 0 + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][ndi] + if pi_i != node_ir and g.nodes[pi_i][node_label] == label: + h_i0 += 1 + h_i0_list.append(h_i0) + label_list.append(label) + # case when the node is to be removed. + if removeNodes: + h_i0_remove = 0 # @todo: maybe this can be added to the node_label_set above. + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][ndi] + if pi_i == node_ir: + h_i0_remove += 1 + h_i0_list.append(h_i0_remove) + label_list.append(label_r) + # get the best labels. + idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist() + if allBestNodes: # choose all best graphs. + nlabel_best = [label_list[idx] for idx in idx_max] + # generate "best" graphs with regard to "best" node labels. + G_new_list_nd = [] + for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now. + for nl in nlabel_best: + g_tmp = g.copy() + if nl == label_r: + g_tmp.remove_node(nd) + else: + g_tmp.nodes[nd][node_label] = nl + G_new_list_nd.append(g_tmp) + # nx.draw_networkx(g_tmp) + # import matplotlib.pyplot as plt + # plt.show() + # print(g_tmp.nodes(data=True)) + # print(g_tmp.edges(data=True)) + G_new_list = [ggg.copy() for ggg in G_new_list_nd] + else: + # choose one of the best randomly. + idx_rdm = random.randint(0, len(idx_max) - 1) + best_label = label_list[idx_max[idx_rdm]] + h_i0_max = h_i0_list[idx_max[idx_rdm]] + + g_new = G_new_list[0] + if best_label == label_r: + g_new.remove_node(nd) + else: + g_new.nodes[nd][node_label] = best_label + G_new_list = [g_new] + else: # labels are non-symbolic + for ndi, (nd, _) in enumerate(G.nodes(data=True)): + Si_norm = 0 + phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][ndi] + if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0? + Si_norm += 1 + phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) + phi_i_bar /= Si_norm + G_new_list[0].nodes[nd]['attributes'] = phi_i_bar + +# for g in G_new_list: +# import matplotlib.pyplot as plt +# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + + # update edge labels and adjacency matrix. + if ds_attrs['edge_labeled']: + G_new_list_edge = [] + for g_new in G_new_list: + nd_list = [n for n in g_new.nodes()] + g_tmp_list = [g_new.copy()] + for nd1i in range(nx.number_of_nodes(g_new)): + nd1 = nd_list[nd1i]# @todo: not just edges, but all pairs of nodes + for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)): + nd2 = nd_list[nd2i] +# for nd1, nd2, _ in g_new.edges(data=True): + h_ij0_list = [] + label_list = [] + for label in edge_label_set: + h_ij0 = 0 + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][nd1i] + pi_j = pi_p_forward[idx][nd2i] + h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and + g.has_edge(pi_i, pi_j) and + g.edges[pi_i, pi_j][edge_label] == label) + h_ij0 += h_ij0_p + h_ij0_list.append(h_ij0) + label_list.append(label) + + # get the best labels. 
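# The decision applied below keeps edge (nd1, nd2) iff its best label's vote
# satisfies h_ij0 > m*c_er/c_es + s_ij*(1 - (c_er + c_ei)/c_es), with m median
# graphs and s_ij the number of them in which both endpoint images are
# adjacent. Worked numbers (illustrative):
m, c_ei, c_er, c_es = 10, 3.0, 3.0, 1.0
h_ij0, s_ij = 8, 9
keep_edge = h_ij0 > m * c_er / c_es + s_ij * (1 - (c_er + c_ei) / c_es)
# threshold = 30 + 9 * (1 - 6) = -15, so keep_edge is True and the edge stays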
+ idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist() + if allBestEdges: # choose all best graphs. + elabel_best = [label_list[idx] for idx in idx_max] + h_ij0_max = [h_ij0_list[idx] for idx in idx_max] + # generate "best" graphs with regard to "best" node labels. + G_new_list_ed = [] + for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now. + for idxl, el in enumerate(elabel_best): + g_tmp_copy = g_tmp.copy() + # check whether a_ij is 0 or 1. + sij_norm = 0 + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][nd1i] + pi_j = pi_p_forward[idx][nd2i] + if g.has_node(pi_i) and g.has_node(pi_j) and \ + g.has_edge(pi_i, pi_j): + sij_norm += 1 + if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \ + sij_norm * (1 - (c_er + c_ei) / c_es): + if not g_tmp_copy.has_edge(nd1, nd2): + g_tmp_copy.add_edge(nd1, nd2) + g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl] + else: + if g_tmp_copy.has_edge(nd1, nd2): + g_tmp_copy.remove_edge(nd1, nd2) + G_new_list_ed.append(g_tmp_copy) + g_tmp_list = [ggg.copy() for ggg in G_new_list_ed] + else: # choose one of the best randomly. + idx_rdm = random.randint(0, len(idx_max) - 1) + best_label = label_list[idx_max[idx_rdm]] + h_ij0_max = h_ij0_list[idx_max[idx_rdm]] + + # check whether a_ij is 0 or 1. + sij_norm = 0 + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][nd1i] + pi_j = pi_p_forward[idx][nd2i] + if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): + sij_norm += 1 + if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): + if not g_new.has_edge(nd1, nd2): + g_new.add_edge(nd1, nd2) + g_new.edges[nd1, nd2][edge_label] = best_label + else: +# elif h_ij0_max < len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): + if g_new.has_edge(nd1, nd2): + g_new.remove_edge(nd1, nd2) + g_tmp_list = [g_new] + G_new_list_edge += g_tmp_list + G_new_list = [ggg.copy() for ggg in G_new_list_edge] + + + else: # if edges are unlabeled + # @todo: is this even right? G or g_tmp? check if the new one is right + # @todo: works only for undirected graphs. + + for g_tmp in G_new_list: + nd_list = [n for n in g_tmp.nodes()] + for nd1i in range(nx.number_of_nodes(g_tmp)): + nd1 = nd_list[nd1i] + for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)): + nd2 = nd_list[nd2i] + sij_norm = 0 + for idx, g in enumerate(Gn_median): + pi_i = pi_p_forward[idx][nd1i] + pi_j = pi_p_forward[idx][nd2i] + if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): + sij_norm += 1 + if sij_norm > len(Gn_median) * c_er / (c_er + c_ei): + # @todo: should we consider if nd1 and nd2 in g_tmp? + # or just add the edge anyway? + if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \ + and not g_tmp.has_edge(nd1, nd2): + g_tmp.add_edge(nd1, nd2) + else: # @todo: which to use? +# elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei): + if g_tmp.has_edge(nd1, nd2): + g_tmp.remove_edge(nd1, nd2) + # do not change anything when equal. + +# for i, g in enumerate(G_new_list): +# import matplotlib.pyplot as plt +# nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True) +## plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG") +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + +# # find the best graph generated in this iteration and update pi_p. + # @todo: should we update all graphs generated or just the best ones? 
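# The surrounding alternation is driven by iteration_proc() below, which stops
# once the sum of distances (SOD) changes by less than epsilon; its stopping
# logic, reduced to a standalone sketch:
def iterate_until_stable(step, state, sod, ite_max=50, epsilon=0.001):
    old_sod, itr = sod * 2, 0              # seed so the first test passes
    while itr < ite_max and abs(old_sod - sod) > epsilon:
        state, sod_new = step(state)
        old_sod, sod = sod, sod_new
        itr += 1
    return state, sod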
+ dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, + params_ged=params_ged) + # @todo: should we remove the identical and connectivity check? + # Don't know which is faster. + if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: + G_new_list, idx_list = remove_duplicates(G_new_list) + pi_forward_list = [pi_forward_list[idx] for idx in idx_list] + dis_list = [dis_list[idx] for idx in idx_list] +# if connected == True: +# G_new_list, idx_list = remove_disconnected(G_new_list) +# pi_forward_list = [pi_forward_list[idx] for idx in idx_list] +# idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist() +# dis_min = dis_list[idx_min_tmp_list[0]] +# pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list] +# G_new_list = [G_new_list[idx] for idx in idx_min_list] + +# for g in G_new_list: +# import matplotlib.pyplot as plt +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + + return G_new_list, pi_forward_list, dis_list + + + def best_median_graphs(Gn_candidate, pi_all_forward, dis_all): + idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist() + dis_min = dis_all[idx_min_list[0]] + pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list] + G_min_list = [Gn_candidate[idx] for idx in idx_min_list] + return G_min_list, pi_forward_min_list, dis_min + + + def iteration_proc(G, pi_p_forward, cur_sod): + G_list = [G] + pi_forward_list = [pi_p_forward] + old_sod = cur_sod * 2 + sod_list = [cur_sod] + dis_list = [cur_sod] + # iterations. + itr = 0 + # @todo: what if difference == 0? +# while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or +# np.abs(old_sod - cur_sod) == 0): + while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon: +# while itr < ite_max: +# for itr in range(0, 5): # the convergence condition? + print('itr_iam is', itr) + G_new_list = [] + pi_forward_new_list = [] + dis_new_list = [] + for idx, g in enumerate(G_list): +# label_set = get_node_labels(Gn_median + [g], node_label) + G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph( + g, pi_forward_list[idx]) + G_new_list += G_tmp_list + pi_forward_new_list += pi_forward_tmp_list + dis_new_list += dis_tmp_list + # @todo: need to remove duplicates here? + G_list = [ggg.copy() for ggg in G_new_list] + pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list] + dis_list = dis_new_list[:] + + old_sod = cur_sod + cur_sod = np.min(dis_list) + sod_list.append(cur_sod) + + itr += 1 + + # @todo: do we return all graphs or the best ones? + # get the best ones of the generated graphs. + G_list, pi_forward_list, dis_min = best_median_graphs( + G_list, pi_forward_list, dis_list) + + if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: + G_list, idx_list = remove_duplicates(G_list) + pi_forward_list = [pi_forward_list[idx] for idx in idx_list] +# dis_list = [dis_list[idx] for idx in idx_list] + +# import matplotlib.pyplot as plt +# for g in G_list: +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + + print('\nsods:', sod_list, '\n') + + return G_list, pi_forward_list, dis_min, sod_list + + + def remove_duplicates(Gn): + """Remove duplicate graphs from list. 
+ """ + Gn_new = [] + idx_list = [] + for idx, g in enumerate(Gn): + dupl = False + for g_new in Gn_new: + if graph_isIdentical(g_new, g): + dupl = True + break + if not dupl: + Gn_new.append(g) + idx_list.append(idx) + return Gn_new, idx_list + + + def remove_disconnected(Gn): + """Remove disconnected graphs from list. + """ + Gn_new = [] + idx_list = [] + for idx, g in enumerate(Gn): + if nx.is_connected(g): + Gn_new.append(g) + idx_list.append(idx) + return Gn_new, idx_list + + + ########################################################################### + + # phase 1: initilize. + # compute set-median. + dis_min = np.inf + dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median, + params_ged=params_ged, parallel=True) + print('finish computing GEDs.') + # find all smallest distances. + if allBestInit: # try all best init graphs. + idx_min_list = range(len(dis_list)) + dis_min = dis_list + else: + idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist() + dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list) + idx_min_rdm = random.randint(0, len(idx_min_list) - 1) + idx_min_list = [idx_min_list[idx_min_rdm]] + sod_set_median = np.min(dis_min) + + + # phase 2: iteration. + G_list = [] + dis_list = [] + pi_forward_list = [] + G_set_median_list = [] +# sod_list = [] + for idx_tmp, idx_min in enumerate(idx_min_list): +# print('idx_min is', idx_min) + G = Gn_candidate[idx_min].copy() + G_set_median_list.append(G.copy()) + # list of edit operations. + pi_p_forward = pi_forward_all[idx_min] +# pi_p_backward = pi_all_backward[idx_min] + Gi_list, pi_i_forward_list, dis_i_min, sod_list = iteration_proc(G, + pi_p_forward, dis_min[idx_tmp]) + G_list += Gi_list + dis_list += [dis_i_min] * len(Gi_list) + pi_forward_list += pi_i_forward_list + + + if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0: + G_list, idx_list = remove_duplicates(G_list) + dis_list = [dis_list[idx] for idx in idx_list] + pi_forward_list = [pi_forward_list[idx] for idx in idx_list] + if connected == True: + G_list_con, idx_list = remove_disconnected(G_list) + # if there is no connected graphs at all, then remain the disconnected ones. + if len(G_list_con) > 0: # @todo: ?????????????????????????? + G_list = G_list_con + dis_list = [dis_list[idx] for idx in idx_list] + pi_forward_list = [pi_forward_list[idx] for idx in idx_list] + +# import matplotlib.pyplot as plt +# for g in G_list: +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + + # get the best median graphs + G_gen_median_list, pi_forward_min_list, sod_gen_median = best_median_graphs( + G_list, pi_forward_list, dis_list) +# for g in G_gen_median_list: +# nx.draw_networkx(g) +# plt.show() +# print(g.nodes(data=True)) +# print(g.edges(data=True)) + + if not allBestOutput: + # randomly choose one graph. + idx_rdm = random.randint(0, len(G_gen_median_list) - 1) + G_gen_median_list = [G_gen_median_list[idx_rdm]] + + return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median + + +def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1, + dataset='monoterpenoides', + graph_dir=''): + """Compute the iam by c++ implementation (gedlib) through bash. + """ + import os + import time + + def createCollectionFile(Gn_names, y, filename): + """Create collection file. 
+ """ + dirname_ds = os.path.dirname(filename) + if dirname_ds != '': + dirname_ds += '/' + if not os.path.exists(dirname_ds) : + os.makedirs(dirname_ds) + + with open(filename + '.xml', 'w') as fgroup: + fgroup.write("") + fgroup.write("\n") + fgroup.write("\n") + for idx, fname in enumerate(Gn_names): + fgroup.write("\n\t") + fgroup.write("\n") + fgroup.close() + + tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/' + fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9)) + createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection) +# fn_collection = tmp_dir + 'collection_for_debug' +# graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl' + +# if dataset == 'Letter-high' or dataset == 'Fingerprint': +# dataset = 'letter' + command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n' + command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n' + command += 'export LD_LIBRARY_PATH\n' + command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n' + command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \ + + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' ' + if edit_cost_constant is None: + command += 'None' + else: + for ec in edit_cost_constant: + command += str(ec) + ' ' +# output = os.system(command) + stream = os.popen(command) + + output = stream.readlines() +# print(output) + sod_sm = float(output[0].strip()) + sod_gm = float(output[1].strip()) + + fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl' + fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl' + + return sod_sm, sod_gm, fname_sm, fname_gm + + + +############################################################################### +# Old implementations. + +def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type', + connected=True): + """See my name, then you know what I do. + """ +# Gn = Gn[0:10] + Gn = [nx.convert_node_labels_to_integers(g) for g in Gn] + + # phase 1: initilize. + # compute set-median. + dis_min = np.inf + pi_p = [] + pi_all = [] + for idx1, G_p in enumerate(Gn): + dist_sum = 0 + pi_all.append([]) + for idx2, G_p_prime in enumerate(Gn): + dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime) + pi_all[idx1].append(pi_tmp) + dist_sum += dist_tmp + if dist_sum < dis_min: + dis_min = dist_sum + G = G_p.copy() + idx_min = idx1 + # list of edit operations. + pi_p = pi_all[idx_min] + + # phase 2: iteration. + ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'], + edge_label=edge_label) + for itr in range(0, 10): # @todo: the convergence condition? + G_new = G.copy() + # update vertex labels. + # pre-compute h_i0 for each label. +# for label in get_node_labels(Gn, node_label): +# print(label) +# for nd in G.nodes(data=True): +# pass + if not ds_attrs['node_attr_dim']: # labels are symbolic + for nd, _ in G.nodes(data=True): + h_i0_list = [] + label_list = [] + for label in get_node_labels(Gn, node_label): + h_i0 = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p[idx][nd] + if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label: + h_i0 += 1 + h_i0_list.append(h_i0) + label_list.append(label) + # choose one of the best randomly. 
+ idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist() + idx_rdm = random.randint(0, len(idx_max) - 1) + G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]] + else: # labels are non-symbolic + for nd, _ in G.nodes(data=True): + Si_norm = 0 + phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) + for idx, g in enumerate(Gn): + pi_i = pi_p[idx][nd] + if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0? + Si_norm += 1 + phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) + phi_i_bar /= Si_norm + G_new.nodes[nd]['attributes'] = phi_i_bar + + # update edge labels and adjacency matrix. + if ds_attrs['edge_labeled']: + for nd1, nd2, _ in G.edges(data=True): + h_ij0_list = [] + label_list = [] + for label in get_edge_labels(Gn, edge_label): + h_ij0 = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p[idx][nd1] + pi_j = pi_p[idx][nd2] + h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and + g.has_edge(pi_i, pi_j) and + g.edges[pi_i, pi_j][edge_label] == label) + h_ij0 += h_ij0_p + h_ij0_list.append(h_ij0) + label_list.append(label) + # choose one of the best randomly. + idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist() + h_ij0_max = h_ij0_list[idx_max[0]] + idx_rdm = random.randint(0, len(idx_max) - 1) + best_label = label_list[idx_max[idx_rdm]] + + # check whether a_ij is 0 or 1. + sij_norm = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p[idx][nd1] + pi_j = pi_p[idx][nd2] + if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): + sij_norm += 1 + if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): + if not G_new.has_edge(nd1, nd2): + G_new.add_edge(nd1, nd2) + G_new.edges[nd1, nd2][edge_label] = best_label + else: + if G_new.has_edge(nd1, nd2): + G_new.remove_edge(nd1, nd2) + else: # if edges are unlabeled + for nd1, nd2, _ in G.edges(data=True): + sij_norm = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p[idx][nd1] + pi_j = pi_p[idx][nd2] + if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): + sij_norm += 1 + if sij_norm > len(Gn) * c_er / (c_er + c_ei): + if not G_new.has_edge(nd1, nd2): + G_new.add_edge(nd1, nd2) + else: + if G_new.has_edge(nd1, nd2): + G_new.remove_edge(nd1, nd2) + + G = G_new.copy() + + # update pi_p + pi_p = [] + for idx1, G_p in enumerate(Gn): + dist_tmp, pi_tmp, _ = GED(G, G_p) + pi_p.append(pi_tmp) + + return G + +# --------------------------- These are tests --------------------------------# + +def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1, + node_label='atom', edge_label='bond_type'): + """See my name, then you know what I do. + """ +# Gn = Gn[0:10] + Gn = [nx.convert_node_labels_to_integers(g) for g in Gn] + + # phase 1: initilize. + # compute set-median. + dis_min = np.inf +# pi_p = [] + pi_all_forward = [] + pi_all_backward = [] + for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout): + dist_sum = 0 + pi_all_forward.append([]) + pi_all_backward.append([]) + for idx2, G_p_prime in enumerate(Gn): + dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime) + pi_all_forward[idx1].append(pi_tmp_forward) + pi_all_backward[idx1].append(pi_tmp_backward) + dist_sum += dist_tmp + if dist_sum <= dis_min: + dis_min = dist_sum + G = G_p.copy() + idx_min = idx1 + # list of edit operations. + pi_p_forward = pi_all_forward[idx_min] + pi_p_backward = pi_all_backward[idx_min] + + # phase 2: iteration. 
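Phase 1 above selects the set median: the candidate whose summed GED to all graphs in the collection is smallest. Reduced to its core, the selection looks like this (a sketch; `ged` stands in for the GED() wrapper and here returns only a distance):

def set_median_index(candidates, graphs, ged):
    # Return the index of the candidate minimizing the sum of edit
    # distances to every graph in the collection ("set median").
    sums = [sum(ged(c, g) for g in graphs) for c in candidates]
    return min(range(len(sums)), key=sums.__getitem__)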
+ ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'], + edge_label=edge_label) + label_set = get_node_labels(Gn + [G], node_label) + for itr in range(0, 10): # @todo: the convergence condition? + G_new = G.copy() + # update vertex labels. + # pre-compute h_i0 for each label. +# for label in get_node_labels(Gn, node_label): +# print(label) +# for nd in G.nodes(data=True): +# pass + if not ds_attrs['node_attr_dim']: # labels are symbolic + for nd in G.nodes(): + h_i0_list = [] + label_list = [] + for label in label_set: + h_i0 = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p_forward[idx][nd] + if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label: + h_i0 += 1 + h_i0_list.append(h_i0) + label_list.append(label) + # choose one of the best randomly. + idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist() + idx_rdm = random.randint(0, len(idx_max) - 1) + G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]] + else: # labels are non-symbolic + for nd in G.nodes(): + Si_norm = 0 + phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])]) + for idx, g in enumerate(Gn): + pi_i = pi_p_forward[idx][nd] + if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0? + Si_norm += 1 + phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']]) + phi_i_bar /= Si_norm + G_new.nodes[nd]['attributes'] = phi_i_bar + + # update edge labels and adjacency matrix. + if ds_attrs['edge_labeled']: + for nd1, nd2, _ in G.edges(data=True): + h_ij0_list = [] + label_list = [] + for label in get_edge_labels(Gn, edge_label): + h_ij0 = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p_forward[idx][nd1] + pi_j = pi_p_forward[idx][nd2] + h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and + g.has_edge(pi_i, pi_j) and + g.edges[pi_i, pi_j][edge_label] == label) + h_ij0 += h_ij0_p + h_ij0_list.append(h_ij0) + label_list.append(label) + # choose one of the best randomly. + idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist() + h_ij0_max = h_ij0_list[idx_max[0]] + idx_rdm = random.randint(0, len(idx_max) - 1) + best_label = label_list[idx_max[idx_rdm]] + + # check whether a_ij is 0 or 1. + sij_norm = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p_forward[idx][nd1] + pi_j = pi_p_forward[idx][nd2] + if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): + sij_norm += 1 + if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es): + if not G_new.has_edge(nd1, nd2): + G_new.add_edge(nd1, nd2) + G_new.edges[nd1, nd2][edge_label] = best_label + else: + if G_new.has_edge(nd1, nd2): + G_new.remove_edge(nd1, nd2) + else: # if edges are unlabeled + # @todo: works only for undirected graphs. + for nd1 in range(nx.number_of_nodes(G)): + for nd2 in range(nd1 + 1, nx.number_of_nodes(G)): + sij_norm = 0 + for idx, g in enumerate(Gn): + pi_i = pi_p_forward[idx][nd1] + pi_j = pi_p_forward[idx][nd2] + if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j): + sij_norm += 1 + if sij_norm > len(Gn) * c_er / (c_er + c_ei): + if not G_new.has_edge(nd1, nd2): + G_new.add_edge(nd1, nd2) + elif sij_norm < len(Gn) * c_er / (c_er + c_ei): + if G_new.has_edge(nd1, nd2): + G_new.remove_edge(nd1, nd2) + # do not change anything when equal. 
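For unlabeled edges the update above degenerates to a frequency vote: an edge survives iff it is mapped onto an existing edge in more than c_er/(c_er + c_ei) of the graphs, which is a plain majority when c_er equals c_ei. A hedged one-liner with a toy check:

def keep_unlabeled_edge(sij_norm, n_graphs, c_ei, c_er):
    # Strict inequality: on a tie the caller leaves the edge unchanged.
    return sij_norm > n_graphs * c_er / (c_er + c_ei)

assert keep_unlabeled_edge(6, 10, 3, 3)       # 6 > 10 * 0.5
assert not keep_unlabeled_edge(5, 10, 3, 3)   # a tie is not enough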
+
+        G = G_new.copy()
+
+        # update pi_p
+        pi_p_forward = []
+        for G_p in Gn:
+            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
+            pi_p_forward.append(pi_tmp_forward)
+
+    return G
+
+
+###############################################################################
+
+if __name__ == '__main__':
+    from gklearn.utils.graphfiles import loadDataset
+    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
+          'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}  # node/edge symb
+#    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
+#          'extra_params': {}}  # node nsymb
+#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
+#          'extra_params': {}}
+    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
+
+    iam(Gn)
\ No newline at end of file
diff --git a/gklearn/preimage/knn.py b/gklearn/preimage/knn.py
new file mode 100644
index 0000000..c179287
--- /dev/null
+++ b/gklearn/preimage/knn.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jan 10 13:22:04 2020
+
+@author: ljia
+"""
+import sys
+import numpy as np
+#import matplotlib.pyplot as plt
+from tqdm import tqdm
+import random
+#import csv
+from shutil import copyfile
+import os
+
+from gklearn.preimage.iam import iam_bash
+from gklearn.utils.graphfiles import loadDataset, loadGXL
+from gklearn.preimage.ged import GED
+from gklearn.preimage.utils import get_same_item_indices
+
+def test_knn():
+    ds = {'name': 'monoterpenoides',
+          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
+    Gn, y_all = loadDataset(ds['dataset'])
+#    Gn = Gn[0:50]
+#    gkernel = 'treeletkernel'
+#    node_label = 'atom'
+#    edge_label = 'bond_type'
+#    ds_name = 'mono'
+    dir_output = 'results/knn/'
+    graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/../../datasets/monoterpenoides/'
+
+    k_nn = 1
+    percent = 0.1
+    repeats = 50
+    edit_cost_constant = [3, 3, 1, 3, 3, 1]
+
+    # get indices by classes.
+    y_idx = get_same_item_indices(y_all)
+    accuracy_sm_list = []
+    accuracy_gm_list = []
+    sod_sm_list = []
+    sod_gm_list = []
+    for repeat in range(0, repeats):
+        print('\n---------------------------------')
+        print('repeat =', repeat)
+
+        random.seed(repeat)
+        set_median_list = []
+        gen_median_list = []
+        train_y_set = []
+        for y, values in y_idx.items():
+            print('\ny =', y)
+            size_median_set = int(len(values) * percent)
+            median_set_idx = random.sample(values, size_median_set)
+            print('median set: ', median_set_idx)
+
+            # compute set median and gen median using IAM (C++ through bash).
+#            Gn_median = [Gn[idx] for idx in median_set_idx]
+            group_fnames = [Gn[g].graph['filename'] for g in median_set_idx]
+            sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
+                                                          graph_dir=graph_dir)
+            print('sod_sm, sod_gm:', sod_sm, sod_gm)
+            sod_sm_list.append(sod_sm)
+            sod_gm_list.append(sod_gm)
+            fname_sm_new = dir_output + 'medians/set_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
+            copyfile(fname_sm, fname_sm_new)
+            fname_gm_new = dir_output + 'medians/gen_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
+            copyfile(fname_gm, fname_gm_new)
+            set_median_list.append(loadGXL(fname_sm_new))
+            gen_median_list.append(loadGXL(fname_gm_new))
+            train_y_set.append(int(y))
+
+        print(sod_sm, sod_gm)
+
+        # do 1-nn.
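The knn() helper defined just below hard-codes the k = 1 case. A general majority-vote variant over GED distances could look like this (a sketch; `ged_distance` is an assumed callable returning a single scalar, not part of the restored module):

import heapq
from collections import Counter

def knn_predict(g_test, train_set, train_y, ged_distance, k=3):
    # Rank training graphs by edit distance and vote among the k nearest.
    dists = [(ged_distance(g_test, g), y) for g, y in zip(train_set, train_y)]
    nearest = heapq.nsmallest(k, dists, key=lambda t: t[0])
    return Counter(y for _, y in nearest).most_common(1)[0][0]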
+        test_y_set = [int(y) for y in y_all]
+        accuracy_sm = knn(set_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
+        accuracy_gm = knn(gen_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
+        accuracy_sm_list.append(accuracy_sm)
+        accuracy_gm_list.append(accuracy_gm)
+        print('current accuracy sm and gm:', accuracy_sm, accuracy_gm)
+
+    # output
+    accuracy_sm_mean = np.mean(accuracy_sm_list)
+    accuracy_gm_mean = np.mean(accuracy_gm_list)
+    print('\ntotal average accuracy sm and gm:', accuracy_sm_mean, accuracy_gm_mean)
+
+
+def knn(train_set, train_y_set, test_set, test_y_set, k=1, distance='ged'):
+    if k == 1 and distance == 'ged':
+        algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
+        params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP',
+                      'algo_options': algo_options, 'stabilizer': None}
+        accuracy = 0
+        for idx_test, g_test in tqdm(enumerate(test_set), desc='computing 1-nn',
+                                     file=sys.stdout):
+            dis = np.inf
+            for idx_train, g_train in enumerate(train_set):
+                dis_cur, _, _ = GED(g_test, g_train, **params_ged)
+                if dis_cur < dis:
+                    dis = dis_cur
+                    test_y_cur = train_y_set[idx_train]
+            if test_y_cur == test_y_set[idx_test]:
+                accuracy += 1
+        accuracy = accuracy / len(test_set)
+
+    return accuracy
+
+
+
+if __name__ == '__main__':
+    test_knn()
\ No newline at end of file
diff --git a/gklearn/preimage/libs.py b/gklearn/preimage/libs.py
new file mode 100644
index 0000000..76005c6
--- /dev/null
+++ b/gklearn/preimage/libs.py
@@ -0,0 +1,6 @@
+import sys
+import pathlib
+
+# insert gedlibpy library.
+sys.path.insert(0, "../../../")
+from gedlibpy import librariesImport, gedlibpy
diff --git a/gklearn/preimage/median.py b/gklearn/preimage/median.py
new file mode 100644
index 0000000..1c5bb0f
--- /dev/null
+++ b/gklearn/preimage/median.py
@@ -0,0 +1,218 @@
+import sys
+sys.path.insert(0, "../")
+#import pathlib
+import numpy as np
+import networkx as nx
+import time
+
+from gedlibpy import librariesImport, gedlibpy
+#import script
+sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
+import gklearn
+from gklearn.utils.graphfiles import loadDataset
+
+def replace_graph_in_env(script, graph, old_id, label='median'):
+    """
+    Replace a graph in script.
+
+    If old_id is -1, add a new graph to the environment.
+
+    """
+    if(old_id > -1):
+        script.PyClearGraph(old_id)
+    new_id = script.PyAddGraph(label)
+    for i in graph.nodes():
+        script.PyAddNode(new_id, str(i), graph.node[i])  # !!
strings are required bt gedlib + for e in graph.edges: + script.PyAddEdge(new_id, str(e[0]),str(e[1]), {}) + script.PyInitEnv() + script.PySetMethod("IPFP", "") + script.PyInitMethod() + + return new_id + +#Dessin median courrant +def draw_Letter_graph(graph, savepath=''): + import numpy as np + import networkx as nx + import matplotlib.pyplot as plt + plt.figure() + pos = {} + for n in graph.nodes: + pos[n] = np.array([float(graph.node[n]['attributes'][0]), + float(graph.node[n]['attributes'][1])]) + nx.draw_networkx(graph, pos) + if savepath != '': + plt.savefig(savepath + str(time.time()) + '.eps', format='eps', dpi=300) + plt.show() + plt.clf() + +#compute new mappings +def update_mappings(script,median_id,listID): + med_distances = {} + med_mappings = {} + sod = 0 + for i in range(0,len(listID)): + script.PyRunMethod(median_id,listID[i]) + med_distances[i] = script.PyGetUpperBound(median_id,listID[i]) + med_mappings[i] = script.PyGetForwardMap(median_id,listID[i]) + sod += med_distances[i] + return med_distances, med_mappings, sod + +def calcul_Sij(all_mappings, all_graphs,i,j): + s_ij = 0 + for k in range(0,len(all_mappings)): + cur_graph = all_graphs[k] + cur_mapping = all_mappings[k] + size_graph = cur_graph.order() + if ((cur_mapping[i] < size_graph) and + (cur_mapping[j] < size_graph) and + (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)): + s_ij += 1 + + return s_ij + +# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings): +# from scipy.stats.mstats import gmean + +# for i in median.nodes(): +# for k in listIdSet: +# vectors = [] #np.zeros((len(listIdSet),2)) +# if(k != median_id): +# phi_i = mappings[k][i] +# if(phi_i < dataset[k].order()): +# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])]) + +# new_labels = gmean(vectors) +# median.node[i]['x'] = str(new_labels[0]) +# median.node[i]['y'] = str(new_labels[1]) +# return median + +def update_median_nodes(median,dataset,mappings): + #update node attributes + for i in median.nodes(): + nb_sub=0 + mean_label = {'x' : 0, 'y' : 0} + for k in range(0,len(mappings)): + phi_i = mappings[k][i] + if ( phi_i < dataset[k].order() ): + nb_sub += 1 + mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x']) + mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y']) + median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub)) + median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub)) + return median + +def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425): +#for letter high, ceir = 1.7, alpha = 0.75 + size_dataset = len(dataset) + ratio_cei_cer = cer/(cei + cer) + threshold = size_dataset*ratio_cei_cer + order_graph_median = median.order() + for i in range(0,order_graph_median): + for j in range(i+1,order_graph_median): + s_ij = calcul_Sij(mappings,dataset,i,j) + if(s_ij > threshold): + median.add_edge(i,j) + else: + if(median.has_edge(i,j)): + median.remove_edge(i,j) + return median + + + +def compute_median(script, listID, dataset,verbose=False): + """Compute a graph median of a dataset according to an environment + + Parameters + + script : An gedlib initialized environnement + listID (list): a list of ID in script: encodes the dataset + dataset (list): corresponding graphs in networkX format. 
We assume that graph + listID[i] corresponds to dataset[i] + + Returns: + A networkX graph, which is the median, with corresponding sod + """ + print(len(listID)) + median_set_index, median_set_sod = compute_median_set(script, listID) + print(median_set_index) + print(median_set_sod) + sods = [] + #Ajout median dans environnement + set_median = dataset[median_set_index].copy() + median = dataset[median_set_index].copy() + cur_med_id = replace_graph_in_env(script,median,-1) + med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID) + sods.append(cur_sod) + if(verbose): + print(cur_sod) + ite_max = 50 + old_sod = cur_sod * 2 + ite = 0 + epsilon = 0.001 + + best_median + while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon )): + median = update_median_nodes(median,dataset, med_mappings) + median = update_median_edges(dataset,med_mappings,median) + + cur_med_id = replace_graph_in_env(script,median,cur_med_id) + med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID) + + + sods.append(cur_sod) + if(verbose): + print(cur_sod) + ite += 1 + return median, cur_sod, sods, set_median + + draw_Letter_graph(median) + + +def compute_median_set(script,listID): + 'Returns the id in listID corresponding to median set' + #Calcul median set + N=len(listID) + map_id_to_index = {} + map_index_to_id = {} + for i in range(0,len(listID)): + map_id_to_index[listID[i]] = i + map_index_to_id[i] = listID[i] + + distances = np.zeros((N,N)) + for i in listID: + for j in listID: + script.PyRunMethod(i,j) + distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j) + + median_set_index = np.argmin(np.sum(distances,0)) + sod = np.min(np.sum(distances,0)) + + return median_set_index, sod + +if __name__ == "__main__": + #Chargement du dataset + script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml') + script.PySetEditCost("LETTER") + script.PyInitEnv() + script.PySetMethod("IPFP", "") + script.PyInitMethod() + + dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl") + + listID = script.PyGetAllGraphIds() + median, sod = compute_median(script,listID,dataset,verbose=True) + + print(sod) + draw_Letter_graph(median) + + +#if __name__ == '__main__': +# # test draw_Letter_graph +# ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt', +# 'extra_params': {}} # node nsymb +# Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params']) +# print(y_all) +# for g in Gn: +# draw_Letter_graph(g) \ No newline at end of file diff --git a/gklearn/preimage/median_benoit.py b/gklearn/preimage/median_benoit.py new file mode 100644 index 0000000..6712196 --- /dev/null +++ b/gklearn/preimage/median_benoit.py @@ -0,0 +1,201 @@ +import sys +import pathlib +import numpy as np +import networkx as nx + +import librariesImport +import script +sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/") +import gklearn + +def replace_graph_in_env(script, graph, old_id, label='median'): + """ + Replace a graph in script + + If old_id is -1, add a new graph to the environnemt + + """ + if(old_id > -1): + script.PyClearGraph(old_id) + new_id = script.PyAddGraph(label) + for i in graph.nodes(): + script.PyAddNode(new_id,str(i),graph.node[i]) # !! 
strings are required bt gedlib + for e in graph.edges: + script.PyAddEdge(new_id, str(e[0]),str(e[1]), {}) + script.PyInitEnv() + script.PySetMethod("IPFP", "") + script.PyInitMethod() + + return new_id + +#Dessin median courrant +def draw_Letter_graph(graph): + import numpy as np + import networkx as nx + import matplotlib.pyplot as plt + plt.figure() + pos = {} + for n in graph.nodes: + pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])]) + nx.draw_networkx(graph,pos) + plt.show() + +#compute new mappings +def update_mappings(script,median_id,listID): + med_distances = {} + med_mappings = {} + sod = 0 + for i in range(0,len(listID)): + script.PyRunMethod(median_id,listID[i]) + med_distances[i] = script.PyGetUpperBound(median_id,listID[i]) + med_mappings[i] = script.PyGetForwardMap(median_id,listID[i]) + sod += med_distances[i] + return med_distances, med_mappings, sod + +def calcul_Sij(all_mappings, all_graphs,i,j): + s_ij = 0 + for k in range(0,len(all_mappings)): + cur_graph = all_graphs[k] + cur_mapping = all_mappings[k] + size_graph = cur_graph.order() + if ((cur_mapping[i] < size_graph) and + (cur_mapping[j] < size_graph) and + (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)): + s_ij += 1 + + return s_ij + +# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings): +# from scipy.stats.mstats import gmean + +# for i in median.nodes(): +# for k in listIdSet: +# vectors = [] #np.zeros((len(listIdSet),2)) +# if(k != median_id): +# phi_i = mappings[k][i] +# if(phi_i < dataset[k].order()): +# vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])]) + +# new_labels = gmean(vectors) +# median.node[i]['x'] = str(new_labels[0]) +# median.node[i]['y'] = str(new_labels[1]) +# return median + +def update_median_nodes(median,dataset,mappings): + #update node attributes + for i in median.nodes(): + nb_sub=0 + mean_label = {'x' : 0, 'y' : 0} + for k in range(0,len(mappings)): + phi_i = mappings[k][i] + if ( phi_i < dataset[k].order() ): + nb_sub += 1 + mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x']) + mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y']) + median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub)) + median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub)) + return median + +def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425): +#for letter high, ceir = 1.7, alpha = 0.75 + size_dataset = len(dataset) + ratio_cei_cer = cer/(cei + cer) + threshold = size_dataset*ratio_cei_cer + order_graph_median = median.order() + for i in range(0,order_graph_median): + for j in range(i+1,order_graph_median): + s_ij = calcul_Sij(mappings,dataset,i,j) + if(s_ij > threshold): + median.add_edge(i,j) + else: + if(median.has_edge(i,j)): + median.remove_edge(i,j) + return median + + + +def compute_median(script, listID, dataset,verbose=False): + """Compute a graph median of a dataset according to an environment + + Parameters + + script : An gedlib initialized environnement + listID (list): a list of ID in script: encodes the dataset + dataset (list): corresponding graphs in networkX format. 
We assume that graph + listID[i] corresponds to dataset[i] + + Returns: + A networkX graph, which is the median, with corresponding sod + """ + print(len(listID)) + median_set_index, median_set_sod = compute_median_set(script, listID) + print(median_set_index) + print(median_set_sod) + sods = [] + #Ajout median dans environnement + set_median = dataset[median_set_index].copy() + median = dataset[median_set_index].copy() + cur_med_id = replace_graph_in_env(script,median,-1) + med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID) + sods.append(cur_sod) + if(verbose): + print(cur_sod) + ite_max = 50 + old_sod = cur_sod * 2 + ite = 0 + epsilon = 0.001 + + best_median + while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon )): + median = update_median_nodes(median,dataset, med_mappings) + median = update_median_edges(dataset,med_mappings,median) + + cur_med_id = replace_graph_in_env(script,median,cur_med_id) + med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID) + + + sods.append(cur_sod) + if(verbose): + print(cur_sod) + ite += 1 + return median, cur_sod, sods, set_median + + draw_Letter_graph(median) + + +def compute_median_set(script,listID): + 'Returns the id in listID corresponding to median set' + #Calcul median set + N=len(listID) + map_id_to_index = {} + map_index_to_id = {} + for i in range(0,len(listID)): + map_id_to_index[listID[i]] = i + map_index_to_id[i] = listID[i] + + distances = np.zeros((N,N)) + for i in listID: + for j in listID: + script.PyRunMethod(i,j) + distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j) + + median_set_index = np.argmin(np.sum(distances,0)) + sod = np.min(np.sum(distances,0)) + + return median_set_index, sod + +if __name__ == "__main__": + #Chargement du dataset + script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml') + script.PySetEditCost("LETTER") + script.PyInitEnv() + script.PySetMethod("IPFP", "") + script.PyInitMethod() + + dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl") + + listID = script.PyGetAllGraphIds() + median, sod = compute_median(script,listID,dataset,verbose=True) + + print(sod) + draw_Letter_graph(median) diff --git a/gklearn/preimage/median_graph_estimator.py b/gklearn/preimage/median_graph_estimator.py new file mode 100644 index 0000000..b70cc61 --- /dev/null +++ b/gklearn/preimage/median_graph_estimator.py @@ -0,0 +1,826 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Mar 16 18:04:55 2020 + +@author: ljia +""" +import numpy as np +from gklearn.preimage.common_types import AlgorithmState +from gklearn.preimage import misc +from gklearn.preimage.timer import Timer +from gklearn.utils.utils import graph_isIdentical +import time +from tqdm import tqdm +import sys +import networkx as nx + + +class MedianGraphEstimator(object): + + def __init__(self, ged_env, constant_node_costs): + """Constructor. + + Parameters + ---------- + ged_env : gklearn.gedlib.gedlibpy.GEDEnv + Initialized GED environment. The edit costs must be set by the user. + + constant_node_costs : Boolean + Set to True if the node relabeling costs are constant. 
+ """ + self.__ged_env = ged_env + self.__init_method = 'BRANCH_FAST' + self.__init_options = '' + self.__descent_method = 'BRANCH_FAST' + self.__descent_options = '' + self.__refine_method = 'IPFP' + self.__refine_options = '' + self.__constant_node_costs = constant_node_costs + self.__labeled_nodes = (ged_env.get_num_node_labels() > 1) + self.__node_del_cost = ged_env.get_node_del_cost(ged_env.get_node_label(1)) + self.__node_ins_cost = ged_env.get_node_ins_cost(ged_env.get_node_label(1)) + self.__labeled_edges = (ged_env.get_num_edge_labels() > 1) + self.__edge_del_cost = ged_env.get_edge_del_cost(ged_env.get_edge_label(1)) + self.__edge_ins_cost = ged_env.get_edge_ins_cost(ged_env.get_edge_label(1)) + self.__init_type = 'RANDOM' + self.__num_random_inits = 10 + self.__desired_num_random_inits = 10 + self.__use_real_randomness = True + self.__seed = 0 + self.__refine = True + self.__time_limit_in_sec = 0 + self.__epsilon = 0.0001 + self.__max_itrs = 100 + self.__max_itrs_without_update = 3 + self.__num_inits_increase_order = 10 + self.__init_type_increase_order = 'K-MEANS++' + self.__max_itrs_increase_order = 10 + self.__print_to_stdout = 2 + self.__median_id = np.inf # @todo: check + self.__median_node_id_prefix = '' # @todo: check + self.__node_maps_from_median = {} + self.__sum_of_distances = 0 + self.__best_init_sum_of_distances = np.inf + self.__converged_sum_of_distances = np.inf + self.__runtime = None + self.__runtime_initialized = None + self.__runtime_converged = None + self.__itrs = [] # @todo: check: {} ? + self.__num_decrease_order = 0 + self.__num_increase_order = 0 + self.__num_converged_descents = 0 + self.__state = AlgorithmState.TERMINATED + + if ged_env is None: + raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.') + elif not ged_env.is_initialized(): + raise Exception('The GED environment is uninitialized. Call gedlibpy.GEDEnv.init() before passing it to the constructor of MedianGraphEstimator.') + + + def set_options(self, options): + """Sets the options of the estimator. + + Parameters + ---------- + options : string + String that specifies with which options to run the estimator. + """ + self.__set_default_options() + options_map = misc.options_string_to_options_map(options) + for opt_name, opt_val in options_map.items(): + if opt_name == 'init-type': + self.__init_type = opt_val + if opt_val != 'MEDOID' and opt_val != 'RANDOM' and opt_val != 'MIN' and opt_val != 'MAX' and opt_val != 'MEAN': + raise Exception('Invalid argument ' + opt_val + ' for option init-type. Usage: options = "[--init-type RANDOM|MEDOID|EMPTY|MIN|MAX|MEAN] [...]"') + elif opt_name == 'random-inits': + try: + self.__num_random_inits = int(opt_val) + self.__desired_num_random_inits = self.__num_random_inits + except: + raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits ]"') + + if self.__num_random_inits <= 0: + raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits ]"') + + elif opt_name == 'randomness': + if opt_val == 'PSEUDO': + self.__use_real_randomness = False + + elif opt_val == 'REAL': + self.__use_real_randomness = True + + else: + raise Exception('Invalid argument "' + opt_val + '" for option randomness. 
Usage: options = "[--randomness REAL|PSEUDO] [...]"') + + elif opt_name == 'stdout': + if opt_val == '0': + self.__print_to_stdout = 0 + + elif opt_val == '1': + self.__print_to_stdout = 1 + + elif opt_val == '2': + self.__print_to_stdout = 2 + + else: + raise Exception('Invalid argument "' + opt_val + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"') + + elif opt_name == 'refine': + if opt_val == 'TRUE': + self.__refine = True + + elif opt_val == 'FALSE': + self.__refine = False + + else: + raise Exception('Invalid argument "' + opt_val + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"') + + elif opt_name == 'time-limit': + try: + self.__time_limit_in_sec = float(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option time-limit. Usage: options = "[--time-limit ] [...]') + + elif opt_name == 'max-itrs': + try: + self.__max_itrs = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs ] [...]') + + elif opt_name == 'max-itrs-without-update': + try: + self.__max_itrs_without_update = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update ] [...]') + + elif opt_name == 'seed': + try: + self.__seed = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed ] [...]') + + elif opt_name == 'epsilon': + try: + self.__epsilon = float(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon ] [...]') + + if self.__epsilon <= 0: + raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon ] [...]') + + elif opt_name == 'inits-increase-order': + try: + self.__num_inits_increase_order = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order ]"') + + if self.__num_inits_increase_order <= 0: + raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order ]"') + + elif opt_name == 'init-type-increase-order': + self.__init_type_increase_order = opt_val + if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++': + raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"') + + elif opt_name == 'max-itrs-increase-order': + try: + self.__max_itrs_increase_order = int(opt_val) + + except: + raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order ] [...]') + + else: + valid_options = '[--init-type ] [--random-inits ] [--randomness ] [--seed ] [--stdout ] ' + valid_options += '[--time-limit ] [--max-itrs ] [--epsilon ] ' + valid_options += '[--inits-increase-order ] [--init-type-increase-order ] [--max-itrs-increase-order ]' + raise Exception('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"') + + + def set_init_method(self, init_method, init_options=''): + """Selects method to be used for computing the initial medoid graph. + + Parameters + ---------- + init_method : string + The selected method. Default: ged::Options::GEDMethod::BRANCH_UNIFORM. + + init_options : string + The options for the selected method. Default: "". 
+ + Notes + ----- + Has no effect unless "--init-type MEDOID" is passed to set_options(). + """ + self.__init_method = init_method; + self.__init_options = init_options; + + + def set_descent_method(self, descent_method, descent_options=''): + """Selects method to be used for block gradient descent.. + + Parameters + ---------- + descent_method : string + The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST. + + descent_options : string + The options for the selected method. Default: "". + + Notes + ----- + Has no effect unless "--init-type MEDOID" is passed to set_options(). + """ + self.__descent_method = descent_method; + self.__descent_options = descent_options; + + + def set_refine_method(self, refine_method, refine_options): + """Selects method to be used for improving the sum of distances and the node maps for the converged median. + + Parameters + ---------- + refine_method : string + The selected method. Default: "IPFP". + + refine_options : string + The options for the selected method. Default: "". + + Notes + ----- + Has no effect if "--refine FALSE" is passed to set_options(). + """ + self.__refine_method = refine_method + self.__refine_options = refine_options + + + def run(self, graph_ids, set_median_id, gen_median_id): + """Computes a generalized median graph. + + Parameters + ---------- + graph_ids : list[integer] + The IDs of the graphs for which the median should be computed. Must have been added to the environment passed to the constructor. + + set_median_id : integer + The ID of the computed set-median. A dummy graph with this ID must have been added to the environment passed to the constructor. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph(). + + + gen_median_id : integer + The ID of the computed generalized median. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph(). + """ + # Sanity checks. + if len(graph_ids) == 0: + raise Exception('Empty vector of graph IDs, unable to compute median.') + all_graphs_empty = True + for graph_id in graph_ids: + if self.__ged_env.get_graph_num_nodes(graph_id) > 0: + self.__median_node_id_prefix = self.__ged_env.get_original_node_ids(graph_id)[0] + all_graphs_empty = False + break + if all_graphs_empty: + raise Exception('All graphs in the collection are empty.') + + # Start timer and record start time. + start = time.time() + timer = Timer(self.__time_limit_in_sec) + self.__median_id = gen_median_id + self.__state = AlgorithmState.TERMINATED + + # Get ExchangeGraph representations of the input graphs. + graphs = {} + for graph_id in graph_ids: + # @todo: get_nx_graph() function may need to be modified according to the coming code. + graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False) +# print(self.__ged_env.get_graph_internal_id(0)) +# print(graphs[0].graph) +# print(graphs[0].nodes(data=True)) +# print(graphs[0].edges(data=True)) +# print(nx.adjacency_matrix(graphs[0])) + + + # Construct initial medians. + medians = [] + self.__construct_initial_medians(graph_ids, timer, medians) + end_init = time.time() + self.__runtime_initialized = end_init - start +# print(medians[0].graph) +# print(medians[0].nodes(data=True)) +# print(medians[0].edges(data=True)) +# print(nx.adjacency_matrix(medians[0])) + + # Reset information about iterations and number of times the median decreases and increases. 
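Stripped of logging and bookkeeping, the block gradient descent that run() performs below alternates two updates until neither changes anything. A schematic sketch (the helper names are placeholders, not the estimator's API):

def block_gradient_descent(median, graphs, update_median, update_node_maps,
                           max_itrs=100):
    # Alternate between re-estimating the median for the current node maps
    # and re-estimating the node maps for the updated median.
    for itr in range(max_itrs):
        median_modified = update_median(median, graphs)
        maps_modified = update_node_maps(median, graphs)
        if not (median_modified or maps_modified):
            break  # converged: a fixed point of both updates.
    return median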
+ self.__itrs = [0] * len(medians) + self.__num_decrease_order = 0 + self.__num_increase_order = 0 + self.__num_converged_descents = 0 + + # Initialize the best median. + best_sum_of_distances = np.inf + self.__best_init_sum_of_distances = np.inf + node_maps_from_best_median = {} + + # Run block gradient descent from all initial medians. + self.__ged_env.set_method(self.__descent_method, self.__descent_options) + for median_pos in range(0, len(medians)): + + # Terminate if the timer has expired and at least one SOD has been computed. + if timer.expired() and median_pos > 0: + break + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n===========================================================') + print('Block gradient descent for initial median', str(median_pos + 1), 'of', str(len(medians)), '.') + print('-----------------------------------------------------------') + + # Get reference to the median. + median = medians[median_pos] + + # Load initial median into the environment. + self.__ged_env.load_nx_graph(median, gen_median_id) + self.__ged_env.init(self.__ged_env.get_init_type()) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout) + + # Compute node maps and sum of distances for initial median. + self.__sum_of_distances = 0 + self.__node_maps_from_median.clear() # @todo + for graph_id in graph_ids: + self.__ged_env.run_method(gen_median_id, graph_id) + self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id) +# print(self.__node_maps_from_median[graph_id]) + self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary. +# print(self.__sum_of_distances) + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress.update(1) + + self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances) + self.__ged_env.load_nx_graph(median, set_median_id) +# print(self.__best_init_sum_of_distances) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n') + + # Run block gradient descent from initial median. + converged = False + itrs_without_update = 0 + while not self.__termination_criterion_met(converged, timer, self.__itrs[median_pos], itrs_without_update): + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n===========================================================') + print('Iteration', str(self.__itrs[median_pos] + 1), 'for initial median', str(median_pos + 1), 'of', str(len(medians)), '.') + print('-----------------------------------------------------------') + + # Initialize flags that tell us what happened in the iteration. + median_modified = False + node_maps_modified = False + decreased_order = False + increased_order = False + + # Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!! + median_modified = self.__update_median(graphs, median) + if not median_modified or self.__itrs[median_pos] == 0: + decreased_order = False + if not decreased_order or self.__itrs[median_pos] == 0: + increased_order = False + + # Update the number of iterations without update of the median. 
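Throughout the loop above, an update only counts as a modification when it improves the relevant cost by more than the configured epsilon, which keeps the descent from cycling on numerical noise. The guard in isolation (an illustrative sketch):

def is_real_improvement(new_cost, old_cost, epsilon=0.0001):
    # Mirrors the epsilon guard used when deciding whether an update counts.
    return new_cost < old_cost - epsilon

assert is_real_improvement(9.99, 10.0)
assert not is_real_improvement(10.0 - 1e-6, 10.0)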
+ if median_modified or decreased_order or increased_order: + itrs_without_update = 0 + else: + itrs_without_update += 1 + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Loading median to environment: ... ', end='') + + # Load the median into the environment. + # @todo: should this function use the original node label? + self.__ged_env.load_nx_graph(median, gen_median_id) + self.__ged_env.init(self.__ged_env.get_init_type()) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('done.') + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Updating induced costs: ... ', end='') + + # Compute induced costs of the old node maps w.r.t. the updated median. + for graph_id in graph_ids: +# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id)) + # @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + self.__ged_env.compute_induced_cost(gen_median_id, graph_id) +# print('---------------------------------------') +# print(self.__ged_env.get_induced_cost(gen_median_id, graph_id)) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('done.') + + # Update the node maps. + node_maps_modified = self.__update_node_maps() # @todo + + # Update the order of the median if no improvement can be found with the current order. + + # Update the sum of distances. + old_sum_of_distances = self.__sum_of_distances + self.__sum_of_distances = 0 + for graph_id in self.__node_maps_from_median: + self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: see above. + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Old local SOD: ', old_sum_of_distances) + print('New local SOD: ', self.__sum_of_distances) + print('Best converged SOD: ', best_sum_of_distances) + print('Modified median: ', median_modified) + print('Modified node maps: ', node_maps_modified) + print('Decreased order: ', decreased_order) + print('Increased order: ', increased_order) + print('===========================================================\n') + + converged = not (median_modified or node_maps_modified or decreased_order or increased_order) + + self.__itrs[median_pos] += 1 + + # Update the best median. + if self.__sum_of_distances < self.__best_init_sum_of_distances: + best_sum_of_distances = self.__sum_of_distances + node_maps_from_best_median = self.__node_maps_from_median + best_median = median + + # Update the number of converged descents. + if converged: + self.__num_converged_descents += 1 + + # Store the best encountered median. + self.__sum_of_distances = best_sum_of_distances + self.__node_maps_from_median = node_maps_from_best_median + self.__ged_env.load_nx_graph(best_median, gen_median_id) + self.__ged_env.init(self.__ged_env.get_init_type()) + end_descent = time.time() + self.__runtime_converged = end_descent - start + + # Refine the sum of distances and the node maps for the converged median. + self.__converged_sum_of_distances = self.__sum_of_distances + if self.__refine: + self.__improve_sum_of_distances(timer) # @todo + + # Record end time, set runtime and reset the number of initial medians. + end = time.time() + self.__runtime = end - start + self.__num_random_inits = self.__desired_num_random_inits + + # Print global information. 
+ if self.__print_to_stdout != 0: + print('\n===========================================================') + print('Finished computation of generalized median graph.') + print('-----------------------------------------------------------') + print('Best SOD after initialization: ', self.__best_init_sum_of_distances) + print('Converged SOD: ', self.__converged_sum_of_distances) + if self.__refine: + print('Refined SOD: ', self.__sum_of_distances) + print('Overall runtime: ', self.__runtime) + print('Runtime of initialization: ', self.__runtime_initialized) + print('Runtime of block gradient descent: ', self.__runtime_converged - self.__runtime_initialized) + if self.__refine: + print('Runtime of refinement: ', self.__runtime - self.__runtime_converged) + print('Number of initial medians: ', len(medians)) + total_itr = 0 + num_started_descents = 0 + for itr in self.__itrs: + total_itr += itr + if itr > 0: + num_started_descents += 1 + print('Size of graph collection: ', len(graph_ids)) + print('Number of started descents: ', num_started_descents) + print('Number of converged descents: ', self.__num_converged_descents) + print('Overall number of iterations: ', total_itr) + print('Overall number of times the order decreased: ', self.__num_decrease_order) + print('Overall number of times the order increased: ', self.__num_increase_order) + print('===========================================================\n') + + + def get_sum_of_distances(self, state=''): + """Returns the sum of distances. + + Parameters + ---------- + state : string + The state of the estimator. Can be 'initialized' or 'converged'. Default: "" + + Returns + ------- + float + The sum of distances (SOD) of the median when the estimator was in the state `state` during the last call to run(). If `state` is not given, the converged SOD (without refinement) or refined SOD (with refinement) is returned. + """ + if not self.__median_available(): + raise Exception('No median has been computed. Call run() before calling get_sum_of_distances().') + if state == 'initialized': + return self.__best_init_sum_of_distances + if state == 'converged': + return self.__converged_sum_of_distances + return self.__sum_of_distances + + + def __set_default_options(self): + self.__init_type = 'RANDOM' + self.__num_random_inits = 10 + self.__desired_num_random_inits = 10 + self.__use_real_randomness = True + self.__seed = 0 + self.__refine = True + self.__time_limit_in_sec = 0 + self.__epsilon = 0.0001 + self.__max_itrs = 100 + self.__max_itrs_without_update = 3 + self.__num_inits_increase_order = 10 + self.__init_type_increase_order = 'K-MEANS++' + self.__max_itrs_increase_order = 10 + self.__print_to_stdout = 2 + + + def __construct_initial_medians(self, graph_ids, timer, initial_medians): + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n===========================================================') + print('Constructing initial median(s).') + print('-----------------------------------------------------------') + + # Compute or sample the initial median(s). 
+ initial_medians.clear() + if self.__init_type == 'MEDOID': + self.__compute_medoid(graph_ids, timer, initial_medians) + elif self.__init_type == 'MAX': + pass # @todo +# compute_max_order_graph_(graph_ids, initial_medians) + elif self.__init_type == 'MIN': + pass # @todo +# compute_min_order_graph_(graph_ids, initial_medians) + elif self.__init_type == 'MEAN': + pass # @todo +# compute_mean_order_graph_(graph_ids, initial_medians) + else: + pass # @todo +# sample_initial_medians_(graph_ids, initial_medians) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('===========================================================') + + + def __compute_medoid(self, graph_ids, timer, initial_medians): + # Use method selected for initialization phase. + self.__ged_env.set_method(self.__init_method, self.__init_options) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout) + + # Compute the medoid. + medoid_id = graph_ids[0] + best_sum_of_distances = np.inf + for g_id in graph_ids: + if timer.expired(): + self.__state = AlgorithmState.CALLED + break + sum_of_distances = 0 + for h_id in graph_ids: + self.__ged_env.run_method(g_id, h_id) + sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id) + if sum_of_distances < best_sum_of_distances: + best_sum_of_distances = sum_of_distances + medoid_id = g_id + + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress.update(1) + initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n') + + + def __termination_criterion_met(self, converged, timer, itr, itrs_without_update): + if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False): + if self.__state == AlgorithmState.TERMINATED: + self.__state = AlgorithmState.INITIALIZED + return True + return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False) + + + def __update_median(self, graphs, median): + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('Updating median: ', end='') + + # Store copy of the old median. + old_median = median.copy() # @todo: this is just a shallow copy. + + # Update the node labels. + if self.__labeled_nodes: + self.__update_node_labels(graphs, median) + + # Update the edges and their labels. + self.__update_edges(graphs, median) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('done.') + + return not self.__are_graphs_equal(median, old_median) + + + def __update_node_labels(self, graphs, median): + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('nodes ... ', end='') + + # Iterate through all nodes of the median. + for i in range(0, nx.number_of_nodes(median)): +# print('i: ', i) + # Collect the labels of the substituted nodes. + node_labels = [] + for graph_id, graph in graphs.items(): +# print('graph_id: ', graph_id) +# print(self.__node_maps_from_median[graph_id]) + k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i) +# print('k: ', k) + if k != np.inf: + node_labels.append(graph.nodes[k]) + + # Compute the median label and update the median. 
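When node relabeling costs are constant, the median label computed in the step just below is simply the most frequent label among the mapped nodes. A hedged stand-in for the environment's median-label query under that assumption (not the gedlibpy implementation):

from collections import Counter

def median_node_label_constant_costs(node_labels):
    # node_labels: list of label dicts collected from the mapped nodes.
    # With constant substitution costs the cost-minimizing label is the mode.
    frozen = [tuple(sorted(l.items())) for l in node_labels]
    return dict(Counter(frozen).most_common(1)[0][0])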
+ if len(node_labels) > 0: + median_label = self.__ged_env.get_median_node_label(node_labels) + if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon: + nx.set_node_attributes(median, {i: median_label}) + + + def __update_edges(self, graphs, median): + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('edges ... ', end='') + + # Clear the adjacency lists of the median and reset number of edges to 0. + median_edges = list(median.edges) + for (head, tail) in median_edges: + median.remove_edge(head, tail) + + # @todo: what if edge is not labeled? + # Iterate through all possible edges (i,j) of the median. + for i in range(0, nx.number_of_nodes(median)): + for j in range(i + 1, nx.number_of_nodes(median)): + + # Collect the labels of the edges to which (i,j) is mapped by the node maps. + edge_labels = [] + for graph_id, graph in graphs.items(): + k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i) + l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j) + if k != np.inf and l != np.inf: + if graph.has_edge(k, l): + edge_labels.append(graph.edges[(k, l)]) + + # Compute the median edge label and the overall edge relabeling cost. + rel_cost = 0 + median_label = self.__ged_env.get_edge_label(1) + if median.has_edge(i, j): + median_label = median.edges[(i, j)] + if self.__labeled_edges and len(edge_labels) > 0: + new_median_label = self.__ged_env.median_edge_label(edge_labels) + if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon: + median_label = new_median_label + for edge_label in edge_labels: + rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label) + + # Update the median. + if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs): + median.add_edge(i, j, **median_label) + else: + if median.has_edge(i, j): + median.remove_edge(i, j) + + + def __update_node_maps(self): + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout) + + # Update the node maps. + node_maps_were_modified = False + for graph_id in self.__node_maps_from_median: + self.__ged_env.run_method(self.__median_id, graph_id) + if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon: # @todo: see above. + self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) # @todo: node_map may not assigned. + node_maps_were_modified = True + # Print information about current iteration. + if self.__print_to_stdout == 2: + progress.update(1) + + # Print information about current iteration. + if self.__print_to_stdout == 2: + print('\n') + + # Return true if the node maps were modified. + return node_maps_were_modified + + + def __improve_sum_of_distances(self, timer): + pass + + + def __median_available(self): + return self.__median_id != np.inf + + + def __get_node_image_from_map(self, node_map, node): + """ + Return ID of the node mapping of `node` in `node_map`. + + Parameters + ---------- + node_map : list[tuple(int, int)] + List of node maps where the mapping node is found. + + node : int + The mapping node of this node is returned + + Raises + ------ + Exception + If the node with ID `node` is not contained in the source nodes of the node map. 
+    def __get_node_image_from_map(self, node_map, node):
+        """
+        Return the ID of the image of `node` in `node_map`.
+
+        Parameters
+        ----------
+        node_map : list[tuple(int, int)]
+            The node map in which the image of `node` is looked up.
+
+        node : int
+            The node whose image is returned.
+
+        Raises
+        ------
+        Exception
+            If the node with ID `node` is not contained in the source nodes of the node map.
+
+        Returns
+        -------
+        int
+            ID of the image of `node`.
+
+        Notes
+        -----
+        This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
+        """
+        if node < len(node_map):
+            return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
+        else:
+            raise Exception('The node with ID ' + str(node) + ' is not contained in the source nodes of the node map.')
+
+
+    def __are_graphs_equal(self, g1, g2):
+        """
+        Check if two graphs are equal.
+
+        Parameters
+        ----------
+        g1 : NetworkX graph object
+            Graph 1 to be compared.
+
+        g2 : NetworkX graph object
+            Graph 2 to be compared.
+
+        Returns
+        -------
+        bool
+            True if the two graphs are equal.
+
+        Notes
+        -----
+        This is not an isomorphism check. Here the two graphs are equal if and only if their original_node_ids, nodes, all node labels, edges and all edge labels are equal. This function is specifically designed for class `MedianGraphEstimator` and should not be used elsewhere.
+        """
+        # Check the original node IDs.
+        if not g1.graph['original_node_ids'] == g2.graph['original_node_ids']:
+            return False
+        # Check the nodes.
+        nlist1 = [n for n in g1.nodes(data=True)]
+        nlist2 = [n for n in g2.nodes(data=True)]
+        if not nlist1 == nlist2:
+            return False
+        # Check the edges.
+        elist1 = [e for e in g1.edges(data=True)]
+        elist2 = [e for e in g2.edges(data=True)]
+        if not elist1 == elist2:
+            return False
+
+        return True
+
+
+def compute_my_cost(g, h, node_map):
+    cost = 0.0
+    for node in g.nodes:
+        cost += 0 # @todo: incomplete stub; the per-node cost is not computed yet.
+    return cost
\ No newline at end of file
diff --git a/gklearn/preimage/median_linlin.py b/gklearn/preimage/median_linlin.py
new file mode 100644
index 0000000..6139558
--- /dev/null
+++ b/gklearn/preimage/median_linlin.py
@@ -0,0 +1,215 @@
+import sys
+import pathlib
+import numpy as np
+import networkx as nx
+
+from gedlibpy import librariesImport, gedlibpy
+sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
+import gklearn
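+
+# Editor's sketch (illustration only; it reuses only gedlibpy calls that appear
+# elsewhere in this module): once the environment has been initialized with
+# PyInitEnv / PySetMethod / PyInitMethod, the GED between two graphs registered
+# under g_id and h_id can be queried as follows.
+def _example_ged_upper_bound(script, g_id, h_id):
+    script.PyRunMethod(g_id, h_id)
+    return script.PyGetUpperBound(g_id, h_id)
+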
+def replace_graph_in_env(script, graph, old_id, label='median'):
+    """
+    Replace a graph in the environment.
+
+    If `old_id` is -1, add a new graph to the environment.
+    """
+    if old_id > -1:
+        script.PyClearGraph(old_id)
+    new_id = script.PyAddGraph(label)
+    for i in graph.nodes():
+        script.PyAddNode(new_id, str(i), graph.node[i]) # !! strings are required by gedlib
+    for e in graph.edges:
+        script.PyAddEdge(new_id, str(e[0]), str(e[1]), {})
+    script.PyInitEnv()
+    script.PySetMethod("IPFP", "")
+    script.PyInitMethod()
+
+    return new_id
+
+# Draw the current median (Letter graphs, with 'x'/'y' node coordinates).
+def draw_Letter_graph(graph):
+    import numpy as np
+    import networkx as nx
+    import matplotlib.pyplot as plt
+    plt.figure()
+    pos = {}
+    for n in graph.nodes:
+        pos[n] = np.array([float(graph.node[n]['x']), float(graph.node[n]['y'])])
+    nx.draw_networkx(graph, pos)
+    plt.show()
+
+# Compute new mappings between the median and every graph of the dataset.
+def update_mappings(script, median_id, listID):
+    med_distances = {}
+    med_mappings = {}
+    sod = 0
+    for i in range(0, len(listID)):
+        script.PyRunMethod(median_id, listID[i])
+        med_distances[i] = script.PyGetUpperBound(median_id, listID[i])
+        med_mappings[i] = script.PyGetForwardMap(median_id, listID[i])
+        sod += med_distances[i]
+    return med_distances, med_mappings, sod
+
+def calcul_Sij(all_mappings, all_graphs, i, j):
+    # Count how many graphs have an edge between the images of nodes i and j.
+    s_ij = 0
+    for k in range(0, len(all_mappings)):
+        cur_graph = all_graphs[k]
+        cur_mapping = all_mappings[k]
+        size_graph = cur_graph.order()
+        if (cur_mapping[i] < size_graph and
+            cur_mapping[j] < size_graph and
+            cur_graph.has_edge(cur_mapping[i], cur_mapping[j])):
+            s_ij += 1
+
+    return s_ij
+
+# def update_median_nodes_L1(median, listIdSet, median_id, dataset, mappings):
+#     from scipy.stats.mstats import gmean
+
+#     for i in median.nodes():
+#         for k in listIdSet:
+#             vectors = [] # np.zeros((len(listIdSet), 2))
+#             if(k != median_id):
+#                 phi_i = mappings[k][i]
+#                 if(phi_i < dataset[k].order()):
+#                     vectors.append([float(dataset[k].node[phi_i]['x']), float(dataset[k].node[phi_i]['y'])])
+
+#         new_labels = gmean(vectors)
+#         median.node[i]['x'] = str(new_labels[0])
+#         median.node[i]['y'] = str(new_labels[1])
+#     return median
+
+def update_median_nodes(median, dataset, mappings):
+    # Update the node attributes.
+    for i in median.nodes():
+        nb_sub = 0
+        mean_label = {'x': 0, 'y': 0}
+        for k in range(0, len(mappings)):
+            phi_i = mappings[k][i]
+            if phi_i < dataset[k].order():
+                nb_sub += 1
+                mean_label['x'] += 0.75 * float(dataset[k].node[phi_i]['x'])
+                mean_label['y'] += 0.75 * float(dataset[k].node[phi_i]['y'])
+        median.node[i]['x'] = str((1 / 0.75) * (mean_label['x'] / nb_sub))
+        median.node[i]['y'] = str((1 / 0.75) * (mean_label['y'] / nb_sub))
+    return median
+
+def update_median_edges(dataset, mappings, median, cei=0.425, cer=0.425):
+    # For Letter HIGH, ceir = 1.7, alpha = 0.75.
+    size_dataset = len(dataset)
+    ratio_cei_cer = cer / (cei + cer)
+    threshold = size_dataset * ratio_cei_cer
+    order_graph_median = median.order()
+    for i in range(0, order_graph_median):
+        for j in range(i + 1, order_graph_median):
+            s_ij = calcul_Sij(mappings, dataset, i, j)
+            if s_ij > threshold:
+                median.add_edge(i, j)
+            else:
+                if median.has_edge(i, j):
+                    median.remove_edge(i, j)
+    return median
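+
+# Editor's worked example (illustration only): with the default costs
+# cei = cer = 0.425, the threshold used by update_median_edges is
+# len(dataset) * cer / (cei + cer) = len(dataset) / 2, so the rule reduces to
+# a majority vote: edge (i, j) is kept iff more than half of the node maps
+# send it onto an existing edge.
+def _edge_threshold_example(size_dataset=10, cei=0.425, cer=0.425):
+    threshold = size_dataset * cer / (cei + cer) # == 5.0 for the defaults
+    return threshold # s_ij = 6 adds the edge (6 > 5); s_ij = 5 removes it
+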
+def compute_median(script, listID, dataset, verbose=False):
+    """Compute a graph median of a dataset according to an environment.
+
+    Parameters
+    ----------
+    script : a gedlib environment, already initialized
+    listID (list) : list of graph IDs in `script`; encodes the dataset
+    dataset (list) : the corresponding graphs in NetworkX format. We assume
+        that graph listID[i] corresponds to dataset[i].
+
+    Returns
+    -------
+    The median graph (NetworkX), its SOD, the SOD history over the
+    iterations, and the set median the search started from.
+    """
+    print(len(listID))
+    median_set_index, median_set_sod = compute_median_set(script, listID)
+    print(median_set_index)
+    print(median_set_sod)
+    sods = []
+    # Add the median to the environment.
+    set_median = dataset[median_set_index].copy()
+    median = dataset[median_set_index].copy()
+    cur_med_id = replace_graph_in_env(script, median, -1)
+    med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
+    sods.append(cur_sod)
+    if verbose:
+        print(cur_sod)
+    ite_max = 50
+    old_sod = cur_sod * 2
+    ite = 0
+    epsilon = 0.001
+
+    while ite < ite_max and np.abs(old_sod - cur_sod) > epsilon:
+        old_sod = cur_sod
+        median = update_median_nodes(median, dataset, med_mappings)
+        median = update_median_edges(dataset, med_mappings, median)
+
+        cur_med_id = replace_graph_in_env(script, median, cur_med_id)
+        med_distances, med_mappings, cur_sod = update_mappings(script, cur_med_id, listID)
+
+        sods.append(cur_sod)
+        if verbose:
+            print(cur_sod)
+        ite += 1
+    return median, cur_sod, sods, set_median
+
+
+def compute_median_set(script, listID):
+    """Return the index (in listID) of the set median and its sum of distances (SOD)."""
+    # Compute the set median.
+    N = len(listID)
+    map_id_to_index = {}
+    map_index_to_id = {}
+    for i in range(0, len(listID)):
+        map_id_to_index[listID[i]] = i
+        map_index_to_id[i] = listID[i]
+
+    distances = np.zeros((N, N))
+    for i in listID:
+        for j in listID:
+            script.PyRunMethod(i, j)
+            distances[map_id_to_index[i], map_id_to_index[j]] = script.PyGetUpperBound(i, j)
+
+    median_set_index = np.argmin(np.sum(distances, 0))
+    sod = np.min(np.sum(distances, 0))
+
+    return median_set_index, sod
+
+def _convertGraph(G):
+    """Convert a graph to the proper NetworkX format that can be
+    recognized by the library gedlibpy.
+    """
+    G_new = nx.Graph()
+    for nd, attrs in G.nodes(data=True):
+        G_new.add_node(str(nd), chem=attrs['atom'])
+#        G_new.add_node(str(nd), x=str(attrs['attributes'][0]),
+#                       y=str(attrs['attributes'][1]))
+    for nd1, nd2, attrs in G.edges(data=True):
+        G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
+#        G_new.add_edge(str(nd1), str(nd2))
+
+    return G_new
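+
+# Editor's usage sketch (hypothetical toy input): _convertGraph expects the
+# node attribute 'atom' and the edge attribute 'bond_type', as produced e.g.
+# by gklearn.utils.graphfiles.loadDataset on molecule datasets.
+def _convert_graph_example():
+    g = nx.Graph()
+    g.add_node(0, atom='C')
+    g.add_node(1, atom='O')
+    g.add_edge(0, 1, bond_type='1')
+    return _convertGraph(g) # nodes '0', '1' carry 'chem'; the edge carries 'valence'
+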
+ """ + G_new = nx.Graph() + for nd, attrs in G.nodes(data=True): + G_new.add_node(str(nd), chem=attrs['atom']) +# G_new.add_node(str(nd), x=str(attrs['attributes'][0]), +# y=str(attrs['attributes'][1])) + for nd1, nd2, attrs in G.edges(data=True): + G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type']) +# G_new.add_edge(str(nd1), str(nd2)) + + return G_new + +if __name__ == "__main__": + #Chargement du dataset + gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml') + gedlibpy.PySetEditCost("LETTER") + gedlibpy.PyInitEnv() + gedlibpy.PySetMethod("IPFP", "") + gedlibpy.PyInitMethod() + + dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl") + + listID = gedlibpy.PyGetAllGraphIds() + median, sod = compute_median(gedlibpy,listID,dataset,verbose=True) + + print(sod) + draw_Letter_graph(median) diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py new file mode 100644 index 0000000..dfbaef2 --- /dev/null +++ b/gklearn/preimage/median_preimage_generator.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 26 18:27:22 2020 + +@author: ljia +""" +from gklearn.preimage.preimage_generator import PreimageGenerator +# from gklearn.utils.dataset import Dataset + +class MedianPreimageGenerator(PreimageGenerator): + + def __init__(self, mge, dataset): + self.__mge = mge + self.__dataset = dataset \ No newline at end of file diff --git a/gklearn/preimage/misc.py b/gklearn/preimage/misc.py new file mode 100644 index 0000000..18682c8 --- /dev/null +++ b/gklearn/preimage/misc.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Mar 19 18:13:56 2020 + +@author: ljia +""" + +def options_string_to_options_map(options_string): + """Transforms an options string into an options map. + + Parameters + ---------- + options_string : string + Options string of the form "[--