From 5fe81a932b96b647d773939175784bce5f703413 Mon Sep 17 00:00:00 2001
From: jajupmochi <jajupmochi@gmail.com>
Date: Fri, 27 Mar 2020 09:34:26 +0100
Subject: [PATCH] clear repo: remove preimage.

---
 gklearn/preimage/common_types.py                |  17 -
 gklearn/preimage/cpp2python.py                  | 134 ----
 gklearn/preimage/find_best_k.py                 | 170 -----
 gklearn/preimage/fitDistance.py                 | 430 -----------
 gklearn/preimage/ged.py                         | 467 ------------
 gklearn/preimage/iam.py                         | 775 -------------------
 gklearn/preimage/knn.py                         | 114 ---
 gklearn/preimage/libs.py                        |   6 -
 gklearn/preimage/median.py                      | 218 ------
 gklearn/preimage/median_benoit.py               | 201 -----
 gklearn/preimage/median_graph_estimator.py      | 826 --------------------
 gklearn/preimage/median_linlin.py               | 215 ------
 gklearn/preimage/median_preimage_generator.py   |  15 -
 gklearn/preimage/misc.py                        | 108 ---
 gklearn/preimage/pathfrequency.py               | 201 -----
 gklearn/preimage/preimage_generator.py          |  12 -
 gklearn/preimage/preimage_iam.py                | 705 -----------------
 gklearn/preimage/preimage_random.py             | 309 --------
 gklearn/preimage/python_code.py                 | 122 ---
 gklearn/preimage/test.py                        |  83 --
 gklearn/preimage/test_fitDistance.py            | 648 ----------------
 gklearn/preimage/test_ged.py                    | 520 -------------
 gklearn/preimage/test_iam.py                    | 964 ------------------------
 gklearn/preimage/test_k_closest_graphs.py       | 462 ------------
 gklearn/preimage/test_median_graph_estimator.py |  91 ---
 gklearn/preimage/test_others.py                 | 686 -----------------
 gklearn/preimage/test_preimage_iam.py           | 620 ---------------
 gklearn/preimage/test_preimage_mix.py           | 539 -------------
 gklearn/preimage/test_preimage_random.py        | 398 ----------
 gklearn/preimage/timer.py                       |  40 -
 gklearn/preimage/utils.py                       | 151 ----
 gklearn/preimage/visualization.py               | 585 --------------
 gklearn/preimage/xp_fit_method.py               | 935 -----------------------
 gklearn/preimage/xp_letter_h.py                 | 476 ------------
 gklearn/preimage/xp_monoterpenoides.py          | 249 ------
 35 files changed, 12492 deletions(-)
 delete mode 100644 gklearn/preimage/common_types.py
 delete mode 100644 gklearn/preimage/cpp2python.py
 delete mode 100644 gklearn/preimage/find_best_k.py
 delete mode 100644 gklearn/preimage/fitDistance.py
 delete mode 100644 gklearn/preimage/ged.py
 delete mode 100644 gklearn/preimage/iam.py
 delete mode 100644 gklearn/preimage/knn.py
 delete mode 100644 gklearn/preimage/libs.py
 delete mode 100644 gklearn/preimage/median.py
 delete mode 100644 gklearn/preimage/median_benoit.py
 delete mode 100644 gklearn/preimage/median_graph_estimator.py
 delete mode 100644 gklearn/preimage/median_linlin.py
 delete mode 100644 gklearn/preimage/median_preimage_generator.py
 delete mode 100644 gklearn/preimage/misc.py
 delete mode 100644 gklearn/preimage/pathfrequency.py
 delete mode 100644 gklearn/preimage/preimage_generator.py
 delete mode 100644 gklearn/preimage/preimage_iam.py
 delete mode 100644 gklearn/preimage/preimage_random.py
 delete mode 100644 gklearn/preimage/python_code.py
 delete mode 100644 gklearn/preimage/test.py
 delete mode 100644 gklearn/preimage/test_fitDistance.py
 delete mode 100644 gklearn/preimage/test_ged.py
 delete mode 100644 gklearn/preimage/test_iam.py
 delete mode 100644 gklearn/preimage/test_k_closest_graphs.py
 delete mode 100644 gklearn/preimage/test_median_graph_estimator.py
 delete mode 100644 gklearn/preimage/test_others.py
 delete mode 100644 gklearn/preimage/test_preimage_iam.py
 delete mode 100644 gklearn/preimage/test_preimage_mix.py
 delete mode 100644 gklearn/preimage/test_preimage_random.py
 delete mode 100644 gklearn/preimage/timer.py
 delete mode 100644 gklearn/preimage/utils.py
 delete mode 100644 gklearn/preimage/visualization.py
 delete mode 100644 gklearn/preimage/xp_fit_method.py
 delete mode 100644 gklearn/preimage/xp_letter_h.py
 delete mode 100644 gklearn/preimage/xp_monoterpenoides.py

diff --git a/gklearn/preimage/common_types.py b/gklearn/preimage/common_types.py
deleted file mode 100644
index 2face25..0000000
--- a/gklearn/preimage/common_types.py
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 19 18:17:38 2020
-
-@author: ljia
-"""
-
-from enum import Enum, auto
-
-class AlgorithmState(Enum):
-    """can be used to specify the state of an algorithm.
-    """
-    CALLED = auto # The algorithm has been called.
-    INITIALIZED = auto # The algorithm has been initialized.
-    CONVERGED = auto # The algorithm has converged.
-    TERMINATED = auto # The algorithm has terminated.
\ No newline at end of file
diff --git a/gklearn/preimage/cpp2python.py b/gklearn/preimage/cpp2python.py
deleted file mode 100644
index 9d63026..0000000
--- a/gklearn/preimage/cpp2python.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Mar 20 11:09:04 2020
-
-@author: ljia
-"""
-import re
-
-def convert_function(cpp_code):
-# f_cpp = open('cpp_code.cpp', 'r')
-# # f_cpp = open('cpp_ext/src/median_graph_estimator.ipp', 'r')
-# 	cpp_code = f_cpp.read()
-	python_code = cpp_code.replace('else if (', 'elif ')
-	python_code = python_code.replace('if (', 'if ')
-	python_code = python_code.replace('else {', 'else:')
-	python_code = python_code.replace(') {', ':')
-	python_code = python_code.replace(';\n', '\n')
-	python_code = re.sub('\n(.*)}\n', '\n\n', python_code)
-	# python_code = python_code.replace('}\n', '')
-	python_code = python_code.replace('throw', 'raise')
-	python_code = python_code.replace('error', 'Exception')
-	python_code = python_code.replace('"', '\'')
-	python_code = python_code.replace('\\\'', '"')
-	python_code = python_code.replace('try {', 'try:')
-	python_code = python_code.replace('true', 'True')
-	python_code = python_code.replace('false', 'False')
-	python_code = python_code.replace('catch (...', 'except')
-	# python_code = re.sub('std::string\(\'(.*)\'\)', '$1', python_code)
-	
-	return python_code
-
-
-
-# # python_code = python_code.replace('}\n', '')
-
-
-
-
-# python_code = python_code.replace('option.first', 'opt_name')
-# python_code = python_code.replace('option.second', 'opt_val')
-# python_code = python_code.replace('ged::Error', 'Exception')
-# python_code = python_code.replace('std::string(\'Invalid argument "\')', '\'Invalid argument "\'')
-
-
-# f_cpp.close()
-# f_python = open('python_code.py', 'w')
-# f_python.write(python_code)
-# f_python.close()
-
-
-def convert_function_comment(cpp_fun_cmt, param_types):
-	cpp_fun_cmt = cpp_fun_cmt.replace('\t', '')
-	cpp_fun_cmt = cpp_fun_cmt.replace('\n * ', ' ')
-	# split the input comment according to key words.
-	param_split = None
-	note = None
-	cmt_split = cpp_fun_cmt.split('@brief')[1]
-	brief = cmt_split
-	if '@param' in cmt_split:
-		cmt_split = cmt_split.split('@param')
-		brief = cmt_split[0]
-		param_split = cmt_split[1:]
-	if '@note' in cmt_split[-1]:
-		note_split = cmt_split[-1].split('@note')
-		if param_split is not None:
-			param_split.pop()
-			param_split.append(note_split[0])
-		else:
-			brief = note_split[0]
-		note = note_split[1]
-		
-	# get parameters.
-	if param_split is not None:
-		for idx, param in enumerate(param_split):
-			_, param_name, param_desc = param.split(' ', 2)
-			param_name = function_comment_strip(param_name, ' *\n\t/')
-			param_desc = function_comment_strip(param_desc, ' *\n\t/')
-			param_split[idx] = (param_name, param_desc)
-		
-	# strip comments.
-	brief = function_comment_strip(brief, ' *\n\t/')
-	if note is not None:
-		note = function_comment_strip(note, ' *\n\t/')
-		
-	# construct the Python function comment.
-	python_fun_cmt = '"""'
-	python_fun_cmt += brief + '\n'
-	if param_split is not None and len(param_split) > 0:
-		python_fun_cmt += '\nParameters\n----------'
-		for idx, param in enumerate(param_split):
-			python_fun_cmt += '\n' + param[0] + ' : ' + param_types[idx]
-			python_fun_cmt += '\n\t' + param[1] + '\n'
-	if note is not None:
-		python_fun_cmt += '\nNote\n----\n' + note + '\n'
-	python_fun_cmt += '"""'
-	
-	return python_fun_cmt
-			
-		
-def function_comment_strip(comment, bad_chars):
-	head_removed, tail_removed = False, False
-	while not head_removed or not tail_removed:
-		if comment[0] in bad_chars:
-			comment = comment[1:]
-			head_removed = False
-		else:
-			head_removed = True
-		if comment[-1] in bad_chars:
-			comment = comment[:-1]
-			tail_removed = False
-		else:
-			tail_removed = True
-			
-	return comment
-
-		
-if __name__ == '__main__':
-#  	python_code = convert_function("""
-# 		if (print_to_stdout_ == 2) {
-# 			std::cout << "\n===========================================================\n";
-# 			std::cout << "Block gradient descent for initial median " << median_pos + 1 << " of " << medians.size() << ".\n";
-# 			std::cout << "-----------------------------------------------------------\n";
-# 		}
-# 								""")
-	
-	
- 	python_fun_cmt = convert_function_comment("""
-	/*!
-	 * @brief Returns the sum of distances.
-	 * @param[in] state The state of the estimator.
-	 * @return The sum of distances of the median when the estimator was in the state @p state during the last call to run().
-	 */
-						""", ['string', 'string'])
\ No newline at end of file
diff --git a/gklearn/preimage/find_best_k.py b/gklearn/preimage/find_best_k.py
deleted file mode 100644
index df38d32..0000000
--- a/gklearn/preimage/find_best_k.py
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jan  9 11:54:32 2020
-
-@author: ljia
-"""
-import numpy as np
-import random
-import csv
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs
-
-def find_best_k():
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    gkernel = 'treeletkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    ds_name = 'mono'
-    dir_output = 'results/test_find_best_k/'
-    
-    repeats = 50
-    k_list = range(2, 11)
-    fit_method = 'k-graphs'
-    # fitted on the whole dataset - treelet - mono
-    edit_costs = [0.1268873773592978, 0.004084633224249829, 0.0897581955378986, 0.15328856114451297, 0.3109956881625734, 0.0]
-    
-    # create result files.
-    fn_output_detail = 'results_detail.' + fit_method + '.csv'
-    f_detail = open(dir_output + fn_output_detail, 'a')
-    csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-              'repeat', 'median set', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-              'dis_k gi -> GM'])
-    f_detail.close()
-    fn_output_summary = 'results_summary.csv'
-    f_summary = open(dir_output + fn_output_summary, 'a')
-    csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-              'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-              'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', 
-              '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', 
-              'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', 
-              'repeats better dis_k gi -> GM'])
-    f_summary.close()
-    
-    random.seed(1)
-    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-    
-    for k in k_list:
-        print('\n--------- k =', k, '----------')
-        
-        sod_sm_list = []
-        sod_gm_list = []
-        dis_k_sm_list = []
-        dis_k_gm_list = []
-        dis_k_gi_min_list = []
-        nb_sod_sm2gm = [0, 0, 0]
-        nb_dis_k_sm2gm = [0, 0, 0]
-        nb_dis_k_gi2sm = [0, 0, 0]
-        nb_dis_k_gi2gm = [0, 0, 0]
-        repeats_better_sod_sm2gm = []
-        repeats_better_dis_k_sm2gm = []
-        repeats_better_dis_k_gi2sm = []
-        repeats_better_dis_k_gi2gm = []
-        
-        
-        for repeat in range(repeats):
-            print('\nrepeat =', repeat)
-            random.seed(rdn_seed_list[repeat])
-            median_set_idx = random.sample(range(0, len(Gn)), k)
-            print('median set: ', median_set_idx)
-            
-            sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-                = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, 
-                                             fit_method='k-graphs', 
-                                             edit_costs=edit_costs,
-                                             group_min=median_set_idx,
-                                             parallel=False)
-                
-            # write result detail.
-            sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
-            dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
-            dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
-            dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
-            f_detail = open(dir_output + fn_output_detail, 'a')
-            csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, repeat,
-                      median_set_idx, sod_sm, sod_gm, dis_k_sm, dis_k_gm, 
-                      dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
-                      dis_k_gi2gm])
-            f_detail.close()
-            
-            # compute result summary.
-            sod_sm_list.append(sod_sm)
-            sod_gm_list.append(sod_gm)
-            dis_k_sm_list.append(dis_k_sm)
-            dis_k_gm_list.append(dis_k_gm)
-            dis_k_gi_min_list.append(dis_k_gi_min)
-            # # SOD SM -> GM
-            if sod_sm > sod_gm:
-                nb_sod_sm2gm[0] += 1
-                repeats_better_sod_sm2gm.append(repeat)
-            elif sod_sm == sod_gm:
-                nb_sod_sm2gm[1] += 1
-            elif sod_sm < sod_gm:
-                nb_sod_sm2gm[2] += 1
-            # # dis_k SM -> GM
-            if dis_k_sm > dis_k_gm:
-                nb_dis_k_sm2gm[0] += 1
-                repeats_better_dis_k_sm2gm.append(repeat)
-            elif dis_k_sm == dis_k_gm:
-                nb_dis_k_sm2gm[1] += 1
-            elif dis_k_sm < dis_k_gm:
-                nb_dis_k_sm2gm[2] += 1
-            # # dis_k gi -> SM
-            if dis_k_gi_min > dis_k_sm:
-                nb_dis_k_gi2sm[0] += 1
-                repeats_better_dis_k_gi2sm.append(repeat)
-            elif dis_k_gi_min == dis_k_sm:
-                nb_dis_k_gi2sm[1] += 1
-            elif dis_k_gi_min < dis_k_sm:
-                nb_dis_k_gi2sm[2] += 1
-            # # dis_k gi -> GM
-            if dis_k_gi_min > dis_k_gm:
-                nb_dis_k_gi2gm[0] += 1
-                repeats_better_dis_k_gi2gm.append(repeat)
-            elif dis_k_gi_min == dis_k_gm:
-                nb_dis_k_gi2gm[1] += 1
-            elif dis_k_gi_min < dis_k_gm:
-                nb_dis_k_gi2gm[2] += 1
-            
-        # write result summary. 
-        sod_sm_mean = np.mean(sod_sm_list)
-        sod_gm_mean = np.mean(sod_gm_list)
-        dis_k_sm_mean = np.mean(dis_k_sm_list)
-        dis_k_gm_mean = np.mean(dis_k_gm_list)
-        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
-        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
-        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
-        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
-        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
-        f_summary = open(dir_output + fn_output_summary, 'a')
-        csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 
-                  sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
-                  dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                  dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, 
-                  nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, 
-                  repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, 
-                  repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
-        f_summary.close()
-        
-    print('\ncomplete.')
-    return
-
-
-def getRelations(sign):
-    if sign == -1:
-        return 'better'
-    elif sign == 0:
-        return 'same'
-    elif sign == 1:
-        return 'worse'
-
-
-if __name__ == '__main__':
-    find_best_k()
\ No newline at end of file
diff --git a/gklearn/preimage/fitDistance.py b/gklearn/preimage/fitDistance.py
deleted file mode 100644
index 234f7fc..0000000
--- a/gklearn/preimage/fitDistance.py
+++ /dev/null
@@ -1,430 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Oct 16 14:20:06 2019
-
-@author: ljia
-"""
-import numpy as np
-from tqdm import tqdm
-from itertools import combinations_with_replacement, combinations
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-import time
-import random
-import sys
-
-from scipy import optimize
-from scipy.optimize import minimize
-import cvxpy as cp
-
-from gklearn.preimage.ged import GED, get_nb_edit_operations, get_nb_edit_operations_letter, get_nb_edit_operations_nonsymbolic
-from gklearn.preimage.utils import kernel_distance_matrix
-
-def fit_GED_to_kernel_distance(Gn, node_label, edge_label, gkernel, itr_max,
-                               params_ged={'lib': 'gedlibpy', 'cost': 'CONSTANT', 
-                                           'method': 'IPFP', 'stabilizer': None},
-                               init_costs=[3, 3, 1, 3, 3, 1],
-                               dataset='monoterpenoides', Kmatrix=None,
-                               parallel=True):
-#    dataset = dataset.lower()
-    
-    # c_vi, c_vr, c_vs, c_ei, c_er, c_es or parts of them.
-#    random.seed(1)
-#    cost_rdm = random.sample(range(1, 10), 6)
-#    init_costs = cost_rdm + [0]
-#    init_costs = cost_rdm
-#    init_costs = [3, 3, 1, 3, 3, 1]
-#    init_costs = [i * 0.01 for i in cost_rdm] + [0]
-#    init_costs = [0.2, 0.2, 0.2, 0.2, 0.2, 0]
-#    init_costs = [0, 0, 0.9544, 0.026, 0.0196, 0]
-#    init_costs = [0.008429912251810438, 0.025461055985319694, 0.2047320869225948, 0.004148727085832133, 0.0, 0]
-#    idx_cost_nonzeros = [i for i, item in enumerate(edit_costs) if item != 0]
-    
-    # compute distances in feature space.
-    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, 
-                                                Kmatrix=Kmatrix, gkernel=gkernel)
-    dis_k_vec = []
-    for i in range(len(dis_k_mat)):
-#        for j in range(i, len(dis_k_mat)):
-        for j in range(i + 1, len(dis_k_mat)):
-            dis_k_vec.append(dis_k_mat[i, j])
-    dis_k_vec = np.array(dis_k_vec)
-    
-    # init ged.
-    print('\ninitial:')
-    time0 = time.time()
-    params_ged['dataset'] = dataset
-    params_ged['edit_cost_constant'] = init_costs
-    ged_vec_init, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
-                                                            parallel=parallel)
-    residual_list = [np.sqrt(np.sum(np.square(np.array(ged_vec_init) - dis_k_vec)))]    
-    time_list = [time.time() - time0]
-    edit_cost_list = [init_costs]  
-    nb_cost_mat = np.array(n_edit_operations)
-    nb_cost_mat_list = [nb_cost_mat]
-    print('edit_costs:', init_costs)
-    print('residual_list:', residual_list)
-    
-    for itr in range(itr_max):
-        print('\niteration', itr)
-        time0 = time.time()
-        # "fit" geds to distances in feature space by tuning edit costs using the
-        # Least Squares Method.
-        np.savez('results/xp_fit_method/fit_data_debug' + str(itr) + '.gm', 
-                 nb_cost_mat=nb_cost_mat, dis_k_vec=dis_k_vec, 
-                 n_edit_operations=n_edit_operations, ged_vec_init=ged_vec_init,
-                 ged_mat=ged_mat)
-        edit_costs_new, residual = update_costs(nb_cost_mat, dis_k_vec, 
-                                                dataset=dataset, cost=params_ged['cost'])
-        for i in range(len(edit_costs_new)):
-            if -1e-9 <= edit_costs_new[i] <= 1e-9:
-                edit_costs_new[i] = 0
-            if edit_costs_new[i] < 0:
-                raise ValueError('The edit cost is negative.')
-#        for i in range(len(edit_costs_new)):
-#            if edit_costs_new[i] < 0:
-#                edit_costs_new[i] = 0
-
-        # compute new GEDs and numbers of edit operations.
-        params_ged['edit_cost_constant'] = edit_costs_new # np.array([edit_costs_new[0], edit_costs_new[1], 0.75])
-        ged_vec, ged_mat, n_edit_operations = compute_geds(Gn, params_ged,
-                                                           parallel=parallel)
-        residual_list.append(np.sqrt(np.sum(np.square(np.array(ged_vec) - dis_k_vec))))
-        time_list.append(time.time() - time0)
-        edit_cost_list.append(edit_costs_new)
-        nb_cost_mat = np.array(n_edit_operations)
-        nb_cost_mat_list.append(nb_cost_mat)                        
-        print('edit_costs:', edit_costs_new)
-        print('residual_list:', residual_list)
-    
-    return edit_costs_new, residual_list, edit_cost_list, dis_k_mat, ged_mat, \
-        time_list, nb_cost_mat_list
-
-
-def compute_geds(Gn, params_ged, parallel=False):
-    edit_cost_name = params_ged['cost']
-    if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2':
-        get_nb_eo = get_nb_edit_operations_letter
-    elif edit_cost_name == 'NON_SYMBOLIC':
-        get_nb_eo = get_nb_edit_operations_nonsymbolic
-    else: 
-        get_nb_eo = get_nb_edit_operations
-    ged_mat = np.zeros((len(Gn), len(Gn)))
-    if parallel:
-#        print('parallel')
-#        len_itr = int(len(Gn) * (len(Gn) + 1) / 2)
-        len_itr = int(len(Gn) * (len(Gn) - 1) / 2)
-        ged_vec = [0 for i in range(len_itr)]
-        n_edit_operations = [0 for i in range(len_itr)]
-#        itr = combinations_with_replacement(range(0, len(Gn)), 2)
-        itr = combinations(range(0, len(Gn)), 2)
-        n_jobs = multiprocessing.cpu_count()
-        if len_itr < 100 * n_jobs:
-            chunksize = int(len_itr / n_jobs) + 1
-        else:
-            chunksize = 100
-        def init_worker(gn_toshare):
-            global G_gn
-            G_gn = gn_toshare
-        do_partial = partial(_wrapper_compute_ged_parallel, params_ged, get_nb_eo)
-        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn,))
-        iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
-                        desc='computing GEDs', file=sys.stdout)
-#        iterator = pool.imap_unordered(do_partial, itr, chunksize)
-        for i, j, dis, n_eo_tmp in iterator:
-            idx_itr = int(len(Gn) * i + j - (i + 1) * (i + 2) / 2)
-            ged_vec[idx_itr] = dis
-            ged_mat[i][j] = dis
-            ged_mat[j][i] = dis
-            n_edit_operations[idx_itr] = n_eo_tmp
-#            print('\n-------------------------------------------')
-#            print(i, j, idx_itr, dis)
-        pool.close()
-        pool.join()
-        
-    else:
-        ged_vec = []
-        n_edit_operations = []
-        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-#        for i in range(len(Gn)):
-            for j in range(i + 1, len(Gn)):
-                dis, pi_forward, pi_backward = GED(Gn[i], Gn[j], **params_ged)
-                ged_vec.append(dis)
-                ged_mat[i][j] = dis
-                ged_mat[j][i] = dis
-                n_eo_tmp = get_nb_eo(Gn[i], Gn[j], pi_forward, pi_backward)
-                n_edit_operations.append(n_eo_tmp)
-                    
-    return ged_vec, ged_mat, n_edit_operations
-                    
-
-def _wrapper_compute_ged_parallel(params_ged, get_nb_eo, itr):
-    i = itr[0]
-    j = itr[1]
-    dis, n_eo_tmp = _compute_ged_parallel(G_gn[i], G_gn[j], params_ged, get_nb_eo)
-    return i, j, dis, n_eo_tmp
-
-
-def _compute_ged_parallel(g1, g2, params_ged, get_nb_eo):
-    dis, pi_forward, pi_backward = GED(g1, g2, **params_ged)
-    n_eo_tmp = get_nb_eo(g1, g2, pi_forward, pi_backward) # [0,0,0,0,0,0]
-    return dis, n_eo_tmp
-
-
-def update_costs(nb_cost_mat, dis_k_vec, dataset='monoterpenoides', 
-                 cost='CONSTANT', rw_constraints='inequality'):
-#    if dataset == 'Letter-high':
-    if cost == 'LETTER':            
-        pass
-#        # method 1: set alpha automatically, just tune c_vir and c_eir by 
-#        # LMS using cvxpy.
-#        alpha = 0.5
-#        coeff = 100 # np.max(alpha * nb_cost_mat[:,4] / dis_k_vec)
-##        if np.count_nonzero(nb_cost_mat[:,4]) == 0:
-##            alpha = 0.75
-##        else:
-##            alpha = np.min([dis_k_vec / c_vs for c_vs in nb_cost_mat[:,4] if c_vs != 0])
-##        alpha = alpha * 0.99
-#        param_vir = alpha * (nb_cost_mat[:,0] + nb_cost_mat[:,1])
-#        param_eir = (1 - alpha) * (nb_cost_mat[:,4] + nb_cost_mat[:,5])
-#        nb_cost_mat_new = np.column_stack((param_vir, param_eir))
-#        dis_new = coeff * dis_k_vec - alpha * nb_cost_mat[:,3]
-#        
-#        x = cp.Variable(nb_cost_mat_new.shape[1])
-#        cost = cp.sum_squares(nb_cost_mat_new * x - dis_new)
-#        constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-#        prob = cp.Problem(cp.Minimize(cost), constraints)
-#        prob.solve()
-#        edit_costs_new = x.value
-#        edit_costs_new = np.array([edit_costs_new[0], edit_costs_new[1], alpha])
-#        residual = np.sqrt(prob.value)
-    
-#        # method 2: tune c_vir, c_eir and alpha by nonlinear programming by 
-#        # scipy.optimize.minimize.
-#        w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
-#        w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
-#        w2 = nb_cost_mat[:,3]
-#        w3 = dis_k_vec
-#        func_min = lambda x: np.sum((w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
-#                             + w2 * x[2] - w3 * x[3]) ** 2)
-#        bounds = ((0, None), (0., None), (0.5, 0.5), (0, None))
-#        res = minimize(func_min, [0.9, 1.7, 0.75, 10], bounds=bounds)
-#        edit_costs_new = res.x[0:3]
-#        residual = res.fun
-    
-    # method 3: tune c_vir, c_eir and alpha by nonlinear programming using cvxpy.
-    
-    
-#        # method 4: tune c_vir, c_eir and alpha by QP function
-#        # scipy.optimize.least_squares. An initial guess is required.
-#        w0 = nb_cost_mat[:,0] + nb_cost_mat[:,1]
-#        w1 = nb_cost_mat[:,4] + nb_cost_mat[:,5]
-#        w2 = nb_cost_mat[:,3]
-#        w3 = dis_k_vec
-#        func = lambda x: (w0 * x[0] * x[3] + w1 * x[1] * (1 - x[2]) \
-#                             + w2 * x[2] - w3 * x[3]) ** 2
-#        res = optimize.root(func, [0.9, 1.7, 0.75, 100])
-#        edit_costs_new = res.x
-#        residual = None
-    elif cost == 'LETTER2':
-#            # 1. if c_vi != c_vr, c_ei != c_er.
-#            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-#            x = cp.Variable(nb_cost_mat_new.shape[1])
-#            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-##            # 1.1 no constraints.
-##            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-#            # 1.2 c_vs <= c_vi + c_vr.
-#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-#                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]            
-##            # 2. if c_vi == c_vr, c_ei == c_er.
-##            nb_cost_mat_new = nb_cost_mat[:,[0,3,4]]
-##            nb_cost_mat_new[:,0] += nb_cost_mat[:,1]
-##            nb_cost_mat_new[:,2] += nb_cost_mat[:,5]
-##            x = cp.Variable(nb_cost_mat_new.shape[1])
-##            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-##            # 2.1 no constraints.
-##            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])]]
-###            # 2.2 c_vs <= c_vi + c_vr.
-###            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-###                           np.array([2.0, -1.0, 0.0]).T@x >= 0.0]     
-#            
-#            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-#            prob.solve()
-#            edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
-#            edit_costs_new = np.array(edit_costs_new)
-#            residual = np.sqrt(prob.value)
-        if rw_constraints == 'inequality':
-            # c_vs <= c_vi + c_vr.
-            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-            x = cp.Variable(nb_cost_mat_new.shape[1])
-            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-            constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
-                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            try:
-                prob.solve(verbose=True)
-            except MemoryError as error0:
-                print('\nUsing solver "OSQP" caused a memory error.')
-                print('the original error message is\n', error0)
-                print('solver status: ', prob.status)
-                print('trying solver "CVXOPT" instead...\n')
-                try:
-                    prob.solve(solver=cp.CVXOPT, verbose=True)
-                except Exception as error1:
-                    print('\nAn error occured when using solver "CVXOPT".')
-                    print('the original error message is\n', error1)
-                    print('solver status: ', prob.status)
-                    print('trying solver "MOSEK" instead. Notice this solver is commercial and a lisence is required.\n')
-                    prob.solve(solver=cp.MOSEK, verbose=True)
-                else:
-                    print('solver status: ', prob.status)                    
-            else:
-                print('solver status: ', prob.status)
-            print()
-            edit_costs_new = x.value
-            residual = np.sqrt(prob.value)
-        elif rw_constraints == '2constraints':
-            # c_vs <= c_vi + c_vr and c_vi == c_vr, c_ei == c_er.
-            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-            x = cp.Variable(nb_cost_mat_new.shape[1])
-            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-            constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
-                           np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
-                           np.array([1.0, -1.0, 0.0, 0.0, 0.0]).T@x == 0.0,
-                           np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
-            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            prob.solve()
-            edit_costs_new = x.value
-            residual = np.sqrt(prob.value)
-        elif rw_constraints == 'no-constraint':
-            # no constraint.
-            nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-            x = cp.Variable(nb_cost_mat_new.shape[1])
-            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-            constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
-            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            prob.solve()
-            edit_costs_new = x.value
-            residual = np.sqrt(prob.value)
-#            elif method == 'inequality_modified':
-#                # c_vs <= c_vi + c_vr.
-#                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-#                x = cp.Variable(nb_cost_mat_new.shape[1])
-#                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-#                constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-#                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-#                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-#                prob.solve()
-#                # use same costs for insertion and removal rather than the fitted costs.
-#                edit_costs_new = [x.value[0], x.value[0], x.value[1], x.value[2], x.value[2]]
-#                edit_costs_new = np.array(edit_costs_new)
-#                residual = np.sqrt(prob.value)
-    elif cost == 'NON_SYMBOLIC':
-        is_n_attr = np.count_nonzero(nb_cost_mat[:,2])
-        is_e_attr = np.count_nonzero(nb_cost_mat[:,5])
-        
-        if dataset == 'SYNTHETICnew':
-#            nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
-            nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
-            x = cp.Variable(nb_cost_mat_new.shape[1])
-            cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-#            constraints = [x >= [0.0 for i in range(nb_cost_mat_new.shape[1])],
-#                           np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
-#            constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])]]
-            constraints = [x >= [0.0001 for i in range(nb_cost_mat_new.shape[1])],
-                   np.array([0.0, 1.0, -1.0]).T@x == 0.0]
-            prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-            prob.solve()
-#            print(x.value)
-            edit_costs_new = np.concatenate((np.array([0.0, 0.0]), x.value, 
-                                             np.array([0.0])))
-            residual = np.sqrt(prob.value)
-            
-        elif rw_constraints == 'inequality':
-            # c_vs <= c_vi + c_vr.
-            if is_n_attr and is_e_attr:
-                nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4,5]]
-                x = cp.Variable(nb_cost_mat_new.shape[1])
-                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
-                               np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
-                               np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
-                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
-                edit_costs_new = x.value
-                residual = np.sqrt(prob.value)
-            elif is_n_attr and not is_e_attr:
-                nb_cost_mat_new = nb_cost_mat[:,[0,1,2,3,4]]
-                x = cp.Variable(nb_cost_mat_new.shape[1])
-                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-                constraints = [x >= [0.001 for i in range(nb_cost_mat_new.shape[1])],
-                               np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
-                print(x.value)
-                edit_costs_new = np.concatenate((x.value, np.array([0.0])))
-                residual = np.sqrt(prob.value)
-            elif not is_n_attr and is_e_attr:
-                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4,5]]
-                x = cp.Variable(nb_cost_mat_new.shape[1])
-                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])],
-                               np.array([0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
-                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
-                edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), x.value[2:]))
-                residual = np.sqrt(prob.value)
-            else:
-                nb_cost_mat_new = nb_cost_mat[:,[0,1,3,4]]
-                x = cp.Variable(nb_cost_mat_new.shape[1])
-                cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-                constraints = [x >= [0.01 for i in range(nb_cost_mat_new.shape[1])]]
-                prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-                prob.solve()
-                edit_costs_new = np.concatenate((x.value[0:2], np.array([0.0]), 
-                                                 x.value[2:], np.array([0.0])))
-                residual = np.sqrt(prob.value)
-    else:
-#    # method 1: simple least square method.
-#    edit_costs_new, residual, _, _ = np.linalg.lstsq(nb_cost_mat, dis_k_vec,
-#                                                     rcond=None)
-    
-#    # method 2: least square method with x_i >= 0.
-#    edit_costs_new, residual = optimize.nnls(nb_cost_mat, dis_k_vec)
-    
-    # method 3: solve as a quadratic program with constraints.
-#    P = np.dot(nb_cost_mat.T, nb_cost_mat)
-#    q_T = -2 * np.dot(dis_k_vec.T, nb_cost_mat)
-#    G = -1 * np.identity(nb_cost_mat.shape[1])
-#    h = np.array([0 for i in range(nb_cost_mat.shape[1])])
-#    A = np.array([1 for i in range(nb_cost_mat.shape[1])])
-#    b = 1
-#    x = cp.Variable(nb_cost_mat.shape[1])
-#    prob = cp.Problem(cp.Minimize(cp.quad_form(x, P) + q_T@x),
-#                      [G@x <= h])
-#    prob.solve()
-#    edit_costs_new = x.value
-#    residual = prob.value - np.dot(dis_k_vec.T, dis_k_vec)
-    
-#    G = -1 * np.identity(nb_cost_mat.shape[1])
-#    h = np.array([0 for i in range(nb_cost_mat.shape[1])])
-        x = cp.Variable(nb_cost_mat.shape[1])
-        cost_fun = cp.sum_squares(nb_cost_mat * x - dis_k_vec)
-        constraints = [x >= [0.0 for i in range(nb_cost_mat.shape[1])],
-    #                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-                       np.array([1.0, 1.0, -1.0, 0.0, 0.0, 0.0]).T@x >= 0.0,
-                       np.array([0.0, 0.0, 0.0, 1.0, 1.0, -1.0]).T@x >= 0.0]
-        prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-        prob.solve()
-        edit_costs_new = x.value
-        residual = np.sqrt(prob.value)
-    
-    # method 4: 
-    
-    return edit_costs_new, residual
-
-
-if __name__ == '__main__':
-    print('check test_fitDistance.py')
\ No newline at end of file
diff --git a/gklearn/preimage/ged.py b/gklearn/preimage/ged.py
deleted file mode 100644
index a66baaf..0000000
--- a/gklearn/preimage/ged.py
+++ /dev/null
@@ -1,467 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 17 18:44:59 2019
-
-@author: ljia
-"""
-import numpy as np
-import networkx as nx
-from tqdm import tqdm
-import sys
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-
-#from gedlibpy_linlin import librariesImport, gedlibpy
-from gklearn.gedlib import librariesImport, gedlibpy
-
-def GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy', cost='CHEM_1', method='IPFP', 
-        edit_cost_constant=[], algo_options='', stabilizer='min', repeat=50):
-    """
-    Compute GED for 2 graphs.
-    """    
-    
-#    dataset = dataset.lower()
-    
-    if lib == 'gedlibpy':
-        gedlibpy.restart_env()
-        gedlibpy.add_nx_graph(convertGraph(g1, cost), "")
-        gedlibpy.add_nx_graph(convertGraph(g2, cost), "")
-
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
-        gedlibpy.init()
-        gedlibpy.set_method(method, algo_options)
-        gedlibpy.init_method()
-
-        g = listID[0]
-        h = listID[1]
-        if stabilizer is None:
-            gedlibpy.run_method(g, h)
-            pi_forward = gedlibpy.get_forward_map(g, h)
-            pi_backward = gedlibpy.get_backward_map(g, h)
-            upper = gedlibpy.get_upper_bound(g, h)
-            lower = gedlibpy.get_lower_bound(g, h)        
-        elif stabilizer == 'mean':
-            # @todo: to be finished...
-            upper_list = [np.inf] * repeat
-            for itr in range(repeat):                
-                gedlibpy.run_method(g, h)                
-                upper_list[itr] = gedlibpy.get_upper_bound(g, h)
-                pi_forward = gedlibpy.get_forward_map(g, h)
-                pi_backward = gedlibpy.get_backward_map(g, h)
-                lower = gedlibpy.get_lower_bound(g, h)
-            upper = np.mean(upper_list)
-        elif stabilizer == 'median':
-            if repeat % 2 == 0:
-                repeat += 1
-            upper_list = [np.inf] * repeat
-            pi_forward_list = [0] * repeat
-            pi_backward_list = [0] * repeat
-            for itr in range(repeat):                
-                gedlibpy.run_method(g, h)                
-                upper_list[itr] = gedlibpy.get_upper_bound(g, h)
-                pi_forward_list[itr] = gedlibpy.get_forward_map(g, h)
-                pi_backward_list[itr] = gedlibpy.get_backward_map(g, h)
-                lower = gedlibpy.get_lower_bound(g, h)
-            upper = np.median(upper_list)
-            idx_median = upper_list.index(upper)
-            pi_forward = pi_forward_list[idx_median]
-            pi_backward = pi_backward_list[idx_median]
-        elif stabilizer == 'min':
-            upper = np.inf
-            for itr in range(repeat):                
-                gedlibpy.run_method(g, h)                
-                upper_tmp = gedlibpy.get_upper_bound(g, h)                
-                if upper_tmp < upper:
-                    upper = upper_tmp
-                    pi_forward = gedlibpy.get_forward_map(g, h)
-                    pi_backward = gedlibpy.get_backward_map(g, h)
-                    lower = gedlibpy.get_lower_bound(g, h)
-                if upper == 0:
-                    break
-        elif stabilizer == 'max':
-            upper = 0
-            for itr in range(repeat):                
-                gedlibpy.run_method(g, h)                
-                upper_tmp = gedlibpy.get_upper_bound(g, h)                
-                if upper_tmp > upper:
-                    upper = upper_tmp
-                    pi_forward = gedlibpy.get_forward_map(g, h)
-                    pi_backward = gedlibpy.get_backward_map(g, h)
-                    lower = gedlibpy.get_lower_bound(g, h)
-        elif stabilizer == 'gaussian':
-            pass
-                    
-        dis = upper
-        
-    elif lib == 'gedlib-bash':
-        import time
-        import random
-        import os
-        from gklearn.utils.graphfiles import saveDataset
-        
-        tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
-        if not os.path.exists(tmp_dir):
-            os.makedirs(tmp_dir)
-        fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
-        xparams = {'method': 'gedlib', 'graph_dir': fn_collection}
-        saveDataset([g1, g2], ['dummy', 'dummy'], gformat='gxl', group='xml', 
-                    filename=fn_collection, xparams=xparams)
-        
-        command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/others/gedlib/gedlib2\'\n'
-        command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
-        command += 'export LD_LIBRARY_PATH\n'
-        command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
-        command += './ged_for_python_bash monoterpenoides ' + fn_collection \
-                + ' \'' + algo_options + '\' '
-        for ec in edit_cost_constant:
-            command += str(ec) + ' '
-#        output = os.system(command)
-        stream = os.popen(command)
-        output = stream.readlines()
-#        print(output)
-        
-        dis = float(output[0].strip())
-        runtime = float(output[1].strip())
-        size_forward = int(output[2].strip())
-        pi_forward = [int(item.strip()) for item in output[3:3+size_forward]]
-        pi_backward = [int(item.strip()) for item in output[3+size_forward:]]
-
-#        print(dis)
-#        print(runtime)
-#        print(size_forward)
-#        print(pi_forward)
-#        print(pi_backward)
-                
-        
-    # make the map label correct (label remove map as np.inf)
-    nodes1 = [n for n in g1.nodes()]
-    nodes2 = [n for n in g2.nodes()]
-    nb1 = nx.number_of_nodes(g1)
-    nb2 = nx.number_of_nodes(g2)
-    pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
-    pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]
-#        print(pi_forward)
-              
-        
-    return dis, pi_forward, pi_backward
-
-
-def convertGraph(G, cost):
-    """Convert a graph to the proper NetworkX format that can be
-    recognized by library gedlibpy.
-    """
-    G_new = nx.Graph()
-    if cost == 'LETTER' or cost == 'LETTER2':   
-        for nd, attrs in G.nodes(data=True):
-            G_new.add_node(str(nd), x=str(attrs['attributes'][0]), 
-                           y=str(attrs['attributes'][1]))
-        for nd1, nd2, attrs in G.edges(data=True):
-            G_new.add_edge(str(nd1), str(nd2))
-    elif cost == 'NON_SYMBOLIC':
-        for nd, attrs in G.nodes(data=True):
-            G_new.add_node(str(nd))
-            for a_name in G.graph['node_attrs']:
-                G_new.nodes[str(nd)][a_name] = str(attrs[a_name])
-        for nd1, nd2, attrs in G.edges(data=True):
-            G_new.add_edge(str(nd1), str(nd2))
-            for a_name in G.graph['edge_attrs']:
-                G_new.edges[str(nd1), str(nd2)][a_name] = str(attrs[a_name])
-    else:
-        for nd, attrs in G.nodes(data=True):
-            G_new.add_node(str(nd), chem=attrs['atom'])
-        for nd1, nd2, attrs in G.edges(data=True):
-            G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-#                G_new.add_edge(str(nd1), str(nd2))
-        
-    return G_new
-
-
-def GED_n(Gn, lib='gedlibpy', cost='CHEM_1', method='IPFP', 
-        edit_cost_constant=[], stabilizer='min', repeat=50):
-    """
-    Compute GEDs for a group of graphs.
-    """
-    if lib == 'gedlibpy':
-        def convertGraph(G):
-            """Convert a graph to the proper NetworkX format that can be
-            recognized by library gedlibpy.
-            """
-            G_new = nx.Graph()
-            for nd, attrs in G.nodes(data=True):
-                G_new.add_node(str(nd), chem=attrs['atom'])
-            for nd1, nd2, attrs in G.edges(data=True):
-#                G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-                G_new.add_edge(str(nd1), str(nd2))
-                
-            return G_new
-        
-        gedlibpy.restart_env()
-        gedlibpy.add_nx_graph(convertGraph(g1), "")
-        gedlibpy.add_nx_graph(convertGraph(g2), "")
-
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost(cost, edit_cost_constant=edit_cost_constant)
-        gedlibpy.init()
-        gedlibpy.set_method(method, "")
-        gedlibpy.init_method()
-
-        g = listID[0]
-        h = listID[1]
-        if stabilizer is None:
-            gedlibpy.run_method(g, h)
-            pi_forward = gedlibpy.get_forward_map(g, h)
-            pi_backward = gedlibpy.get_backward_map(g, h)
-            upper = gedlibpy.get_upper_bound(g, h)
-            lower = gedlibpy.get_lower_bound(g, h)        
-        elif stabilizer == 'min':
-            upper = np.inf
-            for itr in range(repeat):                
-                gedlibpy.run_method(g, h)                
-                upper_tmp = gedlibpy.get_upper_bound(g, h)                
-                if upper_tmp < upper:
-                    upper = upper_tmp
-                    pi_forward = gedlibpy.get_forward_map(g, h)
-                    pi_backward = gedlibpy.get_backward_map(g, h)
-                    lower = gedlibpy.get_lower_bound(g, h)
-                if upper == 0:
-                    break
-                    
-        dis = upper
-        
-        # make the map label correct (label remove map as np.inf)
-        nodes1 = [n for n in g1.nodes()]
-        nodes2 = [n for n in g2.nodes()]
-        nb1 = nx.number_of_nodes(g1)
-        nb2 = nx.number_of_nodes(g2)
-        pi_forward = [nodes2[pi] if pi < nb2 else np.inf for pi in pi_forward]
-        pi_backward = [nodes1[pi] if pi < nb1 else np.inf for pi in pi_backward]      
-        
-    return dis, pi_forward, pi_backward
-
-
-def ged_median(Gn, Gn_median, verbose=False, params_ged={'lib': 'gedlibpy', 
-               'cost': 'CHEM_1', 'method': 'IPFP', 'edit_cost_constant': [], 
-               'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1',
-               'stabilizer': None}, parallel=False):
-    if parallel:
-        len_itr = int(len(Gn))
-        pi_forward_list = [[] for i in range(len_itr)]
-        dis_list = [0 for i in range(len_itr)]
-               
-        itr = range(0, len_itr)
-        n_jobs = multiprocessing.cpu_count()
-        if len_itr < 100 * n_jobs:
-            chunksize = int(len_itr / n_jobs) + 1
-        else:
-            chunksize = 100
-        def init_worker(gn_toshare, gn_median_toshare):
-            global G_gn, G_gn_median
-            G_gn = gn_toshare
-            G_gn_median = gn_median_toshare
-        do_partial = partial(_compute_ged_median, params_ged)
-        pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(Gn, Gn_median))
-        if verbose:
-            iterator = tqdm(pool.imap_unordered(do_partial, itr, chunksize),
-                            desc='computing GEDs', file=sys.stdout)
-        else:
-            iterator = pool.imap_unordered(do_partial, itr, chunksize)
-        for i, dis_sum, pi_forward in iterator:
-            pi_forward_list[i] = pi_forward
-            dis_list[i] = dis_sum
-#            print('\n-------------------------------------------')
-#            print(i, j, idx_itr, dis)
-        pool.close()
-        pool.join()
-        
-    else:
-        dis_list = []
-        pi_forward_list = []
-        for idx, G in tqdm(enumerate(Gn), desc='computing median distances', 
-                           file=sys.stdout) if verbose else enumerate(Gn):
-            dis_sum = 0
-            pi_forward_list.append([])
-            for G_p in Gn_median:
-                dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p, 
-                    **params_ged)
-                pi_forward_list[idx].append(pi_tmp_forward)
-                dis_sum += dis_tmp
-            dis_list.append(dis_sum)
-            
-    return dis_list, pi_forward_list
-
-
-def _compute_ged_median(params_ged, itr):
-#    print(itr)
-    dis_sum = 0
-    pi_forward = []
-    for G_p in G_gn_median:
-        dis_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_gn[itr], G_p, 
-                    **params_ged)
-        pi_forward.append(pi_tmp_forward)
-        dis_sum += dis_tmp
-        
-    return itr, dis_sum, pi_forward
-
-
-def get_nb_edit_operations(g1, g2, forward_map, backward_map):
-    """Compute the number of each edit operations.
-    """
-    n_vi = 0
-    n_vr = 0
-    n_vs = 0
-    n_ei = 0
-    n_er = 0
-    n_es = 0
-    
-    nodes1 = [n for n in g1.nodes()]
-    for i, map_i in enumerate(forward_map):
-        if map_i == np.inf:
-            n_vr += 1
-        elif g1.node[nodes1[i]]['atom'] != g2.node[map_i]['atom']:
-            n_vs += 1
-    for map_i in backward_map:
-        if map_i == np.inf:
-            n_vi += 1
-    
-#    idx_nodes1 = range(0, len(node1))
-    
-    edges1 = [e for e in g1.edges()]
-    nb_edges2_cnted = 0
-    for n1, n2 in edges1:
-        idx1 = nodes1.index(n1)
-        idx2 = nodes1.index(n2)
-        # one of the nodes is removed, thus the edge is removed.
-        if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
-            n_er += 1
-        # corresponding edge is in g2.
-        elif (forward_map[idx1], forward_map[idx2]) in g2.edges():
-            nb_edges2_cnted += 1
-            # edge labels are different.
-            if g2.edges[((forward_map[idx1], forward_map[idx2]))]['bond_type'] \
-                != g1.edges[(n1, n2)]['bond_type']:
-                    n_es += 1
-        elif (forward_map[idx2], forward_map[idx1]) in g2.edges():
-            nb_edges2_cnted += 1
-            # edge labels are different.
-            if g2.edges[((forward_map[idx2], forward_map[idx1]))]['bond_type'] \
-                != g1.edges[(n1, n2)]['bond_type']:
-                    n_es += 1                
-        # corresponding nodes are in g2, however the edge is removed.
-        else:
-            n_er += 1
-    n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
-    
-    return n_vi, n_vr, n_vs, n_ei, n_er, n_es
-
-
-def get_nb_edit_operations_letter(g1, g2, forward_map, backward_map):
-    """Compute the number of each edit operations.
-    """
-    n_vi = 0
-    n_vr = 0
-    n_vs = 0
-    sod_vs = 0
-    n_ei = 0
-    n_er = 0
-    
-    nodes1 = [n for n in g1.nodes()]
-    for i, map_i in enumerate(forward_map):
-        if map_i == np.inf:
-            n_vr += 1
-        else:
-            n_vs += 1
-            diff_x = float(g1.nodes[nodes1[i]]['x']) - float(g2.nodes[map_i]['x'])
-            diff_y = float(g1.nodes[nodes1[i]]['y']) - float(g2.nodes[map_i]['y'])
-            sod_vs += np.sqrt(np.square(diff_x) + np.square(diff_y))
-    for map_i in backward_map:
-        if map_i == np.inf:
-            n_vi += 1
-    
-#    idx_nodes1 = range(0, len(node1))
-    
-    edges1 = [e for e in g1.edges()]
-    nb_edges2_cnted = 0
-    for n1, n2 in edges1:
-        idx1 = nodes1.index(n1)
-        idx2 = nodes1.index(n2)
-        # one of the nodes is removed, thus the edge is removed.
-        if forward_map[idx1] == np.inf or forward_map[idx2] == np.inf:
-            n_er += 1
-        # corresponding edge is in g2. Edge label is not considered.
-        elif (forward_map[idx1], forward_map[idx2]) in g2.edges() or \
-            (forward_map[idx2], forward_map[idx1]) in g2.edges():
-                nb_edges2_cnted += 1
-        # corresponding nodes are in g2, however the edge is removed.
-        else:
-            n_er += 1
-    n_ei = nx.number_of_edges(g2) - nb_edges2_cnted
-    
-    return n_vi, n_vr, n_vs, sod_vs, n_ei, n_er
-
-
-def get_nb_edit_operations_nonsymbolic(g1, g2, forward_map, backward_map):
-    """Compute the number of each edit operations.
-    """
-    n_vi = 0
-    n_vr = 0
-    n_vs = 0
-    sod_vs = 0
-    n_ei = 0
-    n_er = 0
-    n_es = 0
-    sod_es = 0
-    
-    nodes1 = [n for n in g1.nodes()]
-    for i, map_i in enumerate(forward_map):
-        if map_i == np.inf:
-            n_vr += 1
-        else:
-            n_vs += 1
-            sum_squares = 0
-            for a_name in g1.graph['node_attrs']:
-                diff = float(g1.nodes[nodes1[i]][a_name]) - float(g2.nodes[map_i][a_name])
-                sum_squares += np.square(diff)
-            sod_vs += np.sqrt(sum_squares)
-    for map_i in backward_map:
-        if map_i == np.inf:
-            n_vi += 1
-    
-#    idx_nodes1 = range(0, len(node1))
-    
-    edges1 = [e for e in g1.edges()]
-    for n1, n2 in edges1:
-        idx1 = nodes1.index(n1)
-        idx2 = nodes1.index(n2)
-        n1_g2 = forward_map[idx1]
-        n2_g2 = forward_map[idx2]
-        # one of the nodes is removed, thus the edge is removed.
-        if n1_g2 == np.inf or n2_g2 == np.inf:
-            n_er += 1
-        # corresponding edge is in g2.
-        elif (n1_g2, n2_g2) in g2.edges():
-            n_es += 1
-            sum_squares = 0
-            for a_name in g1.graph['edge_attrs']:
-                diff = float(g1.edges[n1, n2][a_name]) - float(g2.nodes[n1_g2, n2_g2][a_name])
-                sum_squares += np.square(diff)
-            sod_es += np.sqrt(sum_squares)
-        elif (n2_g2, n1_g2) in g2.edges():
-            n_es += 1
-            sum_squares = 0
-            for a_name in g1.graph['edge_attrs']:
-                diff = float(g1.edges[n2, n1][a_name]) - float(g2.nodes[n2_g2, n1_g2][a_name])
-                sum_squares += np.square(diff)
-            sod_es += np.sqrt(sum_squares)
-        # corresponding nodes are in g2, however the edge is removed.
-        else:
-            n_er += 1
-    n_ei = nx.number_of_edges(g2) - n_es
-        
-    return n_vi, n_vr, sod_vs, n_ei, n_er, sod_es
-
-
-if __name__ == '__main__':
-    print('check test_ged.py')
\ No newline at end of file
diff --git a/gklearn/preimage/iam.py b/gklearn/preimage/iam.py
deleted file mode 100644
index f3e2165..0000000
--- a/gklearn/preimage/iam.py
+++ /dev/null
@@ -1,775 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Apr 26 11:49:12 2019
-
-Iterative alternate minimizations using GED.
-@author: ljia
-"""
-import numpy as np
-import random
-import networkx as nx
-from tqdm import tqdm
-
-from gklearn.utils.graphdataset import get_dataset_attributes
-from gklearn.utils.utils import graph_isIdentical, get_node_labels, get_edge_labels
-from gklearn.preimage.ged import GED, ged_median
-
-
-def iam_upgraded(Gn_median, Gn_candidate, c_ei=3, c_er=3, c_es=1, ite_max=50, 
-        epsilon=0.001, node_label='atom', edge_label='bond_type', 
-        connected=False, removeNodes=True, allBestInit=False, allBestNodes=False,
-        allBestEdges=False, allBestOutput=False,
-        params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP', 
-                    'edit_cost_constant': [], 'stabilizer': None, 
-                    'algo_options': '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'}):
-    """See my name, then you know what I do.
-    """
-#    Gn_median = Gn_median[0:10]
-#    Gn_median = [nx.convert_node_labels_to_integers(g) for g in Gn_median]
-    node_ir = np.inf # corresponding to the node remove and insertion.
-    label_r = 'thanksdanny' # the label for node remove. # @todo: make this label unrepeatable.
-    ds_attrs = get_dataset_attributes(Gn_median + Gn_candidate, 
-                                      attr_names=['edge_labeled', 'node_attr_dim', 'edge_attr_dim'], 
-                                      edge_label=edge_label)
-    node_label_set = get_node_labels(Gn_median, node_label)
-    edge_label_set = get_edge_labels(Gn_median, edge_label)
-
-    
-    def generate_graph(G, pi_p_forward):
-        G_new_list = [G.copy()] # all "best" graphs generated in this iteration.
-#        nx.draw_networkx(G)
-#        import matplotlib.pyplot as plt
-#        plt.show()
-#        print(pi_p_forward)
-                    
-        # update vertex labels.
-        # pre-compute h_i0 for each label.
-#        for label in get_node_labels(Gn, node_label):
-#            print(label)
-#        for nd in G.nodes(data=True):
-#            pass
-        if not ds_attrs['node_attr_dim']: # labels are symbolic
-            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
-                h_i0_list = []
-                label_list = []
-                for label in node_label_set:
-                    h_i0 = 0
-                    for idx, g in enumerate(Gn_median):
-                        pi_i = pi_p_forward[idx][ndi]
-                        if pi_i != node_ir and g.nodes[pi_i][node_label] == label:
-                            h_i0 += 1
-                    h_i0_list.append(h_i0)
-                    label_list.append(label)
-                # case when the node is to be removed.
-                if removeNodes:
-                    h_i0_remove = 0 # @todo: maybe this can be added to the node_label_set above.
-                    for idx, g in enumerate(Gn_median):
-                        pi_i = pi_p_forward[idx][ndi]
-                        if pi_i == node_ir:
-                            h_i0_remove += 1
-                    h_i0_list.append(h_i0_remove)
-                    label_list.append(label_r)
-                # get the best labels.
-                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
-                if allBestNodes: # choose all best graphs.                    
-                    nlabel_best = [label_list[idx] for idx in idx_max]
-                    # generate "best" graphs with regard to "best" node labels.
-                    G_new_list_nd = []
-                    for g in G_new_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
-                        for nl in nlabel_best:
-                            g_tmp = g.copy()
-                            if nl == label_r:
-                                g_tmp.remove_node(nd)
-                            else:
-                                g_tmp.nodes[nd][node_label] = nl
-                            G_new_list_nd.append(g_tmp)
-    #                            nx.draw_networkx(g_tmp)
-    #                            import matplotlib.pyplot as plt
-    #                            plt.show()
-    #                            print(g_tmp.nodes(data=True))
-    #                            print(g_tmp.edges(data=True))
-                    G_new_list = [ggg.copy() for ggg in G_new_list_nd]
-                else: 
-                    # choose one of the best randomly.
-                    idx_rdm = random.randint(0, len(idx_max) - 1)
-                    best_label = label_list[idx_max[idx_rdm]]
-                    h_i0_max = h_i0_list[idx_max[idx_rdm]]
-
-                    g_new = G_new_list[0]
-                    if best_label == label_r:
-                        g_new.remove_node(nd) 
-                    else:
-                        g_new.nodes[nd][node_label] = best_label
-                    G_new_list = [g_new]
-        else: # labels are non-symbolic
-            for ndi, (nd, _) in enumerate(G.nodes(data=True)):
-                Si_norm = 0
-                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
-                for idx, g in enumerate(Gn_median):
-                    pi_i = pi_p_forward[idx][ndi]
-                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
-                        Si_norm += 1
-                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])                
-                phi_i_bar /= Si_norm
-                G_new_list[0].nodes[nd]['attributes'] = phi_i_bar
-                
-#        for g in G_new_list:
-#            import matplotlib.pyplot as plt 
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-#            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-                                            
-        # update edge labels and adjacency matrix.
-        if ds_attrs['edge_labeled']:
-            G_new_list_edge = []
-            for g_new in G_new_list:
-                nd_list = [n for n in g_new.nodes()]
-                g_tmp_list = [g_new.copy()]
-                for nd1i in range(nx.number_of_nodes(g_new)): 
-                    nd1 = nd_list[nd1i]# @todo: not just edges, but all pairs of nodes
-                    for nd2i in range(nd1i + 1, nx.number_of_nodes(g_new)):
-                        nd2 = nd_list[nd2i]
-#                for nd1, nd2, _ in g_new.edges(data=True): 
-                        h_ij0_list = []
-                        label_list = []
-                        for label in edge_label_set:
-                            h_ij0 = 0
-                            for idx, g in enumerate(Gn_median):
-                                pi_i = pi_p_forward[idx][nd1i]
-                                pi_j = pi_p_forward[idx][nd2i]
-                                h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and 
-                                           g.has_edge(pi_i, pi_j) and 
-                                           g.edges[pi_i, pi_j][edge_label] == label)
-                                h_ij0 += h_ij0_p
-                            h_ij0_list.append(h_ij0)
-                            label_list.append(label)
-                        
-                        # get the best labels.
-                        idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
-                        if allBestEdges: # choose all best graphs.
-                            elabel_best = [label_list[idx] for idx in idx_max]
-                            h_ij0_max = [h_ij0_list[idx] for idx in idx_max]
-                            # generate "best" graphs with regard to "best" node labels.
-                            G_new_list_ed = []
-                            for g_tmp in g_tmp_list: # @todo: seems it can be simplified. The G_new_list will only contain 1 graph for now.
-                                for idxl, el in enumerate(elabel_best):
-                                    g_tmp_copy = g_tmp.copy()
-                                    # check whether a_ij is 0 or 1.
-                                    sij_norm = 0
-                                    for idx, g in enumerate(Gn_median):
-                                        pi_i = pi_p_forward[idx][nd1i]
-                                        pi_j = pi_p_forward[idx][nd2i]
-                                        if g.has_node(pi_i) and g.has_node(pi_j) and \
-                                            g.has_edge(pi_i, pi_j):
-                                           sij_norm += 1
-                                    if h_ij0_max[idxl] > len(Gn_median) * c_er / c_es + \
-                                        sij_norm * (1 - (c_er + c_ei) / c_es):
-                                        if not g_tmp_copy.has_edge(nd1, nd2):
-                                            g_tmp_copy.add_edge(nd1, nd2)
-                                        g_tmp_copy.edges[nd1, nd2][edge_label] = elabel_best[idxl]
-                                    else:
-                                        if g_tmp_copy.has_edge(nd1, nd2):
-                                            g_tmp_copy.remove_edge(nd1, nd2)
-                                    G_new_list_ed.append(g_tmp_copy)
-                            g_tmp_list = [ggg.copy() for ggg in G_new_list_ed]
-                        else: # choose one of the best randomly.
-                            idx_rdm = random.randint(0, len(idx_max) - 1)
-                            best_label = label_list[idx_max[idx_rdm]]
-                            h_ij0_max = h_ij0_list[idx_max[idx_rdm]]
-                                   
-                            # check whether a_ij is 0 or 1.
-                            sij_norm = 0
-                            for idx, g in enumerate(Gn_median):
-                                pi_i = pi_p_forward[idx][nd1i]
-                                pi_j = pi_p_forward[idx][nd2i]
-                                if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                                   sij_norm += 1
-                            if h_ij0_max > len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
-                                if not g_new.has_edge(nd1, nd2):
-                                    g_new.add_edge(nd1, nd2)
-                                g_new.edges[nd1, nd2][edge_label] = best_label
-                            else:
-#                            elif h_ij0_max < len(Gn_median) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
-                                if g_new.has_edge(nd1, nd2):
-                                    g_new.remove_edge(nd1, nd2) 
-                            g_tmp_list = [g_new]
-                G_new_list_edge += g_tmp_list
-            G_new_list = [ggg.copy() for ggg in G_new_list_edge]    
-                    
-               
-        else: # if edges are unlabeled
-            # @todo: is this even right? G or g_tmp? check if the new one is right
-            # @todo: works only for undirected graphs.
-            
-            for g_tmp in G_new_list:
-                nd_list = [n for n in g_tmp.nodes()]
-                for nd1i in range(nx.number_of_nodes(g_tmp)):
-                    nd1 = nd_list[nd1i]
-                    for nd2i in range(nd1i + 1, nx.number_of_nodes(g_tmp)):
-                        nd2 = nd_list[nd2i]
-                        sij_norm = 0
-                        for idx, g in enumerate(Gn_median):
-                            pi_i = pi_p_forward[idx][nd1i]
-                            pi_j = pi_p_forward[idx][nd2i]
-                            if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                               sij_norm += 1
-                        if sij_norm > len(Gn_median) * c_er / (c_er + c_ei):
-                            # @todo: should we consider if nd1 and nd2 in g_tmp?
-                            # or just add the edge anyway?
-                            if g_tmp.has_node(nd1) and g_tmp.has_node(nd2) \
-                                and not g_tmp.has_edge(nd1, nd2):
-                                g_tmp.add_edge(nd1, nd2)
-                        else: # @todo: which to use?
-#                        elif sij_norm < len(Gn_median) * c_er / (c_er + c_ei):
-                            if g_tmp.has_edge(nd1, nd2):
-                                g_tmp.remove_edge(nd1, nd2)
-                        # do not change anything when equal.     
-                        
-#        for i, g in enumerate(G_new_list):
-#            import matplotlib.pyplot as plt 
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
-#            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-        
-#        # find the best graph generated in this iteration and update pi_p.
-        # @todo: should we update all graphs generated or just the best ones?
-        dis_list, pi_forward_list = ged_median(G_new_list, Gn_median, 
-            params_ged=params_ged)
-        # @todo: should we remove the identical and connectivity check? 
-        # Don't know which is faster.
-        if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
-            G_new_list, idx_list = remove_duplicates(G_new_list)
-            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-            dis_list = [dis_list[idx] for idx in idx_list]
-#        if connected == True:
-#            G_new_list, idx_list = remove_disconnected(G_new_list)
-#            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-#        idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
-#        dis_min = dis_list[idx_min_tmp_list[0]]
-#        pi_forward_list = [pi_forward_list[idx] for idx in idx_min_list]
-#        G_new_list = [G_new_list[idx] for idx in idx_min_list] 
-        
-#        for g in G_new_list:
-#            import matplotlib.pyplot as plt 
-#            nx.draw_networkx(g)
-#            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-        
-        return G_new_list, pi_forward_list, dis_list
-    
-    
-    def best_median_graphs(Gn_candidate, pi_all_forward, dis_all):
-        idx_min_list = np.argwhere(dis_all == np.min(dis_all)).flatten().tolist()
-        dis_min = dis_all[idx_min_list[0]]
-        pi_forward_min_list = [pi_all_forward[idx] for idx in idx_min_list]
-        G_min_list = [Gn_candidate[idx] for idx in idx_min_list]
-        return G_min_list, pi_forward_min_list, dis_min
-    
-    
-    def iteration_proc(G, pi_p_forward, cur_sod):
-        G_list = [G]
-        pi_forward_list = [pi_p_forward]
-        old_sod = cur_sod * 2
-        sod_list = [cur_sod]
-        dis_list = [cur_sod]
-        # iterations.
-        itr = 0
-        # @todo: what if difference == 0?
-#        while itr < ite_max and (np.abs(old_sod - cur_sod) > epsilon or
-#                                 np.abs(old_sod - cur_sod) == 0):
-        while itr < ite_max and np.abs(old_sod - cur_sod) > epsilon:
-#        while itr < ite_max:
-#        for itr in range(0, 5): # the convergence condition?
-            print('itr_iam is', itr)
-            G_new_list = []
-            pi_forward_new_list = []
-            dis_new_list = []
-            for idx, g in enumerate(G_list):
-#                label_set = get_node_labels(Gn_median + [g], node_label)                        
-                G_tmp_list, pi_forward_tmp_list, dis_tmp_list = generate_graph(
-                    g, pi_forward_list[idx])
-                G_new_list += G_tmp_list
-                pi_forward_new_list += pi_forward_tmp_list
-                dis_new_list += dis_tmp_list
-            # @todo: need to remove duplicates here?
-            G_list = [ggg.copy() for ggg in G_new_list]
-            pi_forward_list = [pitem.copy() for pitem in pi_forward_new_list]
-            dis_list = dis_new_list[:]
-            
-            old_sod = cur_sod
-            cur_sod = np.min(dis_list)
-            sod_list.append(cur_sod)
-            
-            itr += 1
-        
-        # @todo: do we return all graphs or the best ones?
-        # get the best ones of the generated graphs.
-        G_list, pi_forward_list, dis_min = best_median_graphs(
-            G_list, pi_forward_list, dis_list)
-        
-        if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
-            G_list, idx_list = remove_duplicates(G_list)
-            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-#            dis_list = [dis_list[idx] for idx in idx_list]
-            
-#        import matplotlib.pyplot as plt
-#        for g in G_list:             
-#            nx.draw_networkx(g)
-#            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-            
-        print('\nsods:', sod_list, '\n')
-            
-        return G_list, pi_forward_list, dis_min, sod_list
-    
-    
-    def remove_duplicates(Gn):
-        """Remove duplicate graphs from list.
-        """
-        Gn_new = []
-        idx_list = []
-        for idx, g in enumerate(Gn):
-            dupl = False
-            for g_new in Gn_new:
-                if graph_isIdentical(g_new, g):
-                    dupl = True
-                    break
-            if not dupl:
-                Gn_new.append(g)
-                idx_list.append(idx)
-        return Gn_new, idx_list
-    
-    
-    def remove_disconnected(Gn):
-        """Remove disconnected graphs from list.
-        """
-        Gn_new = []
-        idx_list = []
-        for idx, g in enumerate(Gn):
-            if nx.is_connected(g):
-                Gn_new.append(g)
-                idx_list.append(idx)
-        return Gn_new, idx_list
-
-    
-    ###########################################################################
-    
-    # phase 1: initilize.
-    # compute set-median.
-    dis_min = np.inf
-    dis_list, pi_forward_all = ged_median(Gn_candidate, Gn_median,
-        params_ged=params_ged, parallel=True)
-    print('finish computing GEDs.')
-    # find all smallest distances.
-    if allBestInit: # try all best init graphs.
-        idx_min_list = range(len(dis_list))
-        dis_min = dis_list
-    else:
-        idx_min_list = np.argwhere(dis_list == np.min(dis_list)).flatten().tolist()
-        dis_min = [dis_list[idx_min_list[0]]] * len(idx_min_list)
-        idx_min_rdm = random.randint(0, len(idx_min_list) - 1)
-        idx_min_list = [idx_min_list[idx_min_rdm]]
-    sod_set_median = np.min(dis_min)
-        
-    
-    # phase 2: iteration.
-    G_list = []
-    dis_list = []
-    pi_forward_list = []
-    G_set_median_list = []
-#    sod_list = []
-    for idx_tmp, idx_min in enumerate(idx_min_list):
-#        print('idx_min is', idx_min)
-        G = Gn_candidate[idx_min].copy()
-        G_set_median_list.append(G.copy())
-        # list of edit operations.        
-        pi_p_forward = pi_forward_all[idx_min]
-#        pi_p_backward = pi_all_backward[idx_min]        
-        Gi_list, pi_i_forward_list, dis_i_min, sod_list = iteration_proc(G, 
-                                                pi_p_forward, dis_min[idx_tmp])            
-        G_list += Gi_list
-        dis_list += [dis_i_min] * len(Gi_list)
-        pi_forward_list += pi_i_forward_list
-        
-        
-    if ds_attrs['node_attr_dim'] == 0 and ds_attrs['edge_attr_dim'] == 0:
-        G_list, idx_list = remove_duplicates(G_list)
-        dis_list = [dis_list[idx] for idx in idx_list]
-        pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-    if connected == True:
-        G_list_con, idx_list = remove_disconnected(G_list)
-        # if there is no connected graphs at all, then remain the disconnected ones.
-        if len(G_list_con) > 0: # @todo: ??????????????????????????
-            G_list = G_list_con
-            dis_list = [dis_list[idx] for idx in idx_list]
-            pi_forward_list = [pi_forward_list[idx] for idx in idx_list]
-
-#    import matplotlib.pyplot as plt 
-#    for g in G_list:
-#        nx.draw_networkx(g)
-#        plt.show()
-#        print(g.nodes(data=True))
-#        print(g.edges(data=True))
-    
-    # get the best median graphs
-    G_gen_median_list, pi_forward_min_list, sod_gen_median = best_median_graphs(
-            G_list, pi_forward_list, dis_list)
-#    for g in G_gen_median_list:
-#        nx.draw_networkx(g)
-#        plt.show()
-#        print(g.nodes(data=True))
-#        print(g.edges(data=True))
-    
-    if not allBestOutput:
-        # randomly choose one graph.
-        idx_rdm = random.randint(0, len(G_gen_median_list) - 1)
-        G_gen_median_list = [G_gen_median_list[idx_rdm]]
-    
-    return G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median
-
-
-def iam_bash(Gn_names, edit_cost_constant, cost='CONSTANT', initial_solutions=1,
-             dataset='monoterpenoides',
-             graph_dir=''):
-    """Compute the iam by c++ implementation (gedlib) through bash.
-    """
-    import os
-    import time
-
-    def createCollectionFile(Gn_names, y, filename):
-        """Create collection file.
-        """
-        dirname_ds = os.path.dirname(filename)
-        if dirname_ds != '':
-            dirname_ds += '/'
-            if not os.path.exists(dirname_ds) :
-                os.makedirs(dirname_ds)
-                
-        with open(filename + '.xml', 'w') as fgroup:
-            fgroup.write("<?xml version=\"1.0\"?>")
-            fgroup.write("\n<!DOCTYPE GraphCollection SYSTEM \"http://www.inf.unibz.it/~blumenthal/dtd/GraphCollection.dtd\">")
-            fgroup.write("\n<GraphCollection>")
-            for idx, fname in enumerate(Gn_names):
-                fgroup.write("\n\t<graph file=\"" + fname + "\" class=\"" + str(y[idx]) + "\"/>")
-            fgroup.write("\n</GraphCollection>")
-            fgroup.close()
-
-    tmp_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/'
-    fn_collection = tmp_dir + 'collection.' + str(time.time()) + str(random.randint(0, 1e9))
-    createCollectionFile(Gn_names, ['dummy'] * len(Gn_names), fn_collection)
-#    fn_collection = tmp_dir + 'collection_for_debug'
-#    graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/gxl'
-    
-#    if dataset == 'Letter-high' or dataset == 'Fingerprint':
-#        dataset = 'letter'
-    command = 'GEDLIB_HOME=\'/media/ljia/DATA/research-repo/codes/Linlin/gedlib\'\n'
-    command += 'LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GEDLIB_HOME/lib\n'
-    command += 'export LD_LIBRARY_PATH\n'
-    command += 'cd \'' + os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/bin\'\n'
-    command += './iam_for_python_bash ' + dataset + ' ' + fn_collection \
-            + ' \'' + graph_dir + '\' ' + ' ' + cost + ' ' + str(initial_solutions) + ' '
-    if edit_cost_constant is None:
-        command += 'None'
-    else:
-        for ec in edit_cost_constant:
-            command += str(ec) + ' '
-#        output = os.system(command)
-    stream = os.popen(command)
-
-    output = stream.readlines()    
-#    print(output)
-    sod_sm = float(output[0].strip())
-    sod_gm = float(output[1].strip())
-    
-    fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
-    fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-    
-    return sod_sm, sod_gm, fname_sm, fname_gm
-
-
-
-###############################################################################
-# Old implementations.
-    
-def iam(Gn, c_ei=3, c_er=3, c_es=1, node_label='atom', edge_label='bond_type', 
-        connected=True):
-    """See my name, then you know what I do.
-    """
-#    Gn = Gn[0:10]
-    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
-    
-    # phase 1: initilize.
-    # compute set-median.
-    dis_min = np.inf
-    pi_p = []
-    pi_all = []
-    for idx1, G_p in enumerate(Gn):
-        dist_sum = 0
-        pi_all.append([])
-        for idx2, G_p_prime in enumerate(Gn):
-            dist_tmp, pi_tmp, _ = GED(G_p, G_p_prime)
-            pi_all[idx1].append(pi_tmp)
-            dist_sum += dist_tmp
-        if dist_sum < dis_min:
-            dis_min = dist_sum
-            G = G_p.copy()
-            idx_min = idx1
-    # list of edit operations.        
-    pi_p = pi_all[idx_min]
-            
-    # phase 2: iteration.
-    ds_attrs = get_dataset_attributes(Gn, attr_names=['edge_labeled', 'node_attr_dim'], 
-                                      edge_label=edge_label)
-    for itr in range(0, 10): # @todo: the convergence condition?
-        G_new = G.copy()
-        # update vertex labels.
-        # pre-compute h_i0 for each label.
-#        for label in get_node_labels(Gn, node_label):
-#            print(label)
-#        for nd in G.nodes(data=True):
-#            pass
-        if not ds_attrs['node_attr_dim']: # labels are symbolic
-            for nd, _ in G.nodes(data=True):
-                h_i0_list = []
-                label_list = []
-                for label in get_node_labels(Gn, node_label):
-                    h_i0 = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p[idx][nd]
-                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
-                            h_i0 += 1
-                    h_i0_list.append(h_i0)
-                    label_list.append(label)
-                # choose one of the best randomly.
-                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
-                idx_rdm = random.randint(0, len(idx_max) - 1)
-                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
-        else: # labels are non-symbolic
-            for nd, _ in G.nodes(data=True):
-                Si_norm = 0
-                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p[idx][nd]
-                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
-                        Si_norm += 1
-                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])                
-                phi_i_bar /= Si_norm
-                G_new.nodes[nd]['attributes'] = phi_i_bar
-                                            
-        # update edge labels and adjacency matrix.
-        if ds_attrs['edge_labeled']:
-            for nd1, nd2, _ in G.edges(data=True):
-                h_ij0_list = []
-                label_list = []
-                for label in get_edge_labels(Gn, edge_label):
-                    h_ij0 = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p[idx][nd1]
-                        pi_j = pi_p[idx][nd2]
-                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and 
-                                   g.has_edge(pi_i, pi_j) and 
-                                   g.edges[pi_i, pi_j][edge_label] == label)
-                        h_ij0 += h_ij0_p
-                    h_ij0_list.append(h_ij0)
-                    label_list.append(label)
-                # choose one of the best randomly.
-                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
-                h_ij0_max = h_ij0_list[idx_max[0]]
-                idx_rdm = random.randint(0, len(idx_max) - 1)
-                best_label = label_list[idx_max[idx_rdm]]
-                       
-                # check whether a_ij is 0 or 1.
-                sij_norm = 0
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p[idx][nd1]
-                    pi_j = pi_p[idx][nd2]
-                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                       sij_norm += 1
-                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
-                    if not G_new.has_edge(nd1, nd2):
-                        G_new.add_edge(nd1, nd2)
-                    G_new.edges[nd1, nd2][edge_label] = best_label
-                else:
-                    if G_new.has_edge(nd1, nd2):
-                        G_new.remove_edge(nd1, nd2)                
-        else: # if edges are unlabeled
-            for nd1, nd2, _ in G.edges(data=True):
-                sij_norm = 0
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p[idx][nd1]
-                    pi_j = pi_p[idx][nd2]
-                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                       sij_norm += 1
-                if sij_norm > len(Gn) * c_er / (c_er + c_ei):
-                    if not G_new.has_edge(nd1, nd2):
-                        G_new.add_edge(nd1, nd2)
-                else:
-                    if G_new.has_edge(nd1, nd2):
-                        G_new.remove_edge(nd1, nd2)
-                        
-        G = G_new.copy()
-        
-        # update pi_p
-        pi_p = []
-        for idx1, G_p in enumerate(Gn):
-            dist_tmp, pi_tmp, _ = GED(G, G_p)
-            pi_p.append(pi_tmp)
-    
-    return G
-
-# --------------------------- These are tests --------------------------------#
-    
-def test_iam_with_more_graphs_as_init(Gn, G_candidate, c_ei=3, c_er=3, c_es=1, 
-                                      node_label='atom', edge_label='bond_type'):
-    """See my name, then you know what I do.
-    """
-#    Gn = Gn[0:10]
-    Gn = [nx.convert_node_labels_to_integers(g) for g in Gn]
-    
-    # phase 1: initilize.
-    # compute set-median.
-    dis_min = np.inf
-#    pi_p = []
-    pi_all_forward = []
-    pi_all_backward = []
-    for idx1, G_p in tqdm(enumerate(G_candidate), desc='computing GEDs', file=sys.stdout):
-        dist_sum = 0
-        pi_all_forward.append([])
-        pi_all_backward.append([])
-        for idx2, G_p_prime in enumerate(Gn):
-            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G_p, G_p_prime)
-            pi_all_forward[idx1].append(pi_tmp_forward)
-            pi_all_backward[idx1].append(pi_tmp_backward)
-            dist_sum += dist_tmp
-        if dist_sum <= dis_min:
-            dis_min = dist_sum
-            G = G_p.copy()
-            idx_min = idx1
-    # list of edit operations.        
-    pi_p_forward = pi_all_forward[idx_min]
-    pi_p_backward = pi_all_backward[idx_min]
-            
-    # phase 2: iteration.
-    ds_attrs = get_dataset_attributes(Gn + [G], attr_names=['edge_labeled', 'node_attr_dim'], 
-                                      edge_label=edge_label)
-    label_set = get_node_labels(Gn + [G], node_label)
-    for itr in range(0, 10): # @todo: the convergence condition?
-        G_new = G.copy()
-        # update vertex labels.
-        # pre-compute h_i0 for each label.
-#        for label in get_node_labels(Gn, node_label):
-#            print(label)
-#        for nd in G.nodes(data=True):
-#            pass
-        if not ds_attrs['node_attr_dim']: # labels are symbolic
-            for nd in G.nodes():
-                h_i0_list = []
-                label_list = []
-                for label in label_set:
-                    h_i0 = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p_forward[idx][nd]
-                        if g.has_node(pi_i) and g.nodes[pi_i][node_label] == label:
-                            h_i0 += 1
-                    h_i0_list.append(h_i0)
-                    label_list.append(label)
-                # choose one of the best randomly.
-                idx_max = np.argwhere(h_i0_list == np.max(h_i0_list)).flatten().tolist()
-                idx_rdm = random.randint(0, len(idx_max) - 1)
-                G_new.nodes[nd][node_label] = label_list[idx_max[idx_rdm]]
-        else: # labels are non-symbolic
-            for nd in G.nodes():
-                Si_norm = 0
-                phi_i_bar = np.array([0.0 for _ in range(ds_attrs['node_attr_dim'])])
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p_forward[idx][nd]
-                    if g.has_node(pi_i): #@todo: what if no g has node? phi_i_bar = 0?
-                        Si_norm += 1
-                        phi_i_bar += np.array([float(itm) for itm in g.nodes[pi_i]['attributes']])                
-                phi_i_bar /= Si_norm
-                G_new.nodes[nd]['attributes'] = phi_i_bar
-                                            
-        # update edge labels and adjacency matrix.
-        if ds_attrs['edge_labeled']:
-            for nd1, nd2, _ in G.edges(data=True):
-                h_ij0_list = []
-                label_list = []
-                for label in get_edge_labels(Gn, edge_label):
-                    h_ij0 = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p_forward[idx][nd1]
-                        pi_j = pi_p_forward[idx][nd2]
-                        h_ij0_p = (g.has_node(pi_i) and g.has_node(pi_j) and 
-                                   g.has_edge(pi_i, pi_j) and 
-                                   g.edges[pi_i, pi_j][edge_label] == label)
-                        h_ij0 += h_ij0_p
-                    h_ij0_list.append(h_ij0)
-                    label_list.append(label)
-                # choose one of the best randomly.
-                idx_max = np.argwhere(h_ij0_list == np.max(h_ij0_list)).flatten().tolist()
-                h_ij0_max = h_ij0_list[idx_max[0]]
-                idx_rdm = random.randint(0, len(idx_max) - 1)
-                best_label = label_list[idx_max[idx_rdm]]
-                       
-                # check whether a_ij is 0 or 1.
-                sij_norm = 0
-                for idx, g in enumerate(Gn):
-                    pi_i = pi_p_forward[idx][nd1]
-                    pi_j = pi_p_forward[idx][nd2]
-                    if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                       sij_norm += 1
-                if h_ij0_max > len(Gn) * c_er / c_es + sij_norm * (1 - (c_er + c_ei) / c_es):
-                    if not G_new.has_edge(nd1, nd2):
-                        G_new.add_edge(nd1, nd2)
-                    G_new.edges[nd1, nd2][edge_label] = best_label
-                else:
-                    if G_new.has_edge(nd1, nd2):
-                        G_new.remove_edge(nd1, nd2)                
-        else: # if edges are unlabeled
-            # @todo: works only for undirected graphs.
-            for nd1 in range(nx.number_of_nodes(G)):
-                for nd2 in range(nd1 + 1, nx.number_of_nodes(G)):
-                    sij_norm = 0
-                    for idx, g in enumerate(Gn):
-                        pi_i = pi_p_forward[idx][nd1]
-                        pi_j = pi_p_forward[idx][nd2]
-                        if g.has_node(pi_i) and g.has_node(pi_j) and g.has_edge(pi_i, pi_j):
-                           sij_norm += 1
-                    if sij_norm > len(Gn) * c_er / (c_er + c_ei):
-                        if not G_new.has_edge(nd1, nd2):
-                            G_new.add_edge(nd1, nd2)
-                    elif sij_norm < len(Gn) * c_er / (c_er + c_ei):
-                        if G_new.has_edge(nd1, nd2):
-                            G_new.remove_edge(nd1, nd2)
-                    # do not change anything when equal.
-                        
-        G = G_new.copy()
-        
-        # update pi_p
-        pi_p_forward = []
-        for G_p in Gn:
-            dist_tmp, pi_tmp_forward, pi_tmp_backward = GED(G, G_p)
-            pi_p_forward.append(pi_tmp_forward)
-    
-    return G
-
-
-###############################################################################
-
-if __name__ == '__main__':
-    from gklearn.utils.graphfiles import loadDataset
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG.mat',
-          'extra_params': {'am_sp_al_nl_el': [0, 0, 3, 1, 2]}}  # node/edge symb
-#    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-#          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-#          'extra_params': {}}
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
-    iam(Gn)
\ No newline at end of file
diff --git a/gklearn/preimage/knn.py b/gklearn/preimage/knn.py
deleted file mode 100644
index c179287..0000000
--- a/gklearn/preimage/knn.py
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Jan 10 13:22:04 2020
-
-@author: ljia
-"""
-import numpy as np
-#import matplotlib.pyplot as plt
-from tqdm import tqdm
-import random
-#import csv
-from shutil import copyfile
-import os
-
-from gklearn.preimage.iam import iam_bash
-from gklearn.utils.graphfiles import loadDataset, loadGXL
-from gklearn.preimage.ged import GED
-from gklearn.preimage.utils import get_same_item_indices
-
-def test_knn():
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-#    gkernel = 'treeletkernel'
-#    node_label = 'atom'
-#    edge_label = 'bond_type'
-#    ds_name = 'mono'
-    dir_output = 'results/knn/'
-    graph_dir = os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'
-    
-    k_nn = 1
-    percent = 0.1
-    repeats = 50
-    edit_cost_constant = [3, 3, 1, 3, 3, 1]
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    sod_sm_list_list
-    for repeat in range(0, repeats):
-        print('\n---------------------------------')
-        print('repeat =', repeat)
-        accuracy_sm_list = []
-        accuracy_gm_list = []
-        sod_sm_list = []
-        sod_gm_list = []
-        
-        random.seed(repeat)
-        set_median_list = []
-        gen_median_list = []
-        train_y_set = []
-        for y, values in y_idx.items():
-            print('\ny =', y)
-            size_median_set = int(len(values) * percent)
-            median_set_idx = random.sample(values, size_median_set)
-            print('median set: ', median_set_idx)
-            
-            # compute set median and gen median using IAM (C++ through bash).
-    #        Gn_median = [Gn[idx] for idx in median_set_idx]
-            group_fnames = [Gn[g].graph['filename'] for g in median_set_idx]
-            sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constant,
-                                                          graph_dir=graph_dir)
-            print('sod_sm, sod_gm:', sod_sm, sod_gm)
-            sod_sm_list.append(sod_sm)
-            sod_gm_list.append(sod_gm)
-            fname_sm_new = dir_output + 'medians/set_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
-            copyfile(fname_sm, fname_sm_new)
-            fname_gm_new = dir_output + 'medians/gen_median.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
-            copyfile(fname_gm, fname_gm_new)
-            set_median_list.append(loadGXL(fname_sm_new))
-            gen_median_list.append(loadGXL(fname_gm_new))
-            train_y_set.append(int(y))
-        
-        print(sod_sm, sod_gm)
-        
-        # do 1-nn.
-        test_y_set = [int(y) for y in y_all]
-        accuracy_sm = knn(set_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
-        accuracy_gm = knn(set_median_list, train_y_set, Gn, test_y_set, k=k_nn, distance='ged')
-        accuracy_sm_list.append(accuracy_sm)
-        accuracy_gm_list.append(accuracy_gm)
-        print('current accuracy sm and gm:', accuracy_sm, accuracy_gm)
-        
-    # output
-    accuracy_sm_mean = np.mean(accuracy_sm_list)
-    accuracy_gm_mean = np.mean(accuracy_gm_list)
-    print('\ntotal average accuracy sm and gm:', accuracy_sm_mean, accuracy_gm_mean)
-
-        
-def knn(train_set, train_y_set, test_set, test_y_set, k=1, distance='ged'):
-    if k == 1 and distance == 'ged':
-        algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-        params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', 
-                    'algo_options': algo_options, 'stabilizer': None}
-        accuracy = 0
-        for idx_test, g_test in tqdm(enumerate(test_set), desc='computing 1-nn', 
-                                     file=sys.stdout):
-            dis = np.inf
-            for idx_train, g_train in enumerate(train_set):
-                dis_cur, _, _ = GED(g_test, g_train, **params_ged)
-                if dis_cur < dis:
-                    dis = dis_cur
-                    test_y_cur = train_y_set[idx_train]
-            if test_y_cur == test_y_set[idx_test]:
-                accuracy += 1
-        accuracy = accuracy / len(test_set)
-        
-    return accuracy
-
-    
-
-if __name__ == '__main__':
-    test_knn()
\ No newline at end of file
diff --git a/gklearn/preimage/libs.py b/gklearn/preimage/libs.py
deleted file mode 100644
index 76005c6..0000000
--- a/gklearn/preimage/libs.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import sys
-import pathlib
-
-# insert gedlibpy library.
-sys.path.insert(0, "../../../")
-from gedlibpy import librariesImport, gedlibpy
diff --git a/gklearn/preimage/median.py b/gklearn/preimage/median.py
deleted file mode 100644
index 1c5bb0f..0000000
--- a/gklearn/preimage/median.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import sys
-sys.path.insert(0, "../")
-#import pathlib
-import numpy as np
-import networkx as nx
-import time
-
-from gedlibpy import librariesImport, gedlibpy
-#import script
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-from gklearn.utils.graphfiles import loadDataset
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-    """
-    Replace a graph in script
-
-    If old_id is -1, add a new graph to the environnemt
-
-    """
-    if(old_id > -1):
-        script.PyClearGraph(old_id)
-    new_id = script.PyAddGraph(label)
-    for i in graph.nodes():
-        script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required bt gedlib
-    for e in graph.edges:
-        script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
-    script.PyInitEnv()
-    script.PySetMethod("IPFP", "")
-    script.PyInitMethod()
-
-    return new_id
-    
-#Dessin median courrant
-def draw_Letter_graph(graph, savepath=''):
-    import numpy as np
-    import networkx as nx
-    import matplotlib.pyplot as plt
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['attributes'][0]),
-           float(graph.node[n]['attributes'][1])])
-    nx.draw_networkx(graph, pos)
-    if savepath != '':
-        plt.savefig(savepath + str(time.time()) + '.eps', format='eps', dpi=300)
-    plt.show()
-    plt.clf()
-    
-#compute new mappings
-def update_mappings(script,median_id,listID):
-    med_distances = {}
-    med_mappings = {}
-    sod = 0
-    for i in range(0,len(listID)):
-        script.PyRunMethod(median_id,listID[i])
-        med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
-        med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
-        sod += med_distances[i]
-    return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
-    s_ij = 0
-    for k in range(0,len(all_mappings)):
-        cur_graph =  all_graphs[k]
-        cur_mapping = all_mappings[k]
-        size_graph = cur_graph.order()
-        if ((cur_mapping[i] < size_graph) and 
-            (cur_mapping[j] < size_graph) and 
-            (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
-                s_ij += 1
-        
-    return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-#     from scipy.stats.mstats import gmean
-
-#     for i in median.nodes():
-#         for k in listIdSet:
-#             vectors = [] #np.zeros((len(listIdSet),2))
-#             if(k != median_id):
-#                 phi_i = mappings[k][i]
-#                 if(phi_i < dataset[k].order()):
-#                     vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-#         new_labels = gmean(vectors)
-#         median.node[i]['x'] = str(new_labels[0])
-#         median.node[i]['y'] = str(new_labels[1])
-#     return median
-
-def update_median_nodes(median,dataset,mappings):
-    #update node attributes
-    for i in median.nodes():
-        nb_sub=0
-        mean_label = {'x' : 0, 'y' : 0}
-        for k in range(0,len(mappings)):
-            phi_i = mappings[k][i]
-            if ( phi_i < dataset[k].order() ):
-                nb_sub += 1
-                mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
-                mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
-        median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
-        median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
-    return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
-    size_dataset = len(dataset)
-    ratio_cei_cer = cer/(cei + cer)
-    threshold = size_dataset*ratio_cei_cer
-    order_graph_median = median.order()
-    for i in range(0,order_graph_median):
-        for j in range(i+1,order_graph_median):
-            s_ij = calcul_Sij(mappings,dataset,i,j)
-            if(s_ij > threshold):
-                median.add_edge(i,j)
-            else:
-                if(median.has_edge(i,j)):
-                    median.remove_edge(i,j)
-    return median
-
-
-
-def compute_median(script, listID, dataset,verbose=False):
-    """Compute a graph median of a dataset according to an environment
-
-    Parameters
-
-    script : An gedlib initialized environnement 
-    listID (list): a list of ID in script: encodes the dataset 
-    dataset (list): corresponding graphs in networkX format. We assume that graph
-    listID[i] corresponds to dataset[i]
-
-    Returns:
-    A networkX graph, which is the median, with corresponding sod
-    """
-    print(len(listID))
-    median_set_index, median_set_sod = compute_median_set(script, listID)
-    print(median_set_index)
-    print(median_set_sod)
-    sods = []
-    #Ajout median dans environnement
-    set_median = dataset[median_set_index].copy()
-    median = dataset[median_set_index].copy()
-    cur_med_id = replace_graph_in_env(script,median,-1)
-    med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-    sods.append(cur_sod)
-    if(verbose):
-        print(cur_sod)
-    ite_max = 50
-    old_sod = cur_sod * 2
-    ite = 0
-    epsilon = 0.001
-
-    best_median 
-    while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon )):
-        median = update_median_nodes(median,dataset, med_mappings)
-        median = update_median_edges(dataset,med_mappings,median)
-
-        cur_med_id = replace_graph_in_env(script,median,cur_med_id)
-        med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-        
-        
-        sods.append(cur_sod)
-        if(verbose):
-            print(cur_sod)
-        ite += 1
-    return median, cur_sod, sods, set_median
-    
-    draw_Letter_graph(median)
-
-
-def compute_median_set(script,listID):
-    'Returns the id in listID corresponding to median set'
-    #Calcul median set
-    N=len(listID)
-    map_id_to_index = {}
-    map_index_to_id = {}
-    for i in range(0,len(listID)):
-        map_id_to_index[listID[i]] = i
-        map_index_to_id[i] = listID[i]
-        
-    distances = np.zeros((N,N))
-    for i in listID:
-        for j in listID:
-            script.PyRunMethod(i,j)
-            distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
-    median_set_index = np.argmin(np.sum(distances,0))
-    sod = np.min(np.sum(distances,0))
-    
-    return median_set_index, sod
-
-if __name__ == "__main__":
-    #Chargement du dataset
-    script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
-    script.PySetEditCost("LETTER")
-    script.PyInitEnv()
-    script.PySetMethod("IPFP", "")
-    script.PyInitMethod()
-
-    dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
-    listID = script.PyGetAllGraphIds()
-    median, sod = compute_median(script,listID,dataset,verbose=True)
-    
-    print(sod)
-    draw_Letter_graph(median)
-
-
-#if __name__ == '__main__':
-#    # test draw_Letter_graph
-#    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-#          'extra_params': {}} # node nsymb
-#    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    print(y_all)
-#    for g in Gn:
-#        draw_Letter_graph(g)
\ No newline at end of file
diff --git a/gklearn/preimage/median_benoit.py b/gklearn/preimage/median_benoit.py
deleted file mode 100644
index 6712196..0000000
--- a/gklearn/preimage/median_benoit.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import sys
-import pathlib
-import numpy as np
-import networkx as nx
-
-import librariesImport
-import script
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-    """
-    Replace a graph in script
-
-    If old_id is -1, add a new graph to the environnemt
-
-    """
-    if(old_id > -1):
-        script.PyClearGraph(old_id)
-    new_id = script.PyAddGraph(label)
-    for i in graph.nodes():
-        script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required bt gedlib
-    for e in graph.edges:
-        script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
-    script.PyInitEnv()
-    script.PySetMethod("IPFP", "")
-    script.PyInitMethod()
-
-    return new_id
-    
-#Dessin median courrant
-def draw_Letter_graph(graph):
-    import numpy as np
-    import networkx as nx
-    import matplotlib.pyplot as plt
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
-    nx.draw_networkx(graph,pos)
-    plt.show()
-    
-#compute new mappings
-def update_mappings(script,median_id,listID):
-    med_distances = {}
-    med_mappings = {}
-    sod = 0
-    for i in range(0,len(listID)):
-        script.PyRunMethod(median_id,listID[i])
-        med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
-        med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
-        sod += med_distances[i]
-    return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
-    s_ij = 0
-    for k in range(0,len(all_mappings)):
-        cur_graph =  all_graphs[k]
-        cur_mapping = all_mappings[k]
-        size_graph = cur_graph.order()
-        if ((cur_mapping[i] < size_graph) and 
-            (cur_mapping[j] < size_graph) and 
-            (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
-                s_ij += 1
-        
-    return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-#     from scipy.stats.mstats import gmean
-
-#     for i in median.nodes():
-#         for k in listIdSet:
-#             vectors = [] #np.zeros((len(listIdSet),2))
-#             if(k != median_id):
-#                 phi_i = mappings[k][i]
-#                 if(phi_i < dataset[k].order()):
-#                     vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-#         new_labels = gmean(vectors)
-#         median.node[i]['x'] = str(new_labels[0])
-#         median.node[i]['y'] = str(new_labels[1])
-#     return median
-
-def update_median_nodes(median,dataset,mappings):
-    #update node attributes
-    for i in median.nodes():
-        nb_sub=0
-        mean_label = {'x' : 0, 'y' : 0}
-        for k in range(0,len(mappings)):
-            phi_i = mappings[k][i]
-            if ( phi_i < dataset[k].order() ):
-                nb_sub += 1
-                mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
-                mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
-        median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
-        median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
-    return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
-    size_dataset = len(dataset)
-    ratio_cei_cer = cer/(cei + cer)
-    threshold = size_dataset*ratio_cei_cer
-    order_graph_median = median.order()
-    for i in range(0,order_graph_median):
-        for j in range(i+1,order_graph_median):
-            s_ij = calcul_Sij(mappings,dataset,i,j)
-            if(s_ij > threshold):
-                median.add_edge(i,j)
-            else:
-                if(median.has_edge(i,j)):
-                    median.remove_edge(i,j)
-    return median
-
-
-
-def compute_median(script, listID, dataset,verbose=False):
-    """Compute a graph median of a dataset according to an environment
-
-    Parameters
-
-    script : An gedlib initialized environnement 
-    listID (list): a list of ID in script: encodes the dataset 
-    dataset (list): corresponding graphs in networkX format. We assume that graph
-    listID[i] corresponds to dataset[i]
-
-    Returns:
-    A networkX graph, which is the median, with corresponding sod
-    """
-    print(len(listID))
-    median_set_index, median_set_sod = compute_median_set(script, listID)
-    print(median_set_index)
-    print(median_set_sod)
-    sods = []
-    #Ajout median dans environnement
-    set_median = dataset[median_set_index].copy()
-    median = dataset[median_set_index].copy()
-    cur_med_id = replace_graph_in_env(script,median,-1)
-    med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-    sods.append(cur_sod)
-    if(verbose):
-        print(cur_sod)
-    ite_max = 50
-    old_sod = cur_sod * 2
-    ite = 0
-    epsilon = 0.001
-
-    best_median 
-    while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon )):
-        median = update_median_nodes(median,dataset, med_mappings)
-        median = update_median_edges(dataset,med_mappings,median)
-
-        cur_med_id = replace_graph_in_env(script,median,cur_med_id)
-        med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-        
-        
-        sods.append(cur_sod)
-        if(verbose):
-            print(cur_sod)
-        ite += 1
-    return median, cur_sod, sods, set_median
-    
-    draw_Letter_graph(median)
-
-
-def compute_median_set(script,listID):
-    'Returns the id in listID corresponding to median set'
-    #Calcul median set
-    N=len(listID)
-    map_id_to_index = {}
-    map_index_to_id = {}
-    for i in range(0,len(listID)):
-        map_id_to_index[listID[i]] = i
-        map_index_to_id[i] = listID[i]
-        
-    distances = np.zeros((N,N))
-    for i in listID:
-        for j in listID:
-            script.PyRunMethod(i,j)
-            distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
-    median_set_index = np.argmin(np.sum(distances,0))
-    sod = np.min(np.sum(distances,0))
-    
-    return median_set_index, sod
-
-if __name__ == "__main__":
-    #Chargement du dataset
-    script.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
-    script.PySetEditCost("LETTER")
-    script.PyInitEnv()
-    script.PySetMethod("IPFP", "")
-    script.PyInitMethod()
-
-    dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
-    listID = script.PyGetAllGraphIds()
-    median, sod = compute_median(script,listID,dataset,verbose=True)
-    
-    print(sod)
-    draw_Letter_graph(median)
diff --git a/gklearn/preimage/median_graph_estimator.py b/gklearn/preimage/median_graph_estimator.py
deleted file mode 100644
index b70cc61..0000000
--- a/gklearn/preimage/median_graph_estimator.py
+++ /dev/null
@@ -1,826 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Mar 16 18:04:55 2020
-
-@author: ljia
-"""
-import numpy as np
-from gklearn.preimage.common_types import AlgorithmState
-from gklearn.preimage import misc
-from gklearn.preimage.timer import Timer
-from gklearn.utils.utils import graph_isIdentical
-import time
-from tqdm import tqdm
-import sys
-import networkx as nx
-
-
-class MedianGraphEstimator(object):
-	
-	def __init__(self, ged_env, constant_node_costs):
-		"""Constructor.
-		
-		Parameters
-		----------
-		ged_env : gklearn.gedlib.gedlibpy.GEDEnv
-			Initialized GED environment. The edit costs must be set by the user.
-			
-		constant_node_costs : Boolean
-			Set to True if the node relabeling costs are constant.
-		"""
-		self.__ged_env = ged_env
-		self.__init_method = 'BRANCH_FAST'
-		self.__init_options = ''
-		self.__descent_method = 'BRANCH_FAST'
-		self.__descent_options = ''
-		self.__refine_method = 'IPFP'
-		self.__refine_options = ''
-		self.__constant_node_costs = constant_node_costs
-		self.__labeled_nodes = (ged_env.get_num_node_labels() > 1)
-		self.__node_del_cost = ged_env.get_node_del_cost(ged_env.get_node_label(1))
-		self.__node_ins_cost = ged_env.get_node_ins_cost(ged_env.get_node_label(1))
-		self.__labeled_edges = (ged_env.get_num_edge_labels() > 1)
-		self.__edge_del_cost = ged_env.get_edge_del_cost(ged_env.get_edge_label(1))
-		self.__edge_ins_cost = ged_env.get_edge_ins_cost(ged_env.get_edge_label(1))
-		self.__init_type = 'RANDOM'
-		self.__num_random_inits = 10
-		self.__desired_num_random_inits = 10
-		self.__use_real_randomness = True
-		self.__seed = 0
-		self.__refine = True
-		self.__time_limit_in_sec = 0
-		self.__epsilon = 0.0001
-		self.__max_itrs = 100
-		self.__max_itrs_without_update = 3
-		self.__num_inits_increase_order = 10
-		self.__init_type_increase_order = 'K-MEANS++'
-		self.__max_itrs_increase_order = 10
-		self.__print_to_stdout = 2
-		self.__median_id = np.inf # @todo: check
-		self.__median_node_id_prefix = '' # @todo: check
-		self.__node_maps_from_median = {}
-		self.__sum_of_distances = 0
-		self.__best_init_sum_of_distances = np.inf
-		self.__converged_sum_of_distances = np.inf
-		self.__runtime = None
-		self.__runtime_initialized = None
-		self.__runtime_converged = None
-		self.__itrs = [] # @todo: check: {} ?
-		self.__num_decrease_order = 0
-		self.__num_increase_order = 0
-		self.__num_converged_descents = 0
-		self.__state = AlgorithmState.TERMINATED
-		
-		if ged_env is None:
-			raise Exception('The GED environment pointer passed to the constructor of MedianGraphEstimator is null.')
-		elif not ged_env.is_initialized():
-			raise Exception('The GED environment is uninitialized. Call gedlibpy.GEDEnv.init() before passing it to the constructor of MedianGraphEstimator.')
-	
-	
-	def set_options(self, options):
-		"""Sets the options of the estimator.
-
-		Parameters
-		----------
-		options : string
-			String that specifies with which options to run the estimator.
-		"""
-		self.__set_default_options()
-		options_map = misc.options_string_to_options_map(options)
-		for opt_name, opt_val in options_map.items():
-			if opt_name == 'init-type':
-				self.__init_type = opt_val
-				if opt_val != 'MEDOID' and opt_val != 'RANDOM' and opt_val != 'MIN' and opt_val != 'MAX' and opt_val != 'MEAN':
-					raise Exception('Invalid argument ' + opt_val + ' for option init-type. Usage: options = "[--init-type RANDOM|MEDOID|EMPTY|MIN|MAX|MEAN] [...]"')
-			elif opt_name == 'random-inits':
-				try:
-					self.__num_random_inits = int(opt_val)
-					self.__desired_num_random_inits = self.__num_random_inits
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
-
-				if self.__num_random_inits <= 0:
-					raise Exception('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
-	
-			elif opt_name == 'randomness':
-				if opt_val == 'PSEUDO':
-					self.__use_real_randomness = False
-	
-				elif opt_val == 'REAL':
-					self.__use_real_randomness = True
-	
-				else:
-					raise Exception('Invalid argument "' + opt_val  + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
-	
-			elif opt_name == 'stdout':
-				if opt_val == '0':
-					self.__print_to_stdout = 0
-	
-				elif opt_val == '1':
-					self.__print_to_stdout = 1
-	
-				elif opt_val == '2':
-					self.__print_to_stdout = 2
-	
-				else:
-					raise Exception('Invalid argument "' + opt_val  + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
-	
-			elif opt_name == 'refine':
-				if opt_val == 'TRUE':
-					self.__refine = True
-	
-				elif opt_val == 'FALSE':
-					self.__refine = False
-	
-				else:
-					raise Exception('Invalid argument "' + opt_val  + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
-	
-			elif opt_name == 'time-limit':
-				try:
-					self.__time_limit_in_sec = float(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option time-limit.  Usage: options = "[--time-limit <convertible to double>] [...]')
-	
-			elif opt_name == 'max-itrs':
-				try:
-					self.__max_itrs = int(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs <convertible to int>] [...]')
-	
-			elif opt_name == 'max-itrs-without-update':
-				try:
-					self.__max_itrs_without_update = int(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update <convertible to int>] [...]')
-	
-			elif opt_name == 'seed':
-				try:
-					self.__seed = int(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed <convertible to int greater equal 0>] [...]')
-	
-			elif opt_name == 'epsilon':
-				try:
-					self.__epsilon = float(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
-	
-				if self.__epsilon <= 0:
-					raise Exception('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
-	
-			elif opt_name == 'inits-increase-order':
-				try:
-					self.__num_inits_increase_order = int(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
-	
-				if self.__num_inits_increase_order <= 0:
-					raise Exception('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
-
-			elif opt_name == 'init-type-increase-order':
-				self.__init_type_increase_order = opt_val
-				if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
-					raise Exception('Invalid argument ' + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
-	
-			elif opt_name == 'max-itrs-increase-order':
-				try:
-					self.__max_itrs_increase_order = int(opt_val)
-	
-				except:
-					raise Exception('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order <convertible to int>] [...]')
-
-			else:
-				valid_options = '[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] '
-				valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--epsilon <arg>] '
-				valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
-				raise Exception('Invalid option "' + opt_name + '". Usage: options = "' + valid_options + '"')
- 
-		
-	def set_init_method(self, init_method, init_options=''):
-		"""Selects method to be used for computing the initial medoid graph.
-		
-		Parameters
-		----------
-		init_method : string
-			The selected method. Default: ged::Options::GEDMethod::BRANCH_UNIFORM.
-		
-		init_options : string
-			The options for the selected method. Default: "".
-		
-		Notes
-		-----
-		Has no effect unless "--init-type MEDOID" is passed to set_options().
-		"""
-		self.__init_method = init_method;
-		self.__init_options = init_options;
-	
-	
-	def set_descent_method(self, descent_method, descent_options=''):
-		"""Selects method to be used for block gradient descent..
-		
-		Parameters
-		----------
-		descent_method : string
-			The selected method. Default: ged::Options::GEDMethod::BRANCH_FAST.
-		
-		descent_options : string
-			The options for the selected method. Default: "".
-		
-		Notes
-		-----
-		Has no effect unless "--init-type MEDOID" is passed to set_options().
-		"""
-		self.__descent_method = descent_method;
-		self.__descent_options = descent_options;
-
-	
-	def set_refine_method(self, refine_method, refine_options):
-		"""Selects method to be used for improving the sum of distances and the node maps for the converged median.
-		
-		Parameters
-		----------
-		refine_method : string
-			The selected method. Default: "IPFP".
-			
-		refine_options : string 
-			The options for the selected method. Default: "".
-					
-		Notes
-		-----
-		Has no effect if "--refine FALSE" is passed to set_options().
-		"""
-		self.__refine_method = refine_method
-		self.__refine_options = refine_options
-
-	
-	def run(self, graph_ids, set_median_id, gen_median_id):
-		"""Computes a generalized median graph.
-		
-		Parameters
-		----------
-		graph_ids : list[integer]
-			The IDs of the graphs for which the median should be computed. Must have been added to the environment passed to the constructor.
-		
-		set_median_id : integer
-			The ID of the computed set-median. A dummy graph with this ID must have been added to the environment passed to the constructor. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
-
-
-		gen_median_id : integer
-			The ID of the computed generalized median. Upon termination, the computed median can be obtained via gklearn.gedlib.gedlibpy.GEDEnv.get_graph().
-		"""
-		# Sanity checks.
-		if len(graph_ids) == 0:
-			raise Exception('Empty vector of graph IDs, unable to compute median.')
-		all_graphs_empty = True
-		for graph_id in graph_ids:
-			if self.__ged_env.get_graph_num_nodes(graph_id) > 0:
-				self.__median_node_id_prefix = self.__ged_env.get_original_node_ids(graph_id)[0]
-				all_graphs_empty = False
-				break
-		if all_graphs_empty:
-			raise Exception('All graphs in the collection are empty.')
-			
-		# Start timer and record start time.
-		start = time.time()
-		timer = Timer(self.__time_limit_in_sec)
-		self.__median_id = gen_median_id
-		self.__state = AlgorithmState.TERMINATED
-		
-		# Get ExchangeGraph representations of the input graphs.
-		graphs = {}
-		for graph_id in graph_ids:
-			# @todo: get_nx_graph() function may need to be modified according to the coming code.
-			graphs[graph_id] = self.__ged_env.get_nx_graph(graph_id, True, True, False)
-# 		print(self.__ged_env.get_graph_internal_id(0))
-# 		print(graphs[0].graph)
-# 		print(graphs[0].nodes(data=True))
-# 		print(graphs[0].edges(data=True))
-# 		print(nx.adjacency_matrix(graphs[0]))
-
-			
-		# Construct initial medians.
-		medians = []
-		self.__construct_initial_medians(graph_ids, timer, medians)
-		end_init = time.time()
-		self.__runtime_initialized = end_init - start
-# 		print(medians[0].graph)
-# 		print(medians[0].nodes(data=True))
-# 		print(medians[0].edges(data=True))
-# 		print(nx.adjacency_matrix(medians[0]))
-		
-		# Reset information about iterations and number of times the median decreases and increases.
-		self.__itrs = [0] * len(medians)
-		self.__num_decrease_order = 0
-		self.__num_increase_order = 0
-		self.__num_converged_descents = 0
-		
-		# Initialize the best median.
-		best_sum_of_distances = np.inf
-		self.__best_init_sum_of_distances = np.inf
-		node_maps_from_best_median = {}
-		
-		# Run block gradient descent from all initial medians.
-		self.__ged_env.set_method(self.__descent_method, self.__descent_options)
-		for median_pos in range(0, len(medians)):
-			
-			# Terminate if the timer has expired and at least one SOD has been computed.
-			if timer.expired() and median_pos > 0:
-				break
-			
-			# Print information about current iteration.
-			if self.__print_to_stdout == 2:
-				print('\n===========================================================')
-				print('Block gradient descent for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
-				print('-----------------------------------------------------------')
-				
-			# Get reference to the median.
-			median = medians[median_pos]
-			
-			# Load initial median into the environment.
-			self.__ged_env.load_nx_graph(median, gen_median_id)
-			self.__ged_env.init(self.__ged_env.get_init_type())
-			
-			# Print information about current iteration.
-			if self.__print_to_stdout == 2:
-				progress = tqdm(desc='\rComputing initial node maps', total=len(graph_ids), file=sys.stdout)
-				
-			# Compute node maps and sum of distances for initial median.
-			self.__sum_of_distances = 0
-			self.__node_maps_from_median.clear() # @todo
-			for graph_id in graph_ids:
-				self.__ged_env.run_method(gen_median_id, graph_id)
-				self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(gen_median_id, graph_id)
-# 				print(self.__node_maps_from_median[graph_id])
-				self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: the C++ implementation for this function in GedLibBind.ipp re-call get_node_map() once more, this is not neccessary.
-# 				print(self.__sum_of_distances)
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					progress.update(1)
-					
-			self.__best_init_sum_of_distances = min(self.__best_init_sum_of_distances, self.__sum_of_distances)
-			self.__ged_env.load_nx_graph(median, set_median_id)
-# 			print(self.__best_init_sum_of_distances)
-			
-			# Print information about current iteration.
-			if self.__print_to_stdout == 2:
-				print('\n')
-				
-			# Run block gradient descent from initial median.
-			converged = False
-			itrs_without_update = 0
-			while not self.__termination_criterion_met(converged, timer, self.__itrs[median_pos], itrs_without_update):
-				
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					print('\n===========================================================')
-					print('Iteration', str(self.__itrs[median_pos] + 1), 'for initial median', str(median_pos + 1), 'of', str(len(medians)), '.')
-					print('-----------------------------------------------------------')
-					
-				# Initialize flags that tell us what happened in the iteration.
-				median_modified = False
-				node_maps_modified = False
-				decreased_order = False
-				increased_order = False
-				
-				# Update the median. # @todo!!!!!!!!!!!!!!!!!!!!!!
-				median_modified = self.__update_median(graphs, median)
-				if not median_modified or self.__itrs[median_pos] == 0:
-					decreased_order = False
-					if not decreased_order or self.__itrs[median_pos] == 0:
-						increased_order = False
-						
-				# Update the number of iterations without update of the median.
-				if median_modified or decreased_order or increased_order:
-					itrs_without_update = 0
-				else:
-					itrs_without_update += 1
-					
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					print('Loading median to environment: ... ', end='')
-					
-				# Load the median into the environment.
-				# @todo: should this function use the original node label?
-				self.__ged_env.load_nx_graph(median, gen_median_id)
-				self.__ged_env.init(self.__ged_env.get_init_type())
-					
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					print('done.')					
-					
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					print('Updating induced costs: ... ', end='')
-
-				# Compute induced costs of the old node maps w.r.t. the updated median.
-				for graph_id in graph_ids:
-# 					print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
-					# @todo: watch out if compute_induced_cost is correct, this may influence: increase/decrease order, induced_cost() in the following code.!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-					self.__ged_env.compute_induced_cost(gen_median_id, graph_id)
-# 					print('---------------------------------------')
-# 					print(self.__ged_env.get_induced_cost(gen_median_id, graph_id))
-					
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					print('done.')					
-					
-				# Update the node maps.
-				node_maps_modified = self.__update_node_maps() # @todo
-
-				# Update the order of the median if no improvement can be found with the current order.
-				
-				# Update the sum of distances.
-				old_sum_of_distances = self.__sum_of_distances
-				self.__sum_of_distances = 0
-				for graph_id in self.__node_maps_from_median:
-					self.__sum_of_distances += self.__ged_env.get_induced_cost(gen_median_id, graph_id) # @todo: see above.
-					
-				# Print information about current iteration.
-				if self.__print_to_stdout == 2:
-					print('Old local SOD: ', old_sum_of_distances)
-					print('New local SOD: ', self.__sum_of_distances)
-					print('Best converged SOD: ', best_sum_of_distances)
-					print('Modified median: ', median_modified)
-					print('Modified node maps: ', node_maps_modified)
-					print('Decreased order: ', decreased_order)
-					print('Increased order: ', increased_order)
-					print('===========================================================\n')
-					
-				converged = not (median_modified or node_maps_modified or decreased_order or increased_order)
-				
-				self.__itrs[median_pos] += 1
-				
-			# Update the best median.
-			if self.__sum_of_distances < self.__best_init_sum_of_distances:
-				best_sum_of_distances = self.__sum_of_distances
-				node_maps_from_best_median = self.__node_maps_from_median
-				best_median = median
-				
-			# Update the number of converged descents.
-			if converged:
-				self.__num_converged_descents += 1
-				
-		# Store the best encountered median.
-		self.__sum_of_distances = best_sum_of_distances
-		self.__node_maps_from_median = node_maps_from_best_median
-		self.__ged_env.load_nx_graph(best_median, gen_median_id)
-		self.__ged_env.init(self.__ged_env.get_init_type())
-		end_descent = time.time()
-		self.__runtime_converged = end_descent - start
-		
-		# Refine the sum of distances and the node maps for the converged median.
-		self.__converged_sum_of_distances = self.__sum_of_distances
-		if self.__refine:
-			self.__improve_sum_of_distances(timer) # @todo
-		
-		# Record end time, set runtime and reset the number of initial medians.
-		end = time.time()
-		self.__runtime = end - start
-		self.__num_random_inits = self.__desired_num_random_inits
-		
-		# Print global information.
-		if self.__print_to_stdout != 0:
-			print('\n===========================================================')
-			print('Finished computation of generalized median graph.')
-			print('-----------------------------------------------------------')
-			print('Best SOD after initialization: ', self.__best_init_sum_of_distances)
-			print('Converged SOD: ', self.__converged_sum_of_distances)
-			if self.__refine:
-				print('Refined SOD: ', self.__sum_of_distances)
-			print('Overall runtime: ', self.__runtime)
-			print('Runtime of initialization: ', self.__runtime_initialized)
-			print('Runtime of block gradient descent: ', self.__runtime_converged - self.__runtime_initialized)
-			if self.__refine:
-				print('Runtime of refinement: ', self.__runtime - self.__runtime_converged)
-			print('Number of initial medians: ', len(medians))
-			total_itr = 0
-			num_started_descents = 0
-			for itr in self.__itrs:
-				total_itr += itr
-				if itr > 0:
-					num_started_descents += 1
-			print('Size of graph collection: ', len(graph_ids))
-			print('Number of started descents: ', num_started_descents)
-			print('Number of converged descents: ', self.__num_converged_descents)
-			print('Overall number of iterations: ', total_itr)
-			print('Overall number of times the order decreased: ', self.__num_decrease_order)
-			print('Overall number of times the order increased: ', self.__num_increase_order)
-			print('===========================================================\n')
-	
-	
-	def get_sum_of_distances(self, state=''):
-		"""Returns the sum of distances.
-		
-		Parameters
-		----------
-		state : string
-			The state of the estimator. Can be 'initialized' or 'converged'. Default: ""
-			
-		Returns
-		-------
-		float
-			The sum of distances (SOD) of the median when the estimator was in the state `state` during the last call to run(). If `state` is not given, the converged SOD (without refinement) or refined SOD (with refinement) is returned.
-		"""
-		if not self.__median_available():
-			raise Exception('No median has been computed. Call run() before calling get_sum_of_distances().')
-		if state == 'initialized':
-			return self.__best_init_sum_of_distances
-		if state == 'converged':
-			return self.__converged_sum_of_distances
-		return self.__sum_of_distances
-	
-	
-	def __set_default_options(self):
-		self.__init_type = 'RANDOM'
-		self.__num_random_inits = 10
-		self.__desired_num_random_inits = 10
-		self.__use_real_randomness = True
-		self.__seed = 0
-		self.__refine = True
-		self.__time_limit_in_sec = 0
-		self.__epsilon = 0.0001
-		self.__max_itrs = 100
-		self.__max_itrs_without_update = 3
-		self.__num_inits_increase_order = 10
-		self.__init_type_increase_order = 'K-MEANS++'
-		self.__max_itrs_increase_order = 10
-		self.__print_to_stdout = 2
-		
-		
-	def __construct_initial_medians(self, graph_ids, timer, initial_medians):
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('\n===========================================================')
-			print('Constructing initial median(s).')
-			print('-----------------------------------------------------------')
-			
-		# Compute or sample the initial median(s).
-		initial_medians.clear()
-		if self.__init_type == 'MEDOID':
-			self.__compute_medoid(graph_ids, timer, initial_medians)
-		elif self.__init_type == 'MAX':
-			pass # @todo
-# 			compute_max_order_graph_(graph_ids, initial_medians)
-		elif self.__init_type == 'MIN':
-			pass # @todo
-# 			compute_min_order_graph_(graph_ids, initial_medians)
-		elif self.__init_type == 'MEAN':
-			pass # @todo
-# 			compute_mean_order_graph_(graph_ids, initial_medians)
-		else:
-			pass # @todo
-# 			sample_initial_medians_(graph_ids, initial_medians)
-
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('===========================================================')
-			
-			
-	def __compute_medoid(self, graph_ids, timer, initial_medians):
-		# Use method selected for initialization phase.
-		self.__ged_env.set_method(self.__init_method, self.__init_options)
-		
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			progress = tqdm(desc='\rComputing medoid', total=len(graph_ids), file=sys.stdout)
-			
-		# Compute the medoid.
-		medoid_id = graph_ids[0]
-		best_sum_of_distances = np.inf
-		for g_id in graph_ids:
-			if timer.expired():
-				self.__state = AlgorithmState.CALLED
-				break
-			sum_of_distances = 0
-			for h_id in graph_ids:
-				self.__ged_env.run_method(g_id, h_id)
-				sum_of_distances += self.__ged_env.get_upper_bound(g_id, h_id)
-			if sum_of_distances < best_sum_of_distances:
-				best_sum_of_distances = sum_of_distances
-				medoid_id = g_id
-				
-			# Print information about current iteration.
-			if self.__print_to_stdout == 2:
-				progress.update(1)
-		initial_medians.append(self.__ged_env.get_nx_graph(medoid_id, True, True, False)) # @todo
-		
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('\n')
-			
-		
-	def __termination_criterion_met(self, converged, timer, itr, itrs_without_update):
-		if timer.expired() or (itr >= self.__max_itrs if self.__max_itrs >= 0 else False):
-			if self.__state == AlgorithmState.TERMINATED:
-				self.__state = AlgorithmState.INITIALIZED
-			return True
-		return converged or (itrs_without_update > self.__max_itrs_without_update if self.__max_itrs_without_update >= 0 else False)
-	
-	
-	def __update_median(self, graphs, median):
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('Updating median: ', end='')
-			
-		# Store copy of the old median.
-		old_median = median.copy() # @todo: this is just a shallow copy.
-		
-		# Update the node labels.
-		if self.__labeled_nodes:
-			self.__update_node_labels(graphs, median)
-			
-		# Update the edges and their labels.
-		self.__update_edges(graphs, median)
-		
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('done.')
-			
-		return not self.__are_graphs_equal(median, old_median)
-		
-		
-	def __update_node_labels(self, graphs, median):
-		
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('nodes ... ', end='')
-			
-		# Iterate through all nodes of the median.
-		for i in range(0, nx.number_of_nodes(median)):
-# 			print('i: ', i)
-			# Collect the labels of the substituted nodes.
-			node_labels = []
-			for graph_id, graph in graphs.items():
-# 				print('graph_id: ', graph_id)
-# 				print(self.__node_maps_from_median[graph_id])
-				k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
-# 				print('k: ', k)
-				if k != np.inf:
-					node_labels.append(graph.nodes[k])
-					
-			# Compute the median label and update the median.
-			if len(node_labels) > 0:
-				median_label = self.__ged_env.get_median_node_label(node_labels)
-				if self.__ged_env.get_node_rel_cost(median.nodes[i], median_label) > self.__epsilon:
-					nx.set_node_attributes(median, {i: median_label})
-					
-					
-	def __update_edges(self, graphs, median):
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('edges ... ', end='')
-			
-		# Clear the adjacency lists of the median and reset number of edges to 0.
-		median_edges = list(median.edges)		
-		for (head, tail) in median_edges:
-			median.remove_edge(head, tail)
-		
-		# @todo: what if edge is not labeled?
-		# Iterate through all possible edges (i,j) of the median.
-		for i in range(0, nx.number_of_nodes(median)):
-			for j in range(i + 1, nx.number_of_nodes(median)):
-				
-				# Collect the labels of the edges to which (i,j) is mapped by the node maps.
-				edge_labels = []
-				for graph_id, graph in graphs.items():
-					k = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], i)
-					l = self.__get_node_image_from_map(self.__node_maps_from_median[graph_id], j)
-					if k != np.inf and l != np.inf:
-						if graph.has_edge(k, l):
-							edge_labels.append(graph.edges[(k, l)])
-							
-				# Compute the median edge label and the overall edge relabeling cost.
-				rel_cost = 0
-				median_label = self.__ged_env.get_edge_label(1)
-				if median.has_edge(i, j):
-					median_label = median.edges[(i, j)]
-				if self.__labeled_edges and len(edge_labels) > 0:
-					new_median_label = self.__ged_env.median_edge_label(edge_labels)
-					if self.__ged_env.get_edge_rel_cost(median_label, new_median_label) > self.__epsilon:
-						median_label = new_median_label
-					for edge_label in edge_labels:
-						rel_cost += self.__ged_env.get_edge_rel_cost(median_label, edge_label)
-						
-				# Update the median.
-				if rel_cost < (self.__edge_ins_cost + self.__edge_del_cost) * len(edge_labels) - self.__edge_del_cost * len(graphs):
-					median.add_edge(i, j, **median_label)
-				else:
-					if median.has_edge(i, j):
-						median.remove_edge(i, j)
-
-
-	def __update_node_maps(self):
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			progress = tqdm(desc='\rUpdating node maps', total=len(self.__node_maps_from_median), file=sys.stdout)
-			
-		# Update the node maps.
-		node_maps_were_modified = False
-		for graph_id in self.__node_maps_from_median:
-			self.__ged_env.run_method(self.__median_id, graph_id)
-			if self.__ged_env.get_upper_bound(self.__median_id, graph_id) < self.__ged_env.get_induced_cost(self.__median_id, graph_id) - self.__epsilon: # @todo: see above.
-				self.__node_maps_from_median[graph_id] = self.__ged_env.get_node_map(self.__median_id, graph_id) # @todo: node_map may not assigned.
-				node_maps_were_modified = True
-			# Print information about current iteration.
-			if self.__print_to_stdout == 2:
-				progress.update(1)
-			
-		# Print information about current iteration.
-		if self.__print_to_stdout == 2:
-			print('\n')
-			
-		# Return true if the node maps were modified.
-		return node_maps_were_modified
-	
-	
-	def __improve_sum_of_distances(self, timer):
-		pass
-	
-	
-	def __median_available(self):
-		return self.__median_id != np.inf
-		
-				
-	def __get_node_image_from_map(self, node_map, node):
-		"""
-		Return ID of the node mapping of `node` in `node_map`.
-
-		Parameters
-		----------
-		node_map : list[tuple(int, int)]
-			List of node maps where the mapping node is found.
-		
-		node : int
-			The mapping node of this node is returned
-
-		Raises
-		------
-		Exception
-			If the node with ID `node` is not contained in the source nodes of the node map.
-
-		Returns
-		-------
-		int
-			ID of the mapping of `node`.
-			
-		Notes
-		-----
-		This function is not implemented in the `ged::MedianGraphEstimator` class of the `GEDLIB` library. Instead it is a Python implementation of the `ged::NodeMap::image` function.
-		"""
-		if node < len(node_map):
-			return node_map[node][1] if node_map[node][1] < len(node_map) else np.inf
-		else:
- 			raise Exception('The node with ID ', str(node), ' is not contained in the source nodes of the node map.')
-		return np.inf
-				
-	
-	def __are_graphs_equal(self, g1, g2):
-		"""
-		Check if the two graphs are equal.
-
-		Parameters
-		----------
-		g1 : NetworkX graph object
-			Graph 1 to be compared.
-		
-		g2 : NetworkX graph object
-			Graph 2 to be compared.
-
-		Returns
-		-------
-		bool
-			True if the two graph are equal.
-			
-		Notes
-		-----
-		This is not an identical check. Here the two graphs are equal if and only if their original_node_ids, nodes, all node labels, edges and all edge labels are equal. This function is specifically designed for class `MedianGraphEstimator` and should not be used elsewhere.
-		"""
-		# check original node ids.
-		if not g1.graph['original_node_ids'] == g2.graph['original_node_ids']:
-			return False
-		# check nodes.
-		nlist1 = [n for n in g1.nodes(data=True)]
-		nlist2 = [n for n in g2.nodes(data=True)]
-		if not nlist1 == nlist2:
-			return False
-		# check edges.
-		elist1 = [n for n in g1.edges(data=True)]
-		elist2 = [n for n in g2.edges(data=True)]
-		if not elist1 == elist2:
-			return False
-
-		return True
-	
-	
-	def compute_my_cost(g, h, node_map):
-		cost = 0.0
-		for node in g.nodes:
-			cost += 0
-		
\ No newline at end of file
diff --git a/gklearn/preimage/median_linlin.py b/gklearn/preimage/median_linlin.py
deleted file mode 100644
index 6139558..0000000
--- a/gklearn/preimage/median_linlin.py
+++ /dev/null
@@ -1,215 +0,0 @@
-import sys
-import pathlib
-import numpy as np
-import networkx as nx
-
-from gedlibpy import librariesImport, gedlibpy
-sys.path.insert(0, "/home/bgauzere/dev/optim-graphes/")
-import gklearn
-
-def replace_graph_in_env(script, graph, old_id, label='median'):
-    """
-    Replace a graph in script
-
-    If old_id is -1, add a new graph to the environnemt
-
-    """
-    if(old_id > -1):
-        script.PyClearGraph(old_id)
-    new_id = script.PyAddGraph(label)
-    for i in graph.nodes():
-        script.PyAddNode(new_id,str(i),graph.node[i]) # !! strings are required bt gedlib
-    for e in graph.edges:
-        script.PyAddEdge(new_id, str(e[0]),str(e[1]), {})
-    script.PyInitEnv()
-    script.PySetMethod("IPFP", "")
-    script.PyInitMethod()
-
-    return new_id
-    
-#Dessin median courrant
-def draw_Letter_graph(graph):
-    import numpy as np
-    import networkx as nx
-    import matplotlib.pyplot as plt
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
-    nx.draw_networkx(graph,pos)
-    plt.show()
-    
-#compute new mappings
-def update_mappings(script,median_id,listID):
-    med_distances = {}
-    med_mappings = {}
-    sod = 0
-    for i in range(0,len(listID)):
-        script.PyRunMethod(median_id,listID[i])
-        med_distances[i] = script.PyGetUpperBound(median_id,listID[i])
-        med_mappings[i] = script.PyGetForwardMap(median_id,listID[i])
-        sod += med_distances[i]
-    return med_distances, med_mappings, sod
-
-def calcul_Sij(all_mappings, all_graphs,i,j):
-    s_ij = 0
-    for k in range(0,len(all_mappings)):
-        cur_graph =  all_graphs[k]
-        cur_mapping = all_mappings[k]
-        size_graph = cur_graph.order()
-        if ((cur_mapping[i] < size_graph) and 
-            (cur_mapping[j] < size_graph) and 
-            (cur_graph.has_edge(cur_mapping[i], cur_mapping[j]) == True)):
-                s_ij += 1
-        
-    return s_ij
-
-# def update_median_nodes_L1(median,listIdSet,median_id,dataset, mappings):
-#     from scipy.stats.mstats import gmean
-
-#     for i in median.nodes():
-#         for k in listIdSet:
-#             vectors = [] #np.zeros((len(listIdSet),2))
-#             if(k != median_id):
-#                 phi_i = mappings[k][i]
-#                 if(phi_i < dataset[k].order()):
-#                     vectors.append([float(dataset[k].node[phi_i]['x']),float(dataset[k].node[phi_i]['y'])])
-
-#         new_labels = gmean(vectors)
-#         median.node[i]['x'] = str(new_labels[0])
-#         median.node[i]['y'] = str(new_labels[1])
-#     return median
-
-def update_median_nodes(median,dataset,mappings):
-    #update node attributes
-    for i in median.nodes():
-        nb_sub=0
-        mean_label = {'x' : 0, 'y' : 0}
-        for k in range(0,len(mappings)):
-            phi_i = mappings[k][i]
-            if ( phi_i < dataset[k].order() ):
-                nb_sub += 1
-                mean_label['x'] += 0.75*float(dataset[k].node[phi_i]['x'])
-                mean_label['y'] += 0.75*float(dataset[k].node[phi_i]['y'])
-        median.node[i]['x'] = str((1/0.75)*(mean_label['x']/nb_sub))
-        median.node[i]['y'] = str((1/0.75)*(mean_label['y']/nb_sub))
-    return median
-
-def update_median_edges(dataset, mappings, median, cei=0.425,cer=0.425):
-#for letter high, ceir = 1.7, alpha = 0.75
-    size_dataset = len(dataset)
-    ratio_cei_cer = cer/(cei + cer)
-    threshold = size_dataset*ratio_cei_cer
-    order_graph_median = median.order()
-    for i in range(0,order_graph_median):
-        for j in range(i+1,order_graph_median):
-            s_ij = calcul_Sij(mappings,dataset,i,j)
-            if(s_ij > threshold):
-                median.add_edge(i,j)
-            else:
-                if(median.has_edge(i,j)):
-                    median.remove_edge(i,j)
-    return median
-
-
-
-def compute_median(script, listID, dataset,verbose=False):
-    """Compute a graph median of a dataset according to an environment
-
-    Parameters
-
-    script : An gedlib initialized environnement 
-    listID (list): a list of ID in script: encodes the dataset 
-    dataset (list): corresponding graphs in networkX format. We assume that graph
-    listID[i] corresponds to dataset[i]
-
-    Returns:
-    A networkX graph, which is the median, with corresponding sod
-    """
-    print(len(listID))
-    median_set_index, median_set_sod = compute_median_set(script, listID)
-    print(median_set_index)
-    print(median_set_sod)
-    sods = []
-    #Ajout median dans environnement
-    set_median = dataset[median_set_index].copy()
-    median = dataset[median_set_index].copy()
-    cur_med_id = replace_graph_in_env(script,median,-1)
-    med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-    sods.append(cur_sod)
-    if(verbose):
-        print(cur_sod)
-    ite_max = 50
-    old_sod = cur_sod * 2
-    ite = 0
-    epsilon = 0.001
-
-    best_median 
-    while((ite < ite_max) and (np.abs(old_sod - cur_sod) > epsilon )):
-        median = update_median_nodes(median,dataset, med_mappings)
-        median = update_median_edges(dataset,med_mappings,median)
-
-        cur_med_id = replace_graph_in_env(script,median,cur_med_id)
-        med_distances, med_mappings, cur_sod = update_mappings(script,cur_med_id,listID)
-        
-        
-        sods.append(cur_sod)
-        if(verbose):
-            print(cur_sod)
-        ite += 1
-    return median, cur_sod, sods, set_median
-    
-    draw_Letter_graph(median)
-
-
-def compute_median_set(script,listID):
-    'Returns the id in listID corresponding to median set'
-    #Calcul median set
-    N=len(listID)
-    map_id_to_index = {}
-    map_index_to_id = {}
-    for i in range(0,len(listID)):
-        map_id_to_index[listID[i]] = i
-        map_index_to_id[i] = listID[i]
-        
-    distances = np.zeros((N,N))
-    for i in listID:
-        for j in listID:
-            script.PyRunMethod(i,j)
-            distances[map_id_to_index[i],map_id_to_index[j]] = script.PyGetUpperBound(i,j)
-
-    median_set_index = np.argmin(np.sum(distances,0))
-    sod = np.min(np.sum(distances,0))
-    
-    return median_set_index, sod
-
-def _convertGraph(G):
-    """Convert a graph to the proper NetworkX format that can be
-    recognized by library gedlibpy.
-    """
-    G_new = nx.Graph()
-    for nd, attrs in G.nodes(data=True):
-        G_new.add_node(str(nd), chem=attrs['atom'])
-#                G_new.add_node(str(nd), x=str(attrs['attributes'][0]), 
-#                               y=str(attrs['attributes'][1]))
-    for nd1, nd2, attrs in G.edges(data=True):
-        G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-#                G_new.add_edge(str(nd1), str(nd2))
-        
-    return G_new
-
-if __name__ == "__main__":
-    #Chargement du dataset
-    gedlibpy.PyLoadGXLGraph('/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/', '/home/bgauzere/dev/gedlib/data/collections/Letter_Z.xml')
-    gedlibpy.PySetEditCost("LETTER")
-    gedlibpy.PyInitEnv()
-    gedlibpy.PySetMethod("IPFP", "")
-    gedlibpy.PyInitMethod()
-
-    dataset,my_y = gklearn.utils.graphfiles.loadDataset("/home/bgauzere/dev/gedlib/data/datasets/Letter/HIGH/Letter_Z.cxl")
-
-    listID = gedlibpy.PyGetAllGraphIds()
-    median, sod = compute_median(gedlibpy,listID,dataset,verbose=True)
-    
-    print(sod)
-    draw_Letter_graph(median)
diff --git a/gklearn/preimage/median_preimage_generator.py b/gklearn/preimage/median_preimage_generator.py
deleted file mode 100644
index dfbaef2..0000000
--- a/gklearn/preimage/median_preimage_generator.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 26 18:27:22 2020
-
-@author: ljia
-"""
-from gklearn.preimage.preimage_generator import PreimageGenerator
-# from gklearn.utils.dataset import Dataset
-
-class MedianPreimageGenerator(PreimageGenerator):
-	
-	def __init__(self, mge, dataset):
-		self.__mge = mge
-		self.__dataset = dataset
\ No newline at end of file
diff --git a/gklearn/preimage/misc.py b/gklearn/preimage/misc.py
deleted file mode 100644
index 18682c8..0000000
--- a/gklearn/preimage/misc.py
+++ /dev/null
@@ -1,108 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 19 18:13:56 2020
-
-@author: ljia
-"""
-
-def options_string_to_options_map(options_string):
-    """Transforms an options string into an options map.
-    
-    Parameters
-    ----------
-    options_string : string
-        Options string of the form "[--<option> <arg>] [...]".
-    
-    Return
-    ------
-    options_map : dict{string : string}
-        Map with one key-value pair (<option>, <arg>) for each option contained in the string.
-    """
-    if options_string == '':
-        return
-    options_map = {}
-    words = []
-    tokenize(options_string, ' ', words)
-    expect_option_name = True
-    for word in words:
-        if expect_option_name:
-            is_opt_name, word = is_option_name(word)
-            if is_opt_name:
-                option_name = word
-                if option_name in options_map:
-                    raise Exception('Multiple specification of option "' + option_name + '".')
-                options_map[option_name] = ''
-            else:
-                raise Exception('Invalid options "' + options_string + '". Usage: options = "[--<option> <arg>] [...]"')
-        else:
-            is_opt_name, word = is_option_name(word)
-            if is_opt_name:
-                raise Exception('Invalid options "' + options_string + '". Usage: options = "[--<option> <arg>] [...]"')
-            else:
-                options_map[option_name] = word
-        expect_option_name = not expect_option_name
-    return options_map
-    
-
-def tokenize(sentence, sep, words):
-    """Separates a sentence into words separated by sep (unless contained in single quotes).
-    
-    Parameters
-    ----------
-    sentence : string
-        The sentence that should be tokenized.
-        
-    sep : string 
-        The separator. Must be different from "'".
-        
-    words : list[string]
-        The obtained words.
-    """
-    outside_quotes = True
-    word_length = 0
-    pos_word_start = 0
-    for pos in range(0, len(sentence)):
-        if sentence[pos] == '\'':
-            if not outside_quotes and pos < len(sentence) - 1:
-                if sentence[pos + 1] != sep:
-                    raise Exception('Sentence contains closing single quote which is followed by a char different from ' + sep + '.')
-            word_length += 1
-            outside_quotes = not outside_quotes
-        elif outside_quotes and sentence[pos] == sep:
-            if word_length > 0:
-                words.append(sentence[pos_word_start:pos_word_start + word_length])
-            pos_word_start = pos + 1
-            word_length = 0
-        else:
-            word_length += 1
-    if not outside_quotes:
-        raise Exception('Sentence contains unbalanced single quotes.')
-    if word_length > 0:
-        words.append(sentence[pos_word_start:pos_word_start + word_length])
-
-
-def is_option_name(word):
-    """Checks whether a word is an option name and, if so, removes the leading dashes.
-    
-    Parameters
-    ----------
-    word : string
-        Word.
-        
-    return
-    ------
-    True if word is of the form "--<option>".
-    
-    word : string
-        The word without the leading dashes.
-    """
-    if word[0] == '\'':
-        word = word[1:len(word) - 2]
-        return False, word
-    if len(word) < 3:
-        return False, word
-    if word[0] == '-' and word[1] == '-' and word[2] != '-':
-        word = word[2:]
-        return True, word
-    return False, word
\ No newline at end of file
diff --git a/gklearn/preimage/pathfrequency.py b/gklearn/preimage/pathfrequency.py
deleted file mode 100644
index 3bca1bc..0000000
--- a/gklearn/preimage/pathfrequency.py
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Mar 20 10:12:15 2019
-
-inferring a graph grom path frequency.
-@author: ljia
-"""
-#import numpy as np
-import networkx as nx
-from scipy.spatial.distance import hamming
-import itertools
-
-def SISF(K, v):
-    if output:
-        return output
-    else:
-        return 'no solution'
-
-    
-def SISF_M(K, v):
-    return output
-
-
-def GIPF_tree(v_obj, K=1, alphabet=[0, 1]):
-    if K == 1:
-        n_graph = v_obj[0] + v_obj[1]
-        D_T, father_idx = getDynamicTable(n_graph, alphabet)
-        
-        # get the vector the closest to v_obj.
-        if v_obj not in D_T:
-            print('no exact solution')
-            dis_lim = 1 / len(v_obj) # the possible shortest distance.
-            dis_min = 1.0 # minimum proportional distance
-            v_min = v_obj
-            for vc in D_T:
-                if vc[0] + vc[1] == n_graph:
-#                    print(vc)
-                    dis = hamming(vc, v_obj)
-                    if dis < dis_min:
-                        dis_min = dis
-                        v_min = vc
-                    if dis_min <= dis_lim:
-                        break
-            v_obj = v_min
-            
-        # obtain required graph by traceback procedure.        
-        return getObjectGraph(v_obj, D_T, father_idx, alphabet), v_obj
-    
-def GIPF_M(K, v):
-    return G
-
-
-def getDynamicTable(n_graph, alphabet=[0, 1]):
-    # init. When only one node exists.
-    D_T = {(1, 0, 0, 0, 0, 0): 1, (0, 1, 0, 0, 0, 0): 1, (0, 0, 1, 0, 0, 0): 0, 
-           (0, 0, 0, 1, 0, 0): 0, (0, 0, 0, 0, 1, 0): 0, (0, 0, 0, 0, 0, 1): 0,}
-    D_T = [(1, 0, 0, 0, 0, 0), (0, 1, 0, 0, 0, 0)]
-    father_idx = [-1, -1] # index of each vector's father
-    # add possible vectors.
-    for idx, v in enumerate(D_T):
-        if v[0] + v[1] < n_graph:
-            D_T.append((v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5]))
-            D_T.append((v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5]))
-            D_T.append((v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5]))
-            D_T.append((v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2))
-            father_idx += [idx, idx, idx, idx]
-    
-#    D_T = itertools.chain([(1, 0, 0, 0, 0, 0)], [(0, 1, 0, 0, 0, 0)])
-#    father_idx = itertools.chain([-1], [-1]) # index of each vector's father
-#    # add possible vectors.
-#    for idx, v in enumerate(D_T):
-#        if v[0] + v[1] < n_graph:
-#            D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2] + 2, v[3], v[4], v[5])])
-#            D_T = itertools.chain(D_T, [(v[0] + 1, v[1], v[2], v[3] + 1, v[4] + 1, v[5])])
-#            D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3] + 1, v[4] + 1, v[5])])
-#            D_T = itertools.chain(D_T, [(v[0], v[1] + 1, v[2], v[3], v[4], v[5] + 2)])
-#            father_idx = itertools.chain(father_idx, [idx, idx, idx, idx])
-    return D_T, father_idx
-
-
-def getObjectGraph(v_obj, D_T, father_idx, alphabet=[0, 1]):
-    g_obj = nx.Graph()
-    
-    # do vector traceback.
-    v_tb = [list(v_obj)] # traceback vectors.
-    v_tb_idx = [D_T.index(v_obj)] # indices of traceback vectors.
-    while v_tb_idx[-1] > 1:
-        idx_pre = father_idx[v_tb_idx[-1]]
-        v_tb_idx.append(idx_pre)
-        v_tb.append(list(D_T[idx_pre]))
-    v_tb = v_tb[::-1] # reverse
-#    v_tb_idx = v_tb_idx[::-1]
-
-    # construct tree.
-    v_c = v_tb[0] # current vector.
-    if v_c[0] == 1:
-        g_obj.add_node(0, node_label=alphabet[0])
-    else:
-        g_obj.add_node(0, node_label=alphabet[1])
-    for vct in v_tb[1:]:
-        if vct[0] - v_c[0] == 1:
-            if vct[2] - v_c[2] == 2: # transfer 1
-                label1 = alphabet[0]
-                label2 = alphabet[0]
-            else: # transfer 2
-                label1 = alphabet[1]
-                label2 = alphabet[0]
-        else: 
-            if vct[3] - v_c[3] == 1: # transfer 3
-                label1 = alphabet[0]
-                label2 = alphabet[1]
-            else: # transfer 4
-                label1 = alphabet[1]
-                label2 = alphabet[1]
-        for nd, attr in g_obj.nodes(data=True):
-            if attr['node_label'] == label1:
-                nb_node = nx.number_of_nodes(g_obj)
-                g_obj.add_node(nb_node, node_label=label2)
-                g_obj.add_edge(nd, nb_node)
-                break
-        v_c = vct
-    return g_obj
-
-
-import random
-def hierarchy_pos(G, root=None, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5):
-
-    '''
-    From Joel's answer at https://stackoverflow.com/a/29597209/2966723.  
-    Licensed under Creative Commons Attribution-Share Alike 
-
-    If the graph is a tree this will return the positions to plot this in a 
-    hierarchical layout.
-
-    G: the graph (must be a tree)
-
-    root: the root node of current branch 
-    - if the tree is directed and this is not given, 
-      the root will be found and used
-    - if the tree is directed and this is given, then 
-      the positions will be just for the descendants of this node.
-    - if the tree is undirected and not given, 
-      then a random choice will be used.
-
-    width: horizontal space allocated for this branch - avoids overlap with other branches
-
-    vert_gap: gap between levels of hierarchy
-
-    vert_loc: vertical location of root
-
-    xcenter: horizontal location of root
-    '''
-    if not nx.is_tree(G):
-        raise TypeError('cannot use hierarchy_pos on a graph that is not a tree')
-
-    if root is None:
-        if isinstance(G, nx.DiGraph):
-            root = next(iter(nx.topological_sort(G)))  #allows back compatibility with nx version 1.11
-        else:
-            root = random.choice(list(G.nodes))
-
-    def _hierarchy_pos(G, root, width=1., vert_gap = 0.2, vert_loc = 0, xcenter = 0.5, pos = None, parent = None):
-        '''
-        see hierarchy_pos docstring for most arguments
-
-        pos: a dict saying where all nodes go if they have been assigned
-        parent: parent of this branch. - only affects it if non-directed
-
-        '''
-
-        if pos is None:
-            pos = {root:(xcenter,vert_loc)}
-        else:
-            pos[root] = (xcenter, vert_loc)
-        children = list(G.neighbors(root))
-        if not isinstance(G, nx.DiGraph) and parent is not None:
-            children.remove(parent)  
-        if len(children)!=0:
-            dx = width/len(children) 
-            nextx = xcenter - width/2 - dx/2
-            for child in children:
-                nextx += dx
-                pos = _hierarchy_pos(G,child, width = dx, vert_gap = vert_gap, 
-                                    vert_loc = vert_loc-vert_gap, xcenter=nextx,
-                                    pos=pos, parent = root)
-        return pos
-
-
-    return _hierarchy_pos(G, root, width, vert_gap, vert_loc, xcenter)
-
-
-if __name__ == '__main__':
-    v_obj = (6, 4, 10, 3, 3, 2)
-#    v_obj = (6, 5, 10, 3, 3, 2)
-    tree_obj, v_obj = GIPF_tree(v_obj)
-    print('One closest vector is', v_obj)
-    # plot
-    pos = hierarchy_pos(tree_obj, 0) 
-    node_labels = nx.get_node_attributes(tree_obj, 'node_label')
-    nx.draw(tree_obj, pos=pos, labels=node_labels, with_labels=True)
\ No newline at end of file
diff --git a/gklearn/preimage/preimage_generator.py b/gklearn/preimage/preimage_generator.py
deleted file mode 100644
index 51fb43b..0000000
--- a/gklearn/preimage/preimage_generator.py
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Mar 26 18:26:36 2020
-
-@author: ljia
-"""
-
-class PreimageGenerator(object):
-	
-	def __init__(self):
-		pass
diff --git a/gklearn/preimage/preimage_iam.py b/gklearn/preimage/preimage_iam.py
deleted file mode 100644
index bf79d0e..0000000
--- a/gklearn/preimage/preimage_iam.py
+++ /dev/null
@@ -1,705 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Apr 30 17:07:43 2019
-
-A graph pre-image method combining iterative pre-image method in reference [1] 
-and the iterative alternate minimizations (IAM) in reference [2].
-@author: ljia
-@references:
-    [1] Gökhan H Bakir, Alexander Zien, and Koji Tsuda. Learning to and graph 
-    pre-images. In Joint Pattern Re ognition Symposium , pages 253-261. Springer, 2004.
-    [2] Generalized median graph via iterative alternate minimization.
-"""
-import sys
-import numpy as np
-from tqdm import tqdm
-import networkx as nx
-import matplotlib.pyplot as plt
-import random
-
-from iam import iam_upgraded
-from utils import dis_gstar, compute_kernel
-
-
-def preimage_iam(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, 
-                 gkernel, epsilon=0.001, InitIAMWithAllDk=False,
-                 params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, 
-                             'ite_max': 50, 'epsilon': 0.001, 
-                             'removeNodes': True, 'connected': False},
-                 params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 'method': 'IPFP', 
-                             'edit_cost_constant': [], 'stabilizer': 'min', 
-                             'repeat': 50}):
-    """This function constructs graph pre-image by the iterative pre-image 
-    framework in reference [1], algorithm 1, where the step of generating new 
-    graphs randomly is replaced by the IAM algorithm in reference [2].
-    
-    notes
-    -----
-    Every time a set of n better graphs is acquired, their distances in kernel space are
-    compared with the k nearest ones, and the k nearest distances from the k+n
-    distances will be used as the new ones.
-    """
-    # compute k nearest neighbors of phi in DN.
-    dis_all = [] # distance between g_star and each graph.
-    term3 = 0
-    for i1, a1 in enumerate(alpha):
-        for i2, a2 in enumerate(alpha):
-            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
-        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
-        dis_all.append(dtemp)
-        
-    # sort
-    sort_idx = np.argsort(dis_all)
-    dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
-    nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
-    ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
-    if dis_k[0] == 0: # the exact pre-image.
-        print('The exact pre-image is found from the input dataset.')
-        return 0, ghat_list, 0, 0
-    dhat = dis_k[0] # the nearest distance
-#    for g in ghat_list:
-#        draw_Letter_graph(g)
-#        nx.draw_networkx(g)
-#        plt.show()
-#        print(g.nodes(data=True))
-#        print(g.edges(data=True))
-    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-#    for gi in Gk:
-#        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-##        nx.draw_networkx(gi)
-#        plt.show()
-##        draw_Letter_graph(g)
-#        print(gi.nodes(data=True))
-#        print(gi.edges(data=True))
-    
-#    i = 1
-    r = 0
-    itr_total = 0
-    dis_of_each_itr = [dhat]
-    found = False
-    nb_updated = 0
-    nb_updated_k = 0
-    while r < r_max:# and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
-        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
-        print('Current preimage iteration =', r)
-        print('Total preimage iteration =', itr_total, '\n')
-        found = False
-        
-        Gn_nearest_median = [g.copy() for g in Gk]
-        if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
-            ghat_new_list = []
-            for g_tmp in Gk:
-                Gn_nearest_init = [g_tmp.copy()]
-                ghat_new_list_tmp, _, _ = iam_upgraded(Gn_nearest_median, 
-                        Gn_nearest_init, params_ged=params_ged, **params_iam)
-                ghat_new_list += ghat_new_list_tmp
-        else: # only the best graph in D_k is used to initialize IAM.
-            Gn_nearest_init = [g.copy() for g in Gk]
-            ghat_new_list, _, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init, 
-                    params_ged=params_ged, **params_iam)
-
-#        for g in g_tmp_list:
-#            nx.draw_networkx(g)
-#            plt.show()
-#            draw_Letter_graph(g)
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-            
-        # compute distance between \psi and the new generated graphs.
-        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
-        dhat_new_list = []
-        for idx, g_tmp in enumerate(ghat_new_list):
-            # @todo: the term3 below could use the one at the beginning of the function.
-            dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), 
-                                len(ghat_new_list) + len(Gn_median) + 1), 
-                                alpha, knew, withterm3=False))
-        
-        for idx_g, ghat_new in enumerate(ghat_new_list):          
-            dhat_new = dhat_new_list[idx_g]
-            
-            # if the new distance is smaller than the max of D_k.           
-            if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
-                # check if the new distance is the same as one in D_k.
-                is_duplicate = False
-                for dis_tmp in dis_k[1:-1]:
-                    if np.abs(dhat_new - dis_tmp) < epsilon:
-                        is_duplicate = True
-                        print('IAM: duplicate k nearest graph generated.')
-                        break
-                if not is_duplicate:
-                    if np.abs(dhat_new - dhat) < epsilon:
-                        print('IAM: I am equal!')
-#                        dhat = dhat_new
-#                        ghat_list = [ghat_new.copy()]
-                    else:
-                        print('IAM: we got better k nearest neighbors!')
-                        nb_updated_k += 1
-                        print('the k nearest neighbors are updated', 
-                              nb_updated_k, 'times.')
-                        
-                        dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
-                        Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
-                        sort_idx = np.argsort(dis_k)
-                        dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-                        Gk = [Gk[idx] for idx in sort_idx[0:k]]
-                        if dhat_new < dhat:
-                            print('IAM: I have smaller distance!')
-                            print(str(dhat) + '->' + str(dhat_new))
-                            dhat = dhat_new
-                            ghat_list = [Gk[0].copy()]
-                            r = 0
-                            nb_updated += 1
-                        
-                            print('the graph is updated', nb_updated, 'times.')                       
-                            nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'), 
-                                with_labels=True)
-                    ##            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
-                            plt.show()
-                        
-                        found = True
-        if not found:
-            r += 1            
-
-        dis_of_each_itr.append(dhat)
-        itr_total += 1
-        print('\nthe k shortest distances are', dis_k)
-        print('the shortest distances for previous iterations are', dis_of_each_itr)
-        
-    print('\n\nthe graph is updated', nb_updated, 'times.')
-    print('\nthe k nearest neighbors are updated', nb_updated_k, 'times.')
-    print('distances in kernel space:', dis_of_each_itr, '\n')
-    
-    return dhat, ghat_list, dis_of_each_itr[-1], nb_updated, nb_updated_k
-
-
-
-
-def preimage_iam_random_mix(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, 
-                            l_max, gkernel, epsilon=0.001, 
-                            InitIAMWithAllDk=False, InitRandomWithAllDk=True,
-                            params_iam={'c_ei': 1, 'c_er': 1, 'c_es': 1, 
-                                        'ite_max': 50, 'epsilon': 0.001, 
-                                        'removeNodes': True, 'connected': False},
-                            params_ged={'lib': 'gedlibpy', 'cost': 'CHEM_1', 
-                                        'method': 'IPFP', 'edit_cost_constant': [], 
-                                        'stabilizer': 'min', 'repeat': 50}):
-    """This function constructs graph pre-image by the iterative pre-image 
-    framework in reference [1], algorithm 1, where new graphs are generated 
-    randomly and by the IAM algorithm in reference [2].
-    
-    notes
-    -----
-    Every time a set of n better graphs is acquired, their distances in kernel space are
-    compared with the k nearest ones, and the k nearest distances from the k+n
-    distances will be used as the new ones.
-    """
-    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
-    # compute k nearest neighbors of phi in DN.
-    dis_all = [] # distance between g_star and each graph.
-    term3 = 0
-    for i1, a1 in enumerate(alpha):
-        for i2, a2 in enumerate(alpha):
-            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
-        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
-        dis_all.append(dtemp)
-        
-    # sort
-    sort_idx = np.argsort(dis_all)
-    dis_k = [dis_all[idis] for idis in sort_idx[0:k]] # the k shortest distances
-    nb_best = len(np.argwhere(dis_k == dis_k[0]).flatten().tolist())
-    ghat_list = [Gn_init[idx].copy() for idx in sort_idx[0:nb_best]] # the nearest neighbors of psi in DN
-    if dis_k[0] == 0: # the exact pre-image.
-        print('The exact pre-image is found from the input dataset.')
-        return 0, ghat_list, 0, 0
-    dhat = dis_k[0] # the nearest distance
-#    for g in ghat_list:
-#        draw_Letter_graph(g)
-#        nx.draw_networkx(g)
-#        plt.show()
-#        print(g.nodes(data=True))
-#        print(g.edges(data=True))
-    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-#    for gi in Gk:
-#        nx.draw(gi, labels=nx.get_node_attributes(gi, 'atom'), with_labels=True)
-##        nx.draw_networkx(gi)
-#        plt.show()
-##        draw_Letter_graph(g)
-#        print(gi.nodes(data=True))
-#        print(gi.edges(data=True))
-    
-    r = 0
-    itr_total = 0
-    dis_of_each_itr = [dhat]
-    nb_updated_iam = 0
-    nb_updated_k_iam = 0
-    nb_updated_random = 0
-    nb_updated_k_random = 0
-#    is_iam_duplicate = False
-    while r < r_max: # and not found: # @todo: if not found?# and np.abs(old_dis - cur_dis) > epsilon:
-        print('\n-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-')
-        print('Current preimage iteration =', r)
-        print('Total preimage iteration =', itr_total, '\n')
-        found_iam = False
-
-        Gn_nearest_median = [g.copy() for g in Gk]
-        if InitIAMWithAllDk: # each graph in D_k is used to initialize IAM.
-            ghat_new_list = []
-            for g_tmp in Gk:
-                Gn_nearest_init = [g_tmp.copy()]
-                ghat_new_list_tmp, _ = iam_upgraded(Gn_nearest_median, 
-                        Gn_nearest_init, params_ged=params_ged, **params_iam)
-                ghat_new_list += ghat_new_list_tmp
-        else: # only the best graph in D_k is used to initialize IAM.
-            Gn_nearest_init = [g.copy() for g in Gk]
-            ghat_new_list, _ = iam_upgraded(Gn_nearest_median, Gn_nearest_init, 
-                    params_ged=params_ged, **params_iam)
-
-#        for g in g_tmp_list:
-#            nx.draw_networkx(g)
-#            plt.show()
-#            draw_Letter_graph(g)
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-            
-        # compute distance between \psi and the new generated graphs.
-        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
-        dhat_new_list = []
-        
-        for idx, g_tmp in enumerate(ghat_new_list):
-            # @todo: the term3 below could use the one at the beginning of the function.
-            dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), 
-                            len(ghat_new_list) + len(Gn_median) + 1), 
-                            alpha, knew, withterm3=False))
-                
-        # find the new k nearest graphs. 
-        for idx_g, ghat_new in enumerate(ghat_new_list):          
-            dhat_new = dhat_new_list[idx_g]
-            
-            # if the new distance is smaller than the max of D_k.           
-            if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
-                # check if the new distance is the same as one in D_k.
-                is_duplicate = False
-                for dis_tmp in dis_k[1:-1]:
-                    if np.abs(dhat_new - dis_tmp) < epsilon:
-                        is_duplicate = True
-                        print('IAM: duplicate k nearest graph generated.')
-                        break
-                if not is_duplicate:
-                    if np.abs(dhat_new - dhat) < epsilon:
-                        print('IAM: I am equal!')
-#                        dhat = dhat_new
-#                        ghat_list = [ghat_new.copy()]
-                    else:
-                        print('IAM: we got better k nearest neighbors!')
-                        nb_updated_k_iam += 1
-                        print('the k nearest neighbors are updated', 
-                              nb_updated_k_iam, 'times.')
-                        
-                        dis_k = [dhat_new] + dis_k[0:k-1] # add the new nearest distance.
-                        Gk = [ghat_new.copy()] + Gk[0:k-1] # add the corresponding graph.
-                        sort_idx = np.argsort(dis_k)
-                        dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-                        Gk = [Gk[idx] for idx in sort_idx[0:k]]
-                        if dhat_new < dhat:
-                            print('IAM: I have smaller distance!')
-                            print(str(dhat) + '->' + str(dhat_new))
-                            dhat = dhat_new
-                            ghat_list = [Gk[0].copy()]
-                            r = 0
-                            nb_updated_iam += 1
-                        
-                            print('the graph is updated by IAM', nb_updated_iam, 
-                                  'times.')                       
-                            nx.draw(Gk[0], labels=nx.get_node_attributes(Gk[0], 'atom'), 
-                                with_labels=True)
-                    ##            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
-                            plt.show()
-                        
-                        found_iam = True
-                        
-        # when new distance is not smaller than the max of D_k, use random generation.
-        if not found_iam:
-            print('Distance not better, switching to random generation now.')
-            print(str(dhat) + '->' + str(dhat_new))
-            
-            if InitRandomWithAllDk: # use all k nearest graphs as the initials.
-                init_list = [g_init.copy() for g_init in Gk]
-            else: # use just the nearest graph as the initial.
-                init_list = [Gk[0].copy()]
-            
-            # number of edges to be changed.
-            if len(init_list) == 1:
-                # @todo what if the log is negetive? how to choose alpha (scalar)? seems fdgs is always 1.
-    #            fdgs = dhat_new
-                fdgs = nb_updated_random + 1
-                if fdgs < 1:
-                    fdgs = 1
-                fdgs = int(np.ceil(np.log(fdgs)))
-                if fdgs < 1:
-                    fdgs += 1
-    #            fdgs = nb_updated_random + 1 # @todo:
-                fdgs_list = [fdgs]
-            else:
-                # @todo what if the log is negetive? how to choose alpha (scalar)?
-                fdgs_list = np.array(dis_k[:])
-                if np.min(fdgs_list) < 1:
-                    fdgs_list /= dis_k[0]
-                fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
-                if np.min(fdgs_list) < 1:
-                    fdgs_list = np.array(fdgs_list) + 1
-                
-            l = 0
-            found_random = False
-            while l < l_max and not found_random:
-                for idx_g, g_tmp in enumerate(init_list):
-                    # add and delete edges.
-                    ghat_new = nx.convert_node_labels_to_integers(g_tmp.copy())
-                    # @todo: should we use just half of the adjacency matrix for undirected graphs?
-                    nb_vpairs = nx.number_of_nodes(ghat_new) * (nx.number_of_nodes(ghat_new) - 1)
-                    np.random.seed()
-                    # which edges to change.                
-                    # @todo: what if fdgs is bigger than nb_vpairs?
-                    idx_change = random.sample(range(nb_vpairs), fdgs_list[idx_g] if 
-                                               fdgs_list[idx_g] < nb_vpairs else nb_vpairs)
-#                idx_change = np.random.randint(0, nx.number_of_nodes(gs) * 
-#                                               (nx.number_of_nodes(gs) - 1), fdgs)
-                    for item in idx_change:
-                        node1 = int(item / (nx.number_of_nodes(ghat_new) - 1))
-                        node2 = (item - node1 * (nx.number_of_nodes(ghat_new) - 1))
-                        if node2 >= node1: # skip the self pair.
-                            node2 += 1
-                        # @todo: is the randomness correct?
-                        if not ghat_new.has_edge(node1, node2):
-                            ghat_new.add_edge(node1, node2)
-    #                        nx.draw_networkx(gs)
-    #                        plt.show()
-    #                        nx.draw_networkx(ghat_new)
-    #                        plt.show()
-                        else:
-                            ghat_new.remove_edge(node1, node2)
-    #                        nx.draw_networkx(gs)
-    #                        plt.show()
-    #                        nx.draw_networkx(ghat_new)
-    #                        plt.show()
-    #                nx.draw_networkx(ghat_new)
-    #                plt.show()
-                            
-                    # compute distance between \psi and the new generated graph.
-                    knew = compute_kernel([ghat_new] + Gn_median, gkernel, verbose=False)
-                    dhat_new = dis_gstar(0, range(1, len(Gn_median) + 1), 
-                                         alpha, knew, withterm3=False)
-                    # @todo: the new distance is smaller or also equal?
-                    if dhat_new < dis_k[-1] and np.abs(dhat_new - dis_k[-1]) >= epsilon:
-                        # check if the new distance is the same as one in D_k.
-                        is_duplicate = False
-                        for dis_tmp in dis_k[1:-1]:
-                            if np.abs(dhat_new - dis_tmp) < epsilon:
-                                is_duplicate = True
-                                print('Random: duplicate k nearest graph generated.')
-                                break
-                        if not is_duplicate:
-                            if np.abs(dhat_new - dhat) < epsilon:
-                                print('Random: I am equal!')
-        #                        dhat = dhat_new
-        #                        ghat_list = [ghat_new.copy()]
-                            else:
-                                print('Random: we got better k nearest neighbors!')
-                                print('l =', str(l))
-                                nb_updated_k_random += 1
-                                print('the k nearest neighbors are updated by random generation', 
-                                          nb_updated_k_random, 'times.')
-                                
-                                dis_k = [dhat_new] + dis_k # add the new nearest distances.
-                                Gk = [ghat_new.copy()] + Gk # add the corresponding graphs.
-                                sort_idx = np.argsort(dis_k)
-                                dis_k = [dis_k[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-                                Gk = [Gk[idx] for idx in sort_idx[0:k]]
-                                if dhat_new < dhat:
-                                    print('\nRandom: I am smaller!')
-                                    print('l =', str(l))
-                                    print(dhat, '->', dhat_new)                       
-                                    dhat = dhat_new
-                                    ghat_list = [ghat_new.copy()]
-                                    r = 0
-                                    nb_updated_random += 1
-        
-                                    print('the graph is updated by random generation', 
-                                          nb_updated_random, 'times.')
-                                             
-                                    nx.draw(ghat_new, labels=nx.get_node_attributes(ghat_new, 'atom'), 
-                                        with_labels=True)
-        ##            plt.savefig("results/gk_iam/simple_two/xx" + str(i) + ".png", format="PNG")
-                                    plt.show()
-                                found_random = True
-                                break
-                l += 1
-            if not found_random: # l == l_max:
-                r += 1            
-            
-        dis_of_each_itr.append(dhat)
-        itr_total += 1
-        print('\nthe k shortest distances are', dis_k)
-        print('the shortest distances for previous iterations are', dis_of_each_itr)
-        
-    print('\n\nthe graph is updated by IAM', nb_updated_iam, 'times, and by random generation',
-          nb_updated_random, 'times.')
-    print('\nthe k nearest neighbors are updated by IAM', nb_updated_k_iam, 
-          'times, and by random generation', nb_updated_k_random, 'times.')
-    print('distances in kernel space:', dis_of_each_itr, '\n')
-    
-    return dhat, ghat_list, dis_of_each_itr[-1], \
-            nb_updated_iam, nb_updated_random, nb_updated_k_iam, nb_updated_k_random
-
-
-###############################################################################
-# Old implementations.
-    
-#def gk_iam(Gn, alpha):
-#    """This function constructs graph pre-image by the iterative pre-image 
-#    framework in reference [1], algorithm 1, where the step of generating new 
-#    graphs randomly is replaced by the IAM algorithm in reference [2].
-#    
-#    notes
-#    -----
-#    Every time a better graph is acquired, the older one is replaced by it.
-#    """
-#    pass
-#    # compute k nearest neighbors of phi in DN.
-#    dis_list = [] # distance between g_star and each graph.
-#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
-#        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
-#                      k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * 
-#                      (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * 
-#                      k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
-#        dis_list.append(dtemp)
-#        
-#    # sort
-#    sort_idx = np.argsort(dis_list)
-#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
-#    g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
-#    if dis_gs[0] == 0: # the exact pre-image.
-#        print('The exact pre-image is found from the input dataset.')
-#        return 0, g0hat
-#    dhat = dis_gs[0] # the nearest distance
-#    Gk = [Gn[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
-#    gihat_list = []
-#    
-##    i = 1
-#    r = 1
-#    while r < r_max:
-#        print('r =', r)
-##        found = False
-#        Gs_nearest = Gk + gihat_list
-#        g_tmp = iam(Gs_nearest)
-#        
-#        # compute distance between \psi and the new generated graph.
-#        knew = marginalizedkernel([g_tmp, g1, g2], node_label='atom', edge_label=None,
-#                       p_quit=lmbda, n_iteration=20, remove_totters=False,
-#                       n_jobs=multiprocessing.cpu_count(), verbose=False)
-#        dnew = knew[0][0, 0] - 2 * (alpha * knew[0][0, 1] + (1 - alpha) * 
-#              knew[0][0, 2]) + (alpha * alpha * k_list[idx1] + alpha * 
-#              (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * 
-#              k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
-#        if dnew <= dhat: # the new distance is smaller
-#            print('I am smaller!')
-#            dhat = dnew
-#            g_new = g_tmp.copy() # found better graph.
-#            gihat_list = [g_new]
-#            dis_gs.append(dhat)
-#            r = 0
-#        else:
-#            r += 1
-#            
-#    ghat = ([g0hat] if len(gihat_list) == 0 else gihat_list)
-#    
-#    return dhat, ghat
-
-
-#def gk_iam_nearest(Gn, alpha, idx_gi, Kmatrix, k, r_max):
-#    """This function constructs graph pre-image by the iterative pre-image 
-#    framework in reference [1], algorithm 1, where the step of generating new 
-#    graphs randomly is replaced by the IAM algorithm in reference [2].
-#    
-#    notes
-#    -----
-#    Every time a better graph is acquired, its distance in kernel space is
-#    compared with the k nearest ones, and the k nearest distances from the k+1
-#    distances will be used as the new ones.
-#    """
-#    # compute k nearest neighbors of phi in DN.
-#    dis_list = [] # distance between g_star and each graph.
-#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
-#        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
-##        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
-##                      k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * 
-##                      (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * 
-##                      k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
-#        dis_list.append(dtemp)
-#        
-#    # sort
-#    sort_idx = np.argsort(dis_list)
-#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
-#    g0hat = Gn[sort_idx[0]] # the nearest neighbor of phi in DN
-#    if dis_gs[0] == 0: # the exact pre-image.
-#        print('The exact pre-image is found from the input dataset.')
-#        return 0, g0hat
-#    dhat = dis_gs[0] # the nearest distance
-#    ghat = g0hat.copy()
-#    Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-#    for gi in Gk:
-#        nx.draw_networkx(gi)
-#        plt.show()
-#        print(gi.nodes(data=True))
-#        print(gi.edges(data=True))
-#    Gs_nearest = Gk.copy()
-##    gihat_list = []
-#    
-##    i = 1
-#    r = 1
-#    while r < r_max:
-#        print('r =', r)
-##        found = False
-##        Gs_nearest = Gk + gihat_list
-##        g_tmp = iam(Gs_nearest)
-#        g_tmp = test_iam_with_more_graphs_as_init(Gs_nearest, Gs_nearest, c_ei=1, c_er=1, c_es=1)
-#        nx.draw_networkx(g_tmp)
-#        plt.show()
-#        print(g_tmp.nodes(data=True))
-#        print(g_tmp.edges(data=True))
-#        
-#        # compute distance between \psi and the new generated graph.
-#        gi_list = [Gn[i] for i in idx_gi]
-#        knew = compute_kernel([g_tmp] + gi_list, 'untilhpathkernel', False)
-#        dnew = dis_gstar(0, range(1, len(gi_list) + 1), alpha, knew)
-#        
-##        dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * 
-##              knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * 
-##              alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * 
-##              k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
-#        if dnew <= dhat and g_tmp != ghat: # the new distance is smaller
-#            print('I am smaller!')
-#            print(str(dhat) + '->' + str(dnew))
-##            nx.draw_networkx(ghat)
-##            plt.show()
-##            print('->')
-##            nx.draw_networkx(g_tmp)
-##            plt.show()
-#            
-#            dhat = dnew
-#            g_new = g_tmp.copy() # found better graph.
-#            ghat = g_tmp.copy()
-#            dis_gs.append(dhat) # add the new nearest distance.
-#            Gs_nearest.append(g_new) # add the corresponding graph.
-#            sort_idx = np.argsort(dis_gs)
-#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
-#            r = 0
-#        else:
-#            r += 1
-#    
-#    return dhat, ghat
-
-
-#def gk_iam_nearest_multi(Gn, alpha, idx_gi, Kmatrix, k, r_max):
-#    """This function constructs graph pre-image by the iterative pre-image 
-#    framework in reference [1], algorithm 1, where the step of generating new 
-#    graphs randomly is replaced by the IAM algorithm in reference [2].
-#    
-#    notes
-#    -----
-#    Every time a set of n better graphs is acquired, their distances in kernel space are
-#    compared with the k nearest ones, and the k nearest distances from the k+n
-#    distances will be used as the new ones.
-#    """
-#    Gn_median = [Gn[idx].copy() for idx in idx_gi]
-#    # compute k nearest neighbors of phi in DN.
-#    dis_list = [] # distance between g_star and each graph.
-#    for ig, g in tqdm(enumerate(Gn), desc='computing distances', file=sys.stdout):
-#        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix)
-##        dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
-##                      k_g2_list[ig]) + (alpha * alpha * k_list[0] + alpha * 
-##                      (1 - alpha) * k_g2_list[0] + (1 - alpha) * alpha * 
-##                      k_g1_list[6] + (1 - alpha) * (1 - alpha) * k_list[6])
-#        dis_list.append(dtemp)
-#        
-#    # sort
-#    sort_idx = np.argsort(dis_list)
-#    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
-#    nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-#    g0hat_list = [Gn[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
-#    if dis_gs[0] == 0: # the exact pre-image.
-#        print('The exact pre-image is found from the input dataset.')
-#        return 0, g0hat_list
-#    dhat = dis_gs[0] # the nearest distance
-#    ghat_list = [g.copy() for g in g0hat_list]
-#    for g in ghat_list:
-#        nx.draw_networkx(g)
-#        plt.show()
-#        print(g.nodes(data=True))
-#        print(g.edges(data=True))
-#    Gk = [Gn[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-#    for gi in Gk:
-#        nx.draw_networkx(gi)
-#        plt.show()
-#        print(gi.nodes(data=True))
-#        print(gi.edges(data=True))
-#    Gs_nearest = Gk.copy()
-##    gihat_list = []
-#    
-##    i = 1
-#    r = 1
-#    while r < r_max:
-#        print('r =', r)
-##        found = False
-##        Gs_nearest = Gk + gihat_list
-##        g_tmp = iam(Gs_nearest)
-#        g_tmp_list = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-#                Gn_median, Gs_nearest, c_ei=1, c_er=1, c_es=1)
-#        for g in g_tmp_list:
-#            nx.draw_networkx(g)
-#            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-#        
-#        # compute distance between \psi and the new generated graphs.
-#        gi_list = [Gn[i] for i in idx_gi]
-#        knew = compute_kernel(g_tmp_list + gi_list, 'marginalizedkernel', False)
-#        dnew_list = []
-#        for idx, g_tmp in enumerate(g_tmp_list):
-#            dnew_list.append(dis_gstar(idx, range(len(g_tmp_list), 
-#                            len(g_tmp_list) + len(gi_list) + 1), alpha, knew))
-#        
-##        dnew = knew[0, 0] - 2 * (alpha[0] * knew[0, 1] + alpha[1] * 
-##              knew[0, 2]) + (alpha[0] * alpha[0] * k_list[0] + alpha[0] * 
-##              alpha[1] * k_g2_list[0] + alpha[1] * alpha[0] * 
-##              k_g1_list[1] + alpha[1] * alpha[1] * k_list[1])
-#            
-#        # find the new k nearest graphs.
-#        dis_gs = dnew_list + dis_gs # add the new nearest distances.
-#        Gs_nearest = [g.copy() for g in g_tmp_list] + Gs_nearest # add the corresponding graphs.
-#        sort_idx = np.argsort(dis_gs)
-#        if len([i for i in sort_idx[0:k] if i < len(dnew_list)]) > 0:
-#            print('We got better k nearest neighbors! Hurray!')
-#            dis_gs = [dis_gs[idx] for idx in sort_idx[0:k]] # the new k nearest distances.
-#            print(dis_gs[-1])
-#            Gs_nearest = [Gs_nearest[idx] for idx in sort_idx[0:k]]
-#            nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-#            if len([i for i in sort_idx[0:nb_best] if i < len(dnew_list)]) > 0:
-#                print('I have smaller or equal distance!')
-#                dhat = dis_gs[0]
-#                print(str(dhat) + '->' + str(dhat))
-#                idx_best_list = np.argwhere(dnew_list == dhat).flatten().tolist()
-#                ghat_list = [g_tmp_list[idx].copy() for idx in idx_best_list]
-#                for g in ghat_list:
-#                    nx.draw_networkx(g)
-#                    plt.show()
-#                    print(g.nodes(data=True))
-#                    print(g.edges(data=True))
-#            r = 0
-#        else:
-#            r += 1
-#    
-#    return dhat, ghat_list
\ No newline at end of file
diff --git a/gklearn/preimage/preimage_random.py b/gklearn/preimage/preimage_random.py
deleted file mode 100644
index e5f74cd..0000000
--- a/gklearn/preimage/preimage_random.py
+++ /dev/null
@@ -1,309 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Mar  6 16:03:11 2019
-
-pre-image
-@author: ljia
-"""
-
-import sys
-import numpy as np
-import random
-from tqdm import tqdm
-import networkx as nx
-import matplotlib.pyplot as plt
-
-from gklearn.preimage.utils import compute_kernel, dis_gstar
-
-
-def preimage_random(Gn_init, Gn_median, alpha, idx_gi, Kmatrix, k, r_max, l, gkernel):
-    Gn_init = [nx.convert_node_labels_to_integers(g) for g in Gn_init]
-    
-    # compute k nearest neighbors of phi in DN.
-    dis_list = [] # distance between g_star and each graph.
-    term3 = 0
-    for i1, a1 in enumerate(alpha):
-        for i2, a2 in enumerate(alpha):
-            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
-        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
-        dis_list.append(dtemp)
-#    print(np.max(dis_list))
-#    print(np.min(dis_list))
-#    print(np.min([item for item in dis_list if item != 0]))
-#    print(np.mean(dis_list))
-        
-    # sort
-    sort_idx = np.argsort(dis_list)
-    dis_gs = [dis_list[idis] for idis in sort_idx[0:k]] # the k shortest distances
-    nb_best = len(np.argwhere(dis_gs == dis_gs[0]).flatten().tolist())
-    g0hat_list = [Gn_init[idx] for idx in sort_idx[0:nb_best]] # the nearest neighbors of phi in DN
-    if dis_gs[0] == 0: # the exact pre-image.
-        print('The exact pre-image is found from the input dataset.')
-        return 0, g0hat_list[0], 0
-    dhat = dis_gs[0] # the nearest distance
-#    ghat_list = [g.copy() for g in g0hat_list]
-#    for g in ghat_list:
-#        draw_Letter_graph(g)
-#        nx.draw_networkx(g)
-#        plt.show()
-#        print(g.nodes(data=True))
-#        print(g.edges(data=True))
-    Gk = [Gn_init[ig].copy() for ig in sort_idx[0:k]] # the k nearest neighbors
-#    for gi in Gk:
-##        nx.draw_networkx(gi)
-##        plt.show()
-#        draw_Letter_graph(g)
-#        print(gi.nodes(data=True))
-#        print(gi.edges(data=True))
-    Gs_nearest = [g.copy() for g in Gk]
-    gihat_list = []
-    dihat_list = []
-    
-#    i = 1
-    r = 0
-#    sod_list = [dhat]
-#    found = False
-    dis_of_each_itr = [dhat]
-    nb_updated = 0
-    g_best = []
-    while r < r_max:
-        print('\nr =', r)
-        print('itr for gk =', nb_updated, '\n')
-        found = False
-        dis_bests = dis_gs + dihat_list
-        # @todo what if the log is negetive? how to choose alpha (scalar)?
-        fdgs_list = np.array(dis_bests)
-        if np.min(fdgs_list) < 1:
-            fdgs_list /= np.min(dis_bests)
-        fdgs_list = [int(item) for item in np.ceil(np.log(fdgs_list))]
-        if np.min(fdgs_list) < 1:
-            fdgs_list = np.array(fdgs_list) + 1
-            
-        for ig, gs in enumerate(Gs_nearest + gihat_list):
-#            nx.draw_networkx(gs)
-#            plt.show()
-            for trail in range(0, l):
-#            for trail in tqdm(range(0, l), desc='l loops', file=sys.stdout):
-                # add and delete edges.
-                gtemp = gs.copy()
-                np.random.seed()
-                # which edges to change.
-                # @todo: should we use just half of the adjacency matrix for undirected graphs?
-                nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
-                # @todo: what if fdgs is bigger than nb_vpairs?
-                idx_change = random.sample(range(nb_vpairs), fdgs_list[ig] if 
-                                           fdgs_list[ig] < nb_vpairs else nb_vpairs)
-#                idx_change = np.random.randint(0, nx.number_of_nodes(gs) * 
-#                                               (nx.number_of_nodes(gs) - 1), fdgs)
-                for item in idx_change:
-                    node1 = int(item / (nx.number_of_nodes(gs) - 1))
-                    node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
-                    if node2 >= node1: # skip the self pair.
-                        node2 += 1
-                    # @todo: is the randomness correct?
-                    if not gtemp.has_edge(node1, node2):
-                        gtemp.add_edge(node1, node2)
-#                        nx.draw_networkx(gs)
-#                        plt.show()
-#                        nx.draw_networkx(gtemp)
-#                        plt.show()
-                    else:
-                        gtemp.remove_edge(node1, node2)
-#                        nx.draw_networkx(gs)
-#                        plt.show()
-#                        nx.draw_networkx(gtemp)
-#                        plt.show()
-#                nx.draw_networkx(gtemp)
-#                plt.show()
-                
-                # compute distance between \psi and the new generated graph.
-#                knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
-#                               p_quit=lmbda, n_iteration=20, remove_totters=False,
-#                               n_jobs=multiprocessing.cpu_count(), verbose=False)
-                knew = compute_kernel([gtemp] + Gn_median, gkernel, verbose=False)
-                dnew = dis_gstar(0, range(1, len(Gn_median) + 1), alpha, knew, 
-                                 withterm3=False)
-                if dnew <= dhat: # @todo: the new distance is smaller or also equal?
-                    if dnew < dhat:
-                        print('\nI am smaller!')
-                        print('ig =', str(ig), ', l =', str(trail))
-                        print(dhat, '->', dnew)
-                        nb_updated += 1
-                    elif dnew == dhat:                   
-                        print('I am equal!') 
-#                    nx.draw_networkx(gtemp)
-#                    plt.show()
-#                    print(gtemp.nodes(data=True))
-#                    print(gtemp.edges(data=True))
-                    dhat = dnew
-                    gnew = gtemp.copy()
-                    found = True # found better graph.                  
-        if found:
-            r = 0
-            gihat_list = [gnew]
-            dihat_list = [dhat]
-        else:
-            r += 1
-            
-        dis_of_each_itr.append(dhat)
-        print('the shortest distances for previous iterations are', dis_of_each_itr)
-#    dis_best.append(dhat)
-    g_best = (g0hat_list[0] if len(gihat_list) == 0 else gihat_list[0])
-    print('distances in kernel space:', dis_of_each_itr, '\n')
-    
-    return dhat, g_best, nb_updated
-#    return 0, 0, 0
-
-
-if __name__ == '__main__':
-    from gklearn.utils.graphfiles import loadDataset
-    
-#    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-#          'extra_params': {}}  # node/edge symb
-    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-#          'extra_params': {}}
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-#            'extra_params': {}} # node symb
-    
-    DN, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    #DN = DN[0:10]
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 3 # 10 # iteration limit.
-    l = 500
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    #alpha_range = np.linspace(0.1, 0.9, 9)
-    k = 10 # 5 # k nearest neighbors
-    
-    # randomly select two molecules
-    #np.random.seed(1)
-    #idx1, idx2 = np.random.randint(0, len(DN), 2)
-    #g1 = DN[idx1]
-    #g2 = DN[idx2]
-    idx1 = 0
-    idx2 = 6
-    g1 = DN[idx1]
-    g2 = DN[idx2]
-    
-    # compute 
-    k_list = [] # kernel between each graph and itself.
-    k_g1_list = [] # kernel between each graph and g1
-    k_g2_list = [] # kernel between each graph and g2
-    for ig, g in tqdm(enumerate(DN), desc='computing self kernels', file=sys.stdout): 
-    #    ktemp = marginalizedkernel([g, g1, g2], node_label='atom', edge_label=None,
-    #                               p_quit=lmbda, n_iteration=20, remove_totters=False,
-    #                               n_jobs=multiprocessing.cpu_count(), verbose=False)
-        ktemp = compute_kernel([g, g1, g2], 'untilhpathkernel', verbose=False)
-        k_list.append(ktemp[0, 0])
-        k_g1_list.append(ktemp[0, 1])
-        k_g2_list.append(ktemp[0, 2])
-    
-    g_best = []
-    dis_best = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('alpha =', alpha)
-        # compute k nearest neighbors of phi in DN.
-        dis_list = [] # distance between g_star and each graph.
-        for ig, g in tqdm(enumerate(DN), desc='computing distances', file=sys.stdout):
-            dtemp = k_list[ig] - 2 * (alpha * k_g1_list[ig] + (1 - alpha) * 
-                          k_g2_list[ig]) + (alpha * alpha * k_list[idx1] + alpha * 
-                          (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * 
-                          k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2])
-            dis_list.append(np.sqrt(dtemp))
-        
-        # sort
-        sort_idx = np.argsort(dis_list)
-        dis_gs = [dis_list[idis] for idis in sort_idx[0:k]]
-        g0hat = DN[sort_idx[0]] # the nearest neighbor of phi in DN
-        if dis_gs[0] == 0: # the exact pre-image.
-            print('The exact pre-image is found from the input dataset.')
-            g_pimg = g0hat
-            break
-        dhat = dis_gs[0] # the nearest distance
-        Dk = [DN[ig] for ig in sort_idx[0:k]] # the k nearest neighbors
-        gihat_list = []
-        
-        i = 1
-        r = 1
-        while r < r_max:
-            print('r =', r)
-            found = False
-            for ig, gs in enumerate(Dk + gihat_list):
-    #            nx.draw_networkx(gs)
-    #            plt.show()
-                # @todo what if the log is negetive?
-                fdgs = int(np.abs(np.ceil(np.log(alpha * dis_gs[ig]))))
-                for trail in tqdm(range(0, l), desc='l loop', file=sys.stdout):
-                    # add and delete edges.
-                    gtemp = gs.copy()
-                    np.random.seed()
-                    # which edges to change.
-                    # @todo: should we use just half of the adjacency matrix for undirected graphs?
-                    nb_vpairs = nx.number_of_nodes(gs) * (nx.number_of_nodes(gs) - 1)
-                    # @todo: what if fdgs is bigger than nb_vpairs?
-                    idx_change = random.sample(range(nb_vpairs), fdgs if fdgs < nb_vpairs else nb_vpairs)
-    #                idx_change = np.random.randint(0, nx.number_of_nodes(gs) * 
-    #                                               (nx.number_of_nodes(gs) - 1), fdgs)
-                    for item in idx_change:
-                        node1 = int(item / (nx.number_of_nodes(gs) - 1))
-                        node2 = (item - node1 * (nx.number_of_nodes(gs) - 1))
-                        if node2 >= node1: # skip the self pair.
-                            node2 += 1
-                        # @todo: is the randomness correct?
-                        if not gtemp.has_edge(node1, node2):
-                            # @todo: how to update the bond_type? 0 or 1?
-                            gtemp.add_edges_from([(node1, node2, {'bond_type': 1})])
-    #                        nx.draw_networkx(gs)
-    #                        plt.show()
-    #                        nx.draw_networkx(gtemp)
-    #                        plt.show()
-                        else:
-                            gtemp.remove_edge(node1, node2)
-    #                        nx.draw_networkx(gs)
-    #                        plt.show()
-    #                        nx.draw_networkx(gtemp)
-    #                        plt.show()
-    #                nx.draw_networkx(gtemp)
-    #                plt.show()
-                    
-                    # compute distance between phi and the new generated graph.
-    #                knew = marginalizedkernel([gtemp, g1, g2], node_label='atom', edge_label=None,
-    #                               p_quit=lmbda, n_iteration=20, remove_totters=False,
-    #                               n_jobs=multiprocessing.cpu_count(), verbose=False)
-                    knew = compute_kernel([gtemp, g1, g2], 'untilhpathkernel', verbose=False)
-                    dnew = np.sqrt(knew[0, 0] - 2 * (alpha * knew[0, 1] + (1 - alpha) * 
-                          knew[0, 2]) + (alpha * alpha * k_list[idx1] + alpha * 
-                          (1 - alpha) * k_g2_list[idx1] + (1 - alpha) * alpha * 
-                          k_g1_list[idx2] + (1 - alpha) * (1 - alpha) * k_list[idx2]))
-                    if dnew < dhat: # @todo: the new distance is smaller or also equal?
-                        print('I am smaller!')
-                        print(dhat, '->', dnew)
-                        nx.draw_networkx(gtemp)
-                        plt.show()
-                        print(gtemp.nodes(data=True))
-                        print(gtemp.edges(data=True))
-                        dhat = dnew
-                        gnew = gtemp.copy()
-                        found = True # found better graph.
-                        r = 0
-                    elif dnew == dhat:                   
-                        print('I am equal!')                   
-            if found:
-                gihat_list = [gnew]
-                dis_gs.append(dhat)
-            else:
-                r += 1
-        dis_best.append(dhat)
-        g_best += ([g0hat] if len(gihat_list) == 0 else gihat_list)       
-    
-    
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-        print('the corresponding pre-image is')
-        nx.draw_networkx(g_best[idx])
-        plt.show()
\ No newline at end of file
diff --git a/gklearn/preimage/python_code.py b/gklearn/preimage/python_code.py
deleted file mode 100644
index 3772526..0000000
--- a/gklearn/preimage/python_code.py
+++ /dev/null
@@ -1,122 +0,0 @@
-		elif opt_name == 'random-inits':
-			try:
-				num_random_inits_ = std::stoul(opt_val)
-				desired_num_random_inits_ = num_random_inits_
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
-
-			if num_random_inits_ <= 0:
-				raise Error('Invalid argument "' + opt_val + '" for option random-inits. Usage: options = "[--random-inits <convertible to int greater 0>]"')
-
-		}
-		elif opt_name == 'randomness':
-			if opt_val == 'PSEUDO':
-				use_real_randomness_ = False
-
-			elif opt_val == 'REAL':
-				use_real_randomness_ = True
-
-			else:
-				raise Error('Invalid argument "' + opt_val  + '" for option randomness. Usage: options = "[--randomness REAL|PSEUDO] [...]"')
-
-		}
-		elif opt_name == 'stdout':
-			if opt_val == '0':
-				print_to_stdout_ = 0
-
-			elif opt_val == '1':
-				print_to_stdout_ = 1
-
-			elif opt_val == '2':
-				print_to_stdout_ = 2
-
-			else:
-				raise Error('Invalid argument "' + opt_val  + '" for option stdout. Usage: options = "[--stdout 0|1|2] [...]"')
-
-		}
-		elif opt_name == 'refine':
-			if opt_val == 'TRUE':
-				refine_ = True
-
-			elif opt_val == 'FALSE':
-				refine_ = False
-
-			else:
-				raise Error('Invalid argument "' + opt_val  + '" for option refine. Usage: options = "[--refine TRUE|FALSE] [...]"')
-
-		}
-		elif opt_name == 'time-limit':
-			try:
-				time_limit_in_sec_ = std::stod(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option time-limit.  Usage: options = "[--time-limit <convertible to double>] [...]')
-
-		}
-		elif opt_name == 'max-itrs':
-			try:
-				max_itrs_ = std::stoi(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option max-itrs. Usage: options = "[--max-itrs <convertible to int>] [...]')
-
-		}
-		elif opt_name == 'max-itrs-without-update':
-			try:
-				max_itrs_without_update_ = std::stoi(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option max-itrs-without-update. Usage: options = "[--max-itrs-without-update <convertible to int>] [...]')
-
-		}
-		elif opt_name == 'seed':
-			try:
-				seed_ = std::stoul(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option seed. Usage: options = "[--seed <convertible to int greater equal 0>] [...]')
-
-		}
-		elif opt_name == 'epsilon':
-			try:
-				epsilon_ = std::stod(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
-
-			if epsilon_ <= 0:
-				raise Error('Invalid argument "' + opt_val + '" for option epsilon. Usage: options = "[--epsilon <convertible to double greater 0>] [...]')
-
-		}
-		elif opt_name == 'inits-increase-order':
-			try:
-				num_inits_increase_order_ = std::stoul(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
-
-			if num_inits_increase_order_ <= 0:
-				raise Error('Invalid argument "' + opt_val + '" for option inits-increase-order. Usage: options = "[--inits-increase-order <convertible to int greater 0>]"')
-
-		}
-		elif opt_name == 'init-type-increase-order':
-			init_type_increase_order_ = opt_val
-			if opt_val != 'CLUSTERS' and opt_val != 'K-MEANS++':
-				raise Exception(std::string('Invalid argument ') + opt_val + ' for option init-type-increase-order. Usage: options = "[--init-type-increase-order CLUSTERS|K-MEANS++] [...]"')
-
-		}
-		elif opt_name == 'max-itrs-increase-order':
-			try:
-				max_itrs_increase_order_ = std::stoi(opt_val)
-
-			except:
-				raise Error('Invalid argument "' + opt_val + '" for option max-itrs-increase-order. Usage: options = "[--max-itrs-increase-order <convertible to int>] [...]')
-
-		}
-		else:
-			std::string valid_options('[--init-type <arg>] [--random-inits <arg>] [--randomness <arg>] [--seed <arg>] [--stdout <arg>] ')
-			valid_options += '[--time-limit <arg>] [--max-itrs <arg>] [--epsilon <arg>] '
-			valid_options += '[--inits-increase-order <arg>] [--init-type-increase-order <arg>] [--max-itrs-increase-order <arg>]'
-			raise Error(std::string('Invalid option "') + opt_name + '". Usage: options = "' + valid_options + '"')
-
diff --git a/gklearn/preimage/test.py b/gklearn/preimage/test.py
deleted file mode 100644
index 4110a6f..0000000
--- a/gklearn/preimage/test.py
+++ /dev/null
@@ -1,83 +0,0 @@
-#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad
-
-#Pour que "import script" trouve les librairies qu'a besoin GedLib
-#Equivalent à définir la variable d'environnement LD_LIBRARY_PATH sur un bash
-import gedlibpy.librariesImport
-from  gedlibpy import gedlibpy
-import networkx as nx
-
-
-def init() :
-    print("List of Edit Cost Options : ")
-    for i in gedlibpy.list_of_edit_cost_options :
-        print (i)
-    print("")
-
-    print("List of Method Options : ")
-    for j in gedlibpy.list_of_method_options :
-        print (j)
-    print("")
-
-    print("List of Init Options : ")
-    for k in gedlibpy.list_of_init_options :
-        print (k)
-    print("")
-    
-def test():
-    
-    gedlibpy.load_GXL_graphs('include/gedlib-master/data/datasets/Mutagenicity/data/', 'collections/MUTA_10.xml')
-    listID = gedlibpy.get_all_graph_ids()
-    gedlibpy.set_edit_cost("CHEM_1")
-    gedlibpy.init()
-    gedlibpy.set_method("IPFP", "")
-    gedlibpy.init_method()
-    g = listID[0]
-    h = listID[1]
-    gedlibpy.run_method(g, h)
-    print("Node Map : ", gedlibpy.get_node_map(g,h))
-    print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
-    print("Assignment Matrix : ")
-    print(gedlibpy.get_assignment_matrix(g, h))
-    print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g,h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
-
-
-def convertGraph(G):
-    G_new = nx.Graph()
-    for nd, attrs in G.nodes(data=True):
-        G_new.add_node(str(nd), chem=attrs['atom'])
-    for nd1, nd2, attrs in G.edges(data=True):
-        G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-        
-    return G_new
-
-
-def testNxGrapĥ():
-    from gklearn.utils.graphfiles import loadDataset
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    
-    gedlibpy.restart_env()
-    for graph in Gn:
-        g_new = convertGraph(graph)
-        gedlibpy.add_nx_graph(g_new, "")
-        
-    listID = gedlibpy.get_all_graph_ids()
-    gedlibpy.set_edit_cost("CHEM_1")
-    gedlibpy.init()
-    gedlibpy.set_method("IPFP", "")
-    gedlibpy.init_method()
-
-    print(listID)
-    g = listID[0]
-    h = listID[1]
-
-    gedlibpy.run_method(g, h)
-
-    print("Node Map : ", gedlibpy.get_node_map(g, h))
-    print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
-    print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
-
-#test()
-init() 
-#testNxGrapĥ()
diff --git a/gklearn/preimage/test_fitDistance.py b/gklearn/preimage/test_fitDistance.py
deleted file mode 100644
index 2945a24..0000000
--- a/gklearn/preimage/test_fitDistance.py
+++ /dev/null
@@ -1,648 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 24 11:50:56 2019
-
-@author: ljia
-"""
-from matplotlib import pyplot as plt
-import numpy as np
-from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.utils import remove_edges
-from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
-from gklearn.preimage.utils import normalize_distance_matrix
-
-
-def test_update_costs():
-    from preimage.fitDistance import update_costs
-    import cvxpy as cp
-    
-    ds = np.load('results/xp_fit_method/fit_data_debug4.gm.npz')
-    nb_cost_mat = ds['nb_cost_mat']
-    dis_k_vec = ds['dis_k_vec']
-    n_edit_operations = ds['n_edit_operations']
-    ged_vec_init = ds['ged_vec_init']
-    ged_mat = ds['ged_mat']
-    
-    nb_cost_mat_new = nb_cost_mat[:,[2,3,4]]
-    x = cp.Variable(nb_cost_mat_new.shape[1])
-    cost_fun = cp.sum_squares(nb_cost_mat_new * x - dis_k_vec)
-#    constraints = [x >= [0.000 for i in range(nb_cost_mat_new.shape[1])],
-#                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0]
-#    constraints = [x >= [0.000 for i in range(nb_cost_mat_new.shape[1])],
-#                   np.array([1.0, 1.0, -1.0, 0.0, 0.0]).T@x >= 0.0,
-#                   np.array([0.0, 0.0, 0.0, 1.0, -1.0]).T@x == 0.0]
-    constraints = [x >= [0.00 for i in range(nb_cost_mat_new.shape[1])],
-                   np.array([0.0, 1.0, -1.0]).T@x == 0.0]
-#    constraints = [x >= [0.00000 for i in range(nb_cost_mat_new.shape[1])]]
-    prob = cp.Problem(cp.Minimize(cost_fun), constraints)
-    prob.solve()
-    print(x.value)
-    edit_costs_new = np.concatenate((x.value, np.array([0.0])))
-    residual = np.sqrt(prob.value)
-
-
-def median_paper_clcpc_python_best():
-    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation with 
-       python invoking the c++ code by bash command (with updated library).
-    """
-#    ds = {'name': 'monoterpenoides', 
-#          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-#    _, y_all = loadDataset(ds['dataset'])
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    itr_max = 6
-    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-    params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', 
-                'algo_options': algo_options, 'stabilizer': None}
-    
-    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
-    repeats = 50
-    collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
-    graph_dir = collection_path + 'gxl/'
-    
-    fn_edit_costs_output = 'results/median_paper/edit_costs_output.python_init40.k10.txt'
-
-    for y in y_all:
-        for repeat in range(repeats):
-            edit_costs_output_file = open(fn_edit_costs_output, 'a')
-            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
-            Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
-            edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-                nb_cost_mat_list = fit_GED_to_kernel_distance(Gn, node_label, edge_label, 
-                                            gkernel, itr_max, params_ged=params_ged, 
-                                            parallel=True)
-            total_time = np.sum(time_list)
-#            print('\nedit_costs:', edit_costs)
-#            print('\nresidual_list:', residual_list)
-#            print('\nedit_cost_list:', edit_cost_list)
-#            print('\ndistance matrix in kernel space:', dis_k_mat)
-#            print('\nged matrix:', ged_mat)
-#            print('\ntotal time:', total_time)
-#            print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-            np.savez('results/median_paper/fit_distance.clcpc.python_init40.monot.elabeled.uhpkernel.y' 
-                     + y + '.repeat' + str(repeat) + '.k10..gm', 
-                     edit_costs=edit_costs, 
-                     residual_list=residual_list, edit_cost_list=edit_cost_list,
-                     dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-                     total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
-            
-            for ec in edit_costs:
-                edit_costs_output_file.write(str(ec) + ' ')
-            edit_costs_output_file.write('\n')
-            edit_costs_output_file.close()
-    
-    
-#    # normalized distance matrices.
-#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
-#    edit_costs = gmfile['edit_costs']
-#    residual_list = gmfile['residual_list']
-#    edit_cost_list = gmfile['edit_cost_list']
-#    dis_k_mat = gmfile['dis_k_mat']
-#    ged_mat = gmfile['ged_mat']
-#    total_time = gmfile['total_time']
-#    nb_cost_mat_list = gmfile['nb_cost_mat_list']
-    
-            nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
-            print(nb_consistent, nb_inconsistent, ratio_consistent)
-                      
-#            norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-#            plt.imshow(norm_dis_k_mat)
-#            plt.colorbar()
-#            plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-#            plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_best.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.png', format='png')
-#        #    plt.show()
-#            plt.clf()
-#            
-#            norm_ged_mat = normalize_distance_matrix(ged_mat)
-#            plt.imshow(norm_ged_mat)
-#            plt.colorbar()
-#            plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-#            plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_best.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.png', format='png')
-#        #    plt.show()
-#            plt.clf()
-#            
-#            norm_diff = norm_ged_mat - norm_dis_k_mat
-#            plt.imshow(norm_diff)
-#            plt.colorbar()
-#            plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-#            plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_best.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.png', format='png')
-#        #    plt.show()
-#            plt.clf()
-#        #    draw_count_bar(norm_diff)
-
-
-def median_paper_clcpc_python_bash_cpp():
-    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation with 
-       python invoking the c++ code by bash command (with updated library).
-    """
-#    ds = {'name': 'monoterpenoides', 
-#          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-#    _, y_all = loadDataset(ds['dataset'])
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    itr_max = 20
-    algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
-    params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP', 
-                'algo_options': algo_options}
-    
-    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
-    repeats = 50
-    collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
-    graph_dir = collection_path + 'gxl/'
-    
-    fn_edit_costs_output = 'results/median_paper/edit_costs_output.txt'
-
-    for y in y_all:
-        for repeat in range(repeats):
-            edit_costs_output_file = open(fn_edit_costs_output, 'a')
-            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
-            Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
-            edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-                nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, 
-                                            gkernel, itr_max, params_ged=params_ged, 
-                                            parallel=False)
-            total_time = np.sum(time_list)
-#            print('\nedit_costs:', edit_costs)
-#            print('\nresidual_list:', residual_list)
-#            print('\nedit_cost_list:', edit_cost_list)
-#            print('\ndistance matrix in kernel space:', dis_k_mat)
-#            print('\nged matrix:', ged_mat)
-#            print('\ntotal time:', total_time)
-#            print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-            np.savez('results/median_paper/fit_distance.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-                     + y + '.repeat' + str(repeat) + '.gm', 
-                     edit_costs=edit_costs, 
-                     residual_list=residual_list, edit_cost_list=edit_cost_list,
-                     dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-                     total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, 
-                     coef_dk=coef_dk)
-            
-            for ec in edit_costs:
-                edit_costs_output_file.write(str(ec) + ' ')
-            edit_costs_output_file.write('\n')
-            edit_costs_output_file.close()
-    
-    
-#    # normalized distance matrices.
-#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
-#    edit_costs = gmfile['edit_costs']
-#    residual_list = gmfile['residual_list']
-#    edit_cost_list = gmfile['edit_cost_list']
-#    dis_k_mat = gmfile['dis_k_mat']
-#    ged_mat = gmfile['ged_mat']
-#    total_time = gmfile['total_time']
-#    nb_cost_mat_list = gmfile['nb_cost_mat_list']
-#    coef_dk = gmfile['coef_dk']
-    
-            nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
-            print(nb_consistent, nb_inconsistent, ratio_consistent)
-                      
-#            norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-#            plt.imshow(norm_dis_k_mat)
-#            plt.colorbar()
-#            plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-#            plt.savefig('results/median_paper/norm_dis_k_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.png', format='png')
-#        #    plt.show()
-#            plt.clf()
-#            
-#            norm_ged_mat = normalize_distance_matrix(ged_mat)
-#            plt.imshow(norm_ged_mat)
-#            plt.colorbar()
-#            plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-#            plt.savefig('results/median_paper/norm_ged_mat.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.png', format='png')
-#        #    plt.show()
-#            plt.clf()
-#            
-#            norm_diff = norm_ged_mat - norm_dis_k_mat
-#            plt.imshow(norm_diff)
-#            plt.colorbar()
-#            plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.eps', format='eps', dpi=300)
-#            plt.savefig('results/median_paper/diff_mat_norm_ged_dis_k.clcpc.python_bash_cpp.monot.elabeled.uhpkernel.y' 
-#                        + y + '.repeat' + str(repeat) + '.png', format='png')
-#        #    plt.show()
-#            plt.clf()
-#        #    draw_count_bar(norm_diff)
-
-
-
-
-
-def test_cs_leq_ci_plus_cr_python_bash_cpp():
-    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er with ged computation with 
-       python invoking the c++ code by bash command (with updated library).
-    """
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:10]
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    itr_max = 10
-    algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
-    params_ged = {'lib': 'gedlib-bash', 'cost': 'CONSTANT', 'method': 'IPFP', 
-                'algo_options': algo_options}
-    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-        nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, 
-                                    gkernel, itr_max, params_ged=params_ged, 
-                                    parallel=False)
-    total_time = np.sum(time_list)
-    print('\nedit_costs:', edit_costs)
-    print('\nresidual_list:', residual_list)
-    print('\nedit_cost_list:', edit_cost_list)
-    print('\ndistance matrix in kernel space:', dis_k_mat)
-    print('\nged matrix:', ged_mat)
-    print('\ntotal time:', total_time)
-    print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-    np.savez('results/fit_distance.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel.gm', 
-             edit_costs=edit_costs, 
-             residual_list=residual_list, edit_cost_list=edit_cost_list,
-             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, 
-             coef_dk=coef_dk)
-    
-#    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-#          'extra_params': {}}  # node/edge symb
-#    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-##    Gn = Gn[0:10]
-##    remove_edges(Gn)
-#    gkernel = 'untilhpathkernel'
-#    node_label = 'atom'
-#    edge_label = 'bond_type'
-#    itr_max = 10
-#    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-#        nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, 
-#                                                      gkernel, itr_max)
-#    total_time = np.sum(time_list)
-#    print('\nedit_costs:', edit_costs)
-#    print('\nresidual_list:', residual_list)
-#    print('\nedit_cost_list:', edit_cost_list)
-#    print('\ndistance matrix in kernel space:', dis_k_mat)
-#    print('\nged matrix:', ged_mat)
-#    print('\ntotal time:', total_time)
-#    print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-#    np.savez('results/fit_distance.cs_leq_ci_plus_cr.mutag.elabeled.uhpkernel.gm', 
-#             edit_costs=edit_costs, 
-#             residual_list=residual_list, edit_cost_list=edit_cost_list,
-#             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-#             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk)
-    
-    
-#    # normalized distance matrices.
-#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.monot.elabeled.uhpkernel.gm.npz')
-#    edit_costs = gmfile['edit_costs']
-#    residual_list = gmfile['residual_list']
-#    edit_cost_list = gmfile['edit_cost_list']
-#    dis_k_mat = gmfile['dis_k_mat']
-#    ged_mat = gmfile['ged_mat']
-#    total_time = gmfile['total_time']
-#    nb_cost_mat_list = gmfile['nb_cost_mat_list']
-#    coef_dk = gmfile['coef_dk']
-    
-    nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
-    print(nb_consistent, nb_inconsistent, ratio_consistent)
-    
-#    dis_k_sub = pairwise_substitution(dis_k_mat)
-#    ged_sub = pairwise_substitution(ged_mat)    
-#    np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.gm', 
-#             dis_k_sub=dis_k_sub, ged_sub=ged_sub)
-    
-    
-    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-    plt.imshow(norm_dis_k_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel' 
-                + '.eps', format='eps', dpi=300)
-    plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel' 
-                + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_ged_mat = normalize_distance_matrix(ged_mat)
-    plt.imshow(norm_ged_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel' 
-                + '.eps', format='eps', dpi=300)
-    plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel' 
-                + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_diff = norm_ged_mat - norm_dis_k_mat
-    plt.imshow(norm_diff)
-    plt.colorbar()
-    plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel' 
-                + '.eps', format='eps', dpi=300)
-    plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.python_bash_cpp.monot.elabeled.uhpkernel' 
-                + '.png', format='png')
-#    plt.show()
-    plt.clf()
-#    draw_count_bar(norm_diff)
-
-
-def test_anycosts():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:10]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    itr_max = 10
-    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-        nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, gkernel, itr_max)
-    total_time = np.sum(time_list)
-    print('\nedit_costs:', edit_costs)
-    print('\nresidual_list:', residual_list)
-    print('\nedit_cost_list:', edit_cost_list)
-    print('\ndistance matrix in kernel space:', dis_k_mat)
-    print('\nged matrix:', ged_mat)
-    print('\ntotal time:', total_time)
-    print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-    np.savez('results/fit_distance.any_costs.gm', edit_costs=edit_costs, 
-             residual_list=residual_list, edit_cost_list=edit_cost_list,
-             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list)
-    
-#    # normalized distance matrices.
-#    gmfile = np.load('results/fit_distance.any_costs.gm.npz')
-#    edit_costs = gmfile['edit_costs']
-#    residual_list = gmfile['residual_list']
-#    edit_cost_list = gmfile['edit_cost_list']
-#    dis_k_mat = gmfile['dis_k_mat']
-#    ged_mat = gmfile['ged_mat']
-#    total_time = gmfile['total_time']
-##    nb_cost_mat_list = gmfile['nb_cost_mat_list']
-    
-    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-    plt.imshow(norm_dis_k_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_dis_k_mat.any_costs' + '.eps', format='eps', dpi=300)
-#    plt.savefig('results/norm_dis_k_mat.any_costs' + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_ged_mat = normalize_distance_matrix(ged_mat)
-    plt.imshow(norm_ged_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_ged_mat.any_costs' + '.eps', format='eps', dpi=300)
-#    plt.savefig('results/norm_ged_mat.any_costs' + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_diff = norm_ged_mat - norm_dis_k_mat
-    plt.imshow(norm_diff)
-    plt.colorbar()
-    plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.eps', format='eps', dpi=300)
-#    plt.savefig('results/diff_mat_norm_ged_dis_k.any_costs' + '.png', format='png')
-#    plt.show()
-    plt.clf()
-#    draw_count_bar(norm_diff)
-    
-
-def test_cs_leq_ci_plus_cr():
-    """c_vs <= c_vi + c_vr, c_es <= c_ei + c_er
-    """
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:10]
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    itr_max = 10
-    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-        nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, 
-                                                      gkernel, itr_max,
-                                                      fitkernel='gaussian')
-    total_time = np.sum(time_list)
-    print('\nedit_costs:', edit_costs)
-    print('\nresidual_list:', residual_list)
-    print('\nedit_cost_list:', edit_cost_list)
-    print('\ndistance matrix in kernel space:', dis_k_mat)
-    print('\nged matrix:', ged_mat)
-    print('\ntotal time:', total_time)
-    print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-    np.savez('results/fit_distance.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel.gm', 
-             edit_costs=edit_costs, 
-             residual_list=residual_list, edit_cost_list=edit_cost_list,
-             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, 
-             coef_dk=coef_dk)
-    
-#    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-#          'extra_params': {}}  # node/edge symb
-#    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-##    Gn = Gn[0:10]
-##    remove_edges(Gn)
-#    gkernel = 'untilhpathkernel'
-#    node_label = 'atom'
-#    edge_label = 'bond_type'
-#    itr_max = 10
-#    edit_costs, residual_list, edit_cost_list, dis_k_mat, ged_mat, time_list, \
-#        nb_cost_mat_list, coef_dk = fit_GED_to_kernel_distance(Gn, node_label, edge_label, 
-#                                                      gkernel, itr_max)
-#    total_time = np.sum(time_list)
-#    print('\nedit_costs:', edit_costs)
-#    print('\nresidual_list:', residual_list)
-#    print('\nedit_cost_list:', edit_cost_list)
-#    print('\ndistance matrix in kernel space:', dis_k_mat)
-#    print('\nged matrix:', ged_mat)
-#    print('\ntotal time:', total_time)
-#    print('\nnb_cost_mat:', nb_cost_mat_list[-1])
-#    np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.mutag.elabeled.uhpkernel.gm', 
-#             edit_costs=edit_costs, 
-#             residual_list=residual_list, edit_cost_list=edit_cost_list,
-#             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-#             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list, coef_dk)
-    
-    
-#    # normalized distance matrices.
-#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.monot.elabeled.uhpkernel.gm.npz')
-#    edit_costs = gmfile['edit_costs']
-#    residual_list = gmfile['residual_list']
-#    edit_cost_list = gmfile['edit_cost_list']
-#    dis_k_mat = gmfile['dis_k_mat']
-#    ged_mat = gmfile['ged_mat']
-#    total_time = gmfile['total_time']
-#    nb_cost_mat_list = gmfile['nb_cost_mat_list']
-#    coef_dk = gmfile['coef_dk']
-    
-    nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
-    print(nb_consistent, nb_inconsistent, ratio_consistent)
-    
-#    dis_k_sub = pairwise_substitution(dis_k_mat)
-#    ged_sub = pairwise_substitution(ged_mat)    
-#    np.savez('results/sub_dis_mat.cs_leq_ci_plus_cr.cost_leq_1en2.gm', 
-#             dis_k_sub=dis_k_sub, ged_sub=ged_sub)
-    
-    
-    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-    plt.imshow(norm_dis_k_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' 
-                + '.eps', format='eps', dpi=300)
-    plt.savefig('results/norm_dis_k_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' 
-                + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_ged_mat = normalize_distance_matrix(ged_mat)
-    plt.imshow(norm_ged_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' 
-                + '.eps', format='eps', dpi=300)
-    plt.savefig('results/norm_ged_mat.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' 
-                + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_diff = norm_ged_mat - norm_dis_k_mat
-    plt.imshow(norm_diff)
-    plt.colorbar()
-    plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' 
-                + '.eps', format='eps', dpi=300)
-    plt.savefig('results/diff_mat_norm_ged_dis_k.cs_leq_ci_plus_cr.gaussian.cost_leq_1en2.monot.elabeled.uhpkernel' 
-                + '.png', format='png')
-#    plt.show()
-    plt.clf()
-#    draw_count_bar(norm_diff)
-    
-    
-def test_unfitted():
-    """unfitted.
-    """  
-    from fitDistance import compute_geds
-    from utils import kernel_distance_matrix
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:10]
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-        
-
-#    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-#          'extra_params': {}}  # node/edge symb
-#    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-##    Gn = Gn[0:10]
-##    remove_edges(Gn)
-#    gkernel = 'marginalizedkernel'
-
-    dis_k_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, gkernel=gkernel)
-    ged_all, ged_mat, n_edit_operations = compute_geds(Gn, [3, 3, 1, 3, 3, 1], 
-            [0, 1, 2, 3, 4, 5], parallel=True)
-    print('\ndistance matrix in kernel space:', dis_k_mat)
-    print('\nged matrix:', ged_mat)
-#    np.savez('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en2.gm', edit_costs=edit_costs, 
-#             residual_list=residual_list, edit_cost_list=edit_cost_list,
-#             dis_k_mat=dis_k_mat, ged_mat=ged_mat, time_list=time_list, 
-#             total_time=total_time, nb_cost_mat_list=nb_cost_mat_list) 
-    
-    # normalized distance matrices.
-#    gmfile = np.load('results/fit_distance.cs_leq_ci_plus_cr.cost_leq_1en3.gm.npz')
-#    edit_costs = gmfile['edit_costs']
-#    residual_list = gmfile['residual_list']
-#    edit_cost_list = gmfile['edit_cost_list']
-#    dis_k_mat = gmfile['dis_k_mat']
-#    ged_mat = gmfile['ged_mat']
-#    total_time = gmfile['total_time']
-#    nb_cost_mat_list = gmfile['nb_cost_mat_list']
-    
-    nb_consistent, nb_inconsistent, ratio_consistent = pairwise_substitution_consistence(dis_k_mat, ged_mat)
-    print(nb_consistent, nb_inconsistent, ratio_consistent)
-    
-    norm_dis_k_mat = normalize_distance_matrix(dis_k_mat)
-    plt.imshow(norm_dis_k_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
-    plt.savefig('results/norm_dis_k_mat.unfitted.MUTAG' + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_ged_mat = normalize_distance_matrix(ged_mat)
-    plt.imshow(norm_ged_mat)
-    plt.colorbar()
-    plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
-    plt.savefig('results/norm_ged_mat.unfitted.MUTAG' + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    
-    norm_diff = norm_ged_mat - norm_dis_k_mat
-    plt.imshow(norm_diff)
-    plt.colorbar()
-    plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.eps', format='eps', dpi=300)
-    plt.savefig('results/diff_mat_norm_ged_dis_k.unfitted.MUTAG' + '.png', format='png')
-#    plt.show()
-    plt.clf()
-    draw_count_bar(norm_diff)
-    
-    
-def pairwise_substitution_consistence(mat1, mat2):
-    """
-    """
-    nb_consistent = 0
-    nb_inconsistent = 0
-    # the matrix is considered symmetric.
-    upper_tri1 = mat1[np.triu_indices_from(mat1)]
-    upper_tri2 = mat2[np.tril_indices_from(mat2)]
-    for i in tqdm(range(len(upper_tri1)), desc='computing consistence', file=sys.stdout):
-        for j in range(i, len(upper_tri1)):
-            if np.sign(upper_tri1[i] - upper_tri1[j]) == np.sign(upper_tri2[i] - upper_tri2[j]):
-                nb_consistent += 1
-            else:
-                nb_inconsistent += 1
-    return nb_consistent, nb_inconsistent, nb_consistent / (nb_consistent + nb_inconsistent)
-
-
-def pairwise_substitution(mat):
-    # the matrix is considered symmetric.
-    upper_tri = mat[np.triu_indices_from(mat)]
-    sub_list = []
-    for i in tqdm(range(len(upper_tri)), desc='computing', file=sys.stdout):
-        for j in range(i, len(upper_tri)):
-            sub_list.append(upper_tri[i] - upper_tri[j])
-    return sub_list
-    
-    
-def draw_count_bar(norm_diff):
-    import pandas
-    from collections import Counter, OrderedDict
-    norm_diff_cnt = norm_diff.flatten()
-    norm_diff_cnt = norm_diff_cnt * 10
-    norm_diff_cnt = np.floor(norm_diff_cnt)
-    norm_diff_cnt = Counter(norm_diff_cnt)
-    norm_diff_cnt = OrderedDict(sorted(norm_diff_cnt.items()))
-    df = pandas.DataFrame.from_dict(norm_diff_cnt, orient='index')
-    df.plot(kind='bar')
-    
-    
-if __name__ == '__main__':
-#    test_anycosts()
-#    test_cs_leq_ci_plus_cr()
-#    test_unfitted()
-    
-#    test_cs_leq_ci_plus_cr_python_bash_cpp()
-#    median_paper_clcpc_python_bash_cpp()
-#    median_paper_clcpc_python_best()
-
-#    x = np.array([[1,2,3],[4,5,6],[7,8,9]])
-#    xx = pairwise_substitution(x)
-    
-    test_update_costs()
\ No newline at end of file
diff --git a/gklearn/preimage/test_ged.py b/gklearn/preimage/test_ged.py
deleted file mode 100644
index 74e18a0..0000000
--- a/gklearn/preimage/test_ged.py
+++ /dev/null
@@ -1,520 +0,0 @@
-#export LD_LIBRARY_PATH=.:/export/home/lambertn/Documents/gedlibpy/lib/fann/:/export/home/lambertn/Documents/gedlibpy/lib/libsvm.3.22:/export/home/lambertn/Documents/gedlibpy/lib/nomad
-
-#Pour que "import script" trouve les librairies qu'a besoin GedLib
-#Equivalent à définir la variable d'environnement LD_LIBRARY_PATH sur un bash
-#import gedlibpy_linlin.librariesImport
-#from  gedlibpy_linlin import gedlibpy
-from libs import *
-import networkx as nx
-import numpy as np
-from tqdm import tqdm
-import sys
-
-
-def test_NON_SYMBOLIC_cost():
-    """Test edit cost LETTER2.
-    """
-    from gklearn.preimage.ged import GED, get_nb_edit_operations_nonsymbolic, get_nb_edit_operations_letter
-    from gklearn.preimage.test_k_closest_graphs import reform_attributes
-    from gklearn.utils.graphfiles import loadDataset
-
-    dataset = '../../datasets/Letter-high/Letter-high_A.txt'
-    Gn, y_all = loadDataset(dataset)
-
-    g1 = Gn[200]
-    g2 = Gn[1780]
-    reform_attributes(g1)
-    reform_attributes(g2)
-
-    c_vi = 0.675
-    c_vr = 0.675
-    c_vs = 0.75
-    c_ei = 0.425
-    c_er = 0.425
-    c_es = 0
-
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-    dis, pi_forward, pi_backward = GED(g1, g2, lib='gedlibpy',
-        cost='NON_SYMBOLIC', method='IPFP', edit_cost_constant=edit_cost_constant,
-        algo_options='', stabilizer=None)
-    n_vi, n_vr, sod_vs, n_ei, n_er, sod_es = get_nb_edit_operations_nonsymbolic(g1, g2,
-        pi_forward, pi_backward)
-
-    print('# of operations:', n_vi, n_vr, sod_vs, n_ei, n_er, sod_es)
-    print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
-    cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
-        + c_ei * n_ei + c_er * n_er + c_es * sod_es
-    print('dis (cost computed by GED):', dis)
-    print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-def test_LETTER2_cost():
-    """Test edit cost LETTER2.
-    """
-    from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
-    from gklearn.preimage.test_k_closest_graphs import reform_attributes
-    from gklearn.utils.graphfiles import loadDataset
-
-    ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-
-    g1 = Gn[200]
-    g2 = Gn[1780]
-    reform_attributes(g1)
-    reform_attributes(g2)
-
-    c_vi = 0.675
-    c_vr = 0.675
-    c_vs = 0.75
-    c_ei = 0.425
-    c_er = 0.425
-
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er]
-    dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
-        cost='LETTER2', method='IPFP', edit_cost_constant=edit_cost_constant,
-        algo_options='', stabilizer=None)
-    n_vi, n_vr, n_vs, sod_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
-        pi_forward, pi_backward)
-
-    print('# of operations:', n_vi, n_vr, n_vs, sod_vs, n_ei, n_er)
-    print('c_vi, c_vr, c_vs, c_ei, c_er:', c_vi, c_vr, c_vs, c_ei, c_er)
-    cost_computed = c_vi * n_vi + c_vr * n_vr + c_vs * sod_vs \
-        + c_ei * n_ei + c_er * n_er
-    print('dis (cost computed by GED):', dis)
-    print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-
-def test_get_nb_edit_operations_letter():
-    """Test whether function preimage.ged.get_nb_edit_operations_letter returns
-    correct numbers of edit operations. The distance/cost computed by GED
-    should be the same as the cost computed by number of operations and edit
-    cost constants.
-    """
-    from gklearn.preimage.ged import GED, get_nb_edit_operations_letter
-    from gklearn.preimage.test_k_closest_graphs import reform_attributes
-    from gklearn.utils.graphfiles import loadDataset
-
-    ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-
-    g1 = Gn[200]
-    g2 = Gn[1780]
-    reform_attributes(g1)
-    reform_attributes(g2)
-
-    c_vir = 0.9
-    c_eir = 1.7
-    alpha = 0.75
-
-    edit_cost_constant = [c_vir, c_eir, alpha]
-    dis, pi_forward, pi_backward = GED(g1, g2, dataset='letter', lib='gedlibpy',
-        cost='LETTER', method='IPFP', edit_cost_constant=edit_cost_constant,
-        algo_options='', stabilizer=None)
-    n_vi, n_vr, n_vs, c_vs, n_ei, n_er = get_nb_edit_operations_letter(g1, g2,
-        pi_forward, pi_backward)
-
-    print('# of operations and costs:', n_vi, n_vr, n_vs, c_vs, n_ei, n_er)
-    print('c_vir, c_eir, alpha:', c_vir, c_eir, alpha)
-    cost_computed = alpha * c_vir * (n_vi + n_vr) \
-        + alpha * c_vs \
-        + (1 - alpha) * c_eir * (n_ei + n_er)
-    print('dis (cost computed by GED):', dis)
-    print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-def test_get_nb_edit_operations():
-    """Test whether function preimage.ged.get_nb_edit_operations returns correct
-    numbers of edit operations. The distance/cost computed by GED should be the
-    same as the cost computed by number of operations and edit cost constants.
-    """
-    from gklearn.preimage.ged import GED, get_nb_edit_operations
-    from gklearn.utils.graphfiles import loadDataset
-    import os
-
-    ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-
-    g1 = Gn[20]
-    g2 = Gn[108]
-
-    c_vi = 3
-    c_vr = 3
-    c_vs = 1
-    c_ei = 3
-    c_er = 3
-    c_es = 1
-
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-    dis, pi_forward, pi_backward = GED(g1, g2, dataset='monoterpenoides', lib='gedlibpy',
-        cost='CONSTANT', method='IPFP', edit_cost_constant=edit_cost_constant,
-        algo_options='', stabilizer=None)
-    n_vi, n_vr, n_vs, n_ei, n_er, n_es = get_nb_edit_operations(g1, g2,
-        pi_forward, pi_backward)
-
-    print('# of operations and costs:', n_vi, n_vr, n_vs, n_ei, n_er, n_es)
-    print('edit costs:', c_vi, c_vr, c_vs, c_ei, c_er, c_es)
-    cost_computed = n_vi * c_vi + n_vr * c_vr + n_vs * c_vs \
-        + n_ei * c_ei + n_er * c_er + n_es * c_es
-    print('dis (cost computed by GED):', dis)
-    print('cost computed by # of operations and edit cost constants:', cost_computed)
-
-
-def test_ged_python_bash_cpp():
-    """Test ged computation with python invoking the c++ code by bash command (with updated library).
-    """
-    from gklearn.utils.graphfiles import loadDataset
-    from gklearn.preimage.ged import GED
-
-    data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
-#    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
-    graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
-
-    Gn, y = loadDataset(collection_file, extra_params=graph_dir)
-
-    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-
-    for repeat in range(0, 3):
-        # Generate the result file.
-        ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_bash_' + str(repeat) + '_init40.3_20.txt'
-#        runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'
-
-        ged_file = open(ged_filename, 'a')
-#        runtime_file = open(runtime_filename, 'a')
-
-        ged_mat = np.empty((len(Gn), len(Gn)))
-#        runtime_mat = np.empty((len(Gn), len(Gn)))
-
-        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-            for j in range(len(Gn)):
-                print(i, j)
-                g1 = Gn[i]
-                g2 = Gn[j]
-                upper_bound, _, _ = GED(g1, g2, lib='gedlib-bash', cost='CONSTANT',
-                                method='IPFP',
-                                edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
-                                algo_options=algo_options)
-#                runtime = gedlibpy.get_runtime(g1, g2)
-                ged_mat[i][j] = upper_bound
-#                runtime_mat[i][j] = runtime
-
-                # Write to files.
-                ged_file.write(str(int(upper_bound)) + ' ')
-#                runtime_file.write(str(runtime) + ' ')
-
-            ged_file.write('\n')
-#            runtime_file.write('\n')
-
-        ged_file.close()
-#        runtime_file.close()
-
-    print('ged_mat')
-    print(ged_mat)
-#    print('runtime_mat:')
-#    print(runtime_mat)
-
-    return
-
-
-
-def test_ged_best_settings_updated():
-    """Test ged computation with best settings the same as in the C++ code (with updated library).
-    """
-
-    data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
-    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-#    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/monoterpenoides_3_20.xml'
-
-    graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
-
-    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-
-    for repeat in range(0, 3):
-        # Generate the result file.
-        ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_updated_' + str(repeat) + '_init40.txt'
-        runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_updated_' + str(repeat) + '_init40.txt'
-
-        gedlibpy.restart_env()
-        gedlibpy.load_GXL_graphs(graph_dir, collection_file)
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
-        gedlibpy.init()
-        gedlibpy.set_method("IPFP", algo_options)
-        gedlibpy.init_method()
-
-        ged_mat = np.empty((len(listID), len(listID)))
-        runtime_mat = np.empty((len(listID), len(listID)))
-
-        for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
-            ged_file = open(ged_filename, 'a')
-            runtime_file = open(runtime_filename, 'a')
-
-            for j in range(len(listID)):
-                g1 = listID[i]
-                g2 = listID[j]
-                gedlibpy.run_method(g1, g2)
-                upper_bound = gedlibpy.get_upper_bound(g1, g2)
-                runtime = gedlibpy.get_runtime(g1, g2)
-                ged_mat[i][j] = upper_bound
-                runtime_mat[i][j] = runtime
-
-                # Write to files.
-                ged_file.write(str(int(upper_bound)) + ' ')
-                runtime_file.write(str(runtime) + ' ')
-
-            ged_file.write('\n')
-            runtime_file.write('\n')
-
-            ged_file.close()
-            runtime_file.close()
-
-    print('ged_mat')
-    print(ged_mat)
-    print('runtime_mat:')
-    print(runtime_mat)
-
-    return
-
-
-def test_ged_best_settings():
-    """Test ged computation with best settings the same as in the C++ code.
-    """
-
-    data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
-    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-    graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
-
-    algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
-
-    for repeat in range(0, 3):
-        # Generate the result file.
-        ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_best_settings_' + str(repeat) + '.txt'
-        runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_best_settings_' + str(repeat) + '.txt'
-
-        ged_file = open(ged_filename, 'a')
-        runtime_file = open(runtime_filename, 'a')
-
-        gedlibpy.restart_env()
-        gedlibpy.load_GXL_graphs(graph_dir, collection_file)
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
-        gedlibpy.init()
-        gedlibpy.set_method("IPFP", algo_options)
-        gedlibpy.init_method()
-
-        ged_mat = np.empty((len(listID), len(listID)))
-        runtime_mat = np.empty((len(listID), len(listID)))
-
-        for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
-            for j in range(len(listID)):
-                g1 = listID[i]
-                g2 = listID[j]
-                gedlibpy.run_method(g1, g2)
-                upper_bound = gedlibpy.get_upper_bound(g1, g2)
-                runtime = gedlibpy.get_runtime(g1, g2)
-                ged_mat[i][j] = upper_bound
-                runtime_mat[i][j] = runtime
-
-                # Write to files.
-                ged_file.write(str(int(upper_bound)) + ' ')
-                runtime_file.write(str(runtime) + ' ')
-
-            ged_file.write('\n')
-            runtime_file.write('\n')
-
-        ged_file.close()
-        runtime_file.close()
-
-    print('ged_mat')
-    print(ged_mat)
-    print('runtime_mat:')
-    print(runtime_mat)
-
-    return
-
-
-
-def test_ged_default():
-    """Test ged computation with default settings.
-    """
-
-    data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
-    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-    graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
-
-    for repeat in range(3):
-        # Generate the result file.
-        ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_default_' + str(repeat) + '.txt'
-        runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_default_' + str(repeat) + '.txt'
-
-        ged_file = open(ged_filename, 'a')
-        runtime_file = open(runtime_filename, 'a')
-
-        gedlibpy.restart_env()
-        gedlibpy.load_GXL_graphs(graph_dir, collection_file)
-        listID = gedlibpy.get_all_graph_ids()
-        gedlibpy.set_edit_cost('CONSTANT', [3.0, 3.0, 1.0, 3.0, 3.0, 1.0])
-        gedlibpy.init()
-        gedlibpy.set_method("IPFP", "")
-        gedlibpy.init_method()
-
-        ged_mat = np.empty((len(listID), len(listID)))
-        runtime_mat = np.empty((len(listID), len(listID)))
-
-        for i in tqdm(range(len(listID)), desc='computing GEDs', file=sys.stdout):
-            for j in range(len(listID)):
-                g1 = listID[i]
-                g2 = listID[j]
-                gedlibpy.run_method(g1, g2)
-                upper_bound = gedlibpy.get_upper_bound(g1, g2)
-                runtime = gedlibpy.get_runtime(g1, g2)
-                ged_mat[i][j] = upper_bound
-                runtime_mat[i][j] = runtime
-
-                # Write to files.
-                ged_file.write(str(int(upper_bound)) + ' ')
-                runtime_file.write(str(runtime) + ' ')
-
-            ged_file.write('\n')
-            runtime_file.write('\n')
-
-        ged_file.close()
-        runtime_file.close()
-
-    print('ged_mat')
-    print(ged_mat)
-    print('runtime_mat:')
-    print(runtime_mat)
-
-    return
-
-
-def test_ged_min():
-    """Test ged computation with the "min" stabilizer.
-    """
-    from gklearn.utils.graphfiles import loadDataset
-    from gklearn.preimage.ged import GED
-
-    data_dir_prefix = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/'
-    collection_file = data_dir_prefix + 'generated_datsets/monoterpenoides/gxl/monoterpenoides.xml'
-    graph_dir = data_dir_prefix +'generated_datsets/monoterpenoides/gxl/'
-
-    Gn, y = loadDataset(collection_file, extra_params=graph_dir)
-
-#    algo_options = '--threads 6 --initial-solutions 10 --ratio-runs-from-initial-solutions .5'
-
-    for repeat in range(0, 3):
-        # Generate the result file.
-        ged_filename = data_dir_prefix + 'output/test_ged/ged_mat_python_min_' + str(repeat) + '.txt'
-#        runtime_filename = data_dir_prefix + 'output/test_ged/runtime_mat_python_min_' + str(repeat) + '.txt'
-
-        ged_file = open(ged_filename, 'a')
-#        runtime_file = open(runtime_filename, 'a')
-
-        ged_mat = np.empty((len(Gn), len(Gn)))
-#        runtime_mat = np.empty((len(Gn), len(Gn)))
-
-        for i in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-            for j in range(len(Gn)):
-                g1 = Gn[i]
-                g2 = Gn[j]
-                upper_bound, _, _ = GED(g1, g2, lib='gedlibpy', cost='CONSTANT',
-                                method='IPFP',
-                                edit_cost_constant=[3.0, 3.0, 1.0, 3.0, 3.0, 1.0],
-                                stabilizer='min', repeat=10)
-#                runtime = gedlibpy.get_runtime(g1, g2)
-                ged_mat[i][j] = upper_bound
-#                runtime_mat[i][j] = runtime
-
-                # Write to files.
-                ged_file.write(str(int(upper_bound)) + ' ')
-#                runtime_file.write(str(runtime) + ' ')
-
-            ged_file.write('\n')
-#            runtime_file.write('\n')
-
-        ged_file.close()
-#        runtime_file.close()
-
-    print('ged_mat')
-    print(ged_mat)
-#    print('runtime_mat:')
-#    print(runtime_mat)
-
-    return
-
-
-def init() :
-    print("List of Edit Cost Options : ")
-    for i in gedlibpy.list_of_edit_cost_options :
-        print (i)
-    print("")
-
-    print("List of Method Options : ")
-    for j in gedlibpy.list_of_method_options :
-        print (j)
-    print("")
-
-    print("List of Init Options : ")
-    for k in gedlibpy.list_of_init_options :
-        print (k)
-    print("")
-
-
-
-
-def convertGraph(G):
-    G_new = nx.Graph()
-    for nd, attrs in G.nodes(data=True):
-        G_new.add_node(str(nd), chem=attrs['atom'])
-    for nd1, nd2, attrs in G.edges(data=True):
-        G_new.add_edge(str(nd1), str(nd2), valence=attrs['bond_type'])
-
-    return G_new
-
-
-def testNxGrapĥ():
-    from gklearn.utils.graphfiles import loadDataset
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-
-    gedlibpy.restart_env()
-    for graph in Gn:
-        g_new = convertGraph(graph)
-        gedlibpy.add_nx_graph(g_new, "")
-
-    listID = gedlibpy.get_all_graph_ids()
-    gedlibpy.set_edit_cost("CHEM_1")
-    gedlibpy.init()
-    gedlibpy.set_method("IPFP", "")
-    gedlibpy.init_method()
-
-    print(listID)
-    g = listID[0]
-    h = listID[1]
-
-    gedlibpy.run_method(g, h)
-
-    print("Node Map : ", gedlibpy.get_node_map(g, h))
-    print("Forward map : " , gedlibpy.get_forward_map(g, h), ", Backward map : ", gedlibpy.get_backward_map(g, h))
-    print ("Upper Bound = " + str(gedlibpy.get_upper_bound(g, h)) + ", Lower Bound = " + str(gedlibpy.get_lower_bound(g, h)) + ", Runtime = " + str(gedlibpy.get_runtime(g, h)))
-
-if __name__ == '__main__':
-#    test_ged_default()
-#    test_ged_min()
-#    test_ged_best_settings()
-#    test_ged_best_settings_updated()
-#    test_ged_python_bash_cpp()
-#    test_get_nb_edit_operations()
-#    test_get_nb_edit_operations_letter()
-#    test_LETTER2_cost()
-    test_NON_SYMBOLIC_cost()
-
-
-    #init()
-    #testNxGrapĥ()
diff --git a/gklearn/preimage/test_iam.py b/gklearn/preimage/test_iam.py
deleted file mode 100644
index 5897f50..0000000
--- a/gklearn/preimage/test_iam.py
+++ /dev/null
@@ -1,964 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep  5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-#from gklearn.utils.logger2file import *
-from gklearn.preimage.iam import iam_upgraded
-from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices, dis_gstar
-#from gklearn.preimage.ged import ged_median
-
-
-def test_iam_monoterpenoides_with_init40():
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    # unfitted edit costs.
-    c_vi = 3
-    c_vr = 3
-    c_vs = 1
-    c_ei = 3
-    c_er = 3
-    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.0001
-    removeNodes = False
-    connected_iam = False
-    # parameters for IAM function
-#    ged_cost = 'CONSTANT'
-    ged_cost = 'CONSTANT'
-    ged_method = 'IPFP'
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-    ged_stabilizer = None
-#    ged_repeat = 50
-    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'algo_options': algo_options,
-                  'stabilizer': ged_stabilizer}
-
-    
-    collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
-    graph_dir = collection_path + 'gxl/'
-    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
-    repeats = 50
-    
-    # classify graphs according to classes.
-    time_list = []
-    dis_ks_min_list = []
-    dis_ks_set_median_list = []
-    sod_gs_list = []
-    g_best = []
-    sod_set_median_list = []
-    sod_list_list = []
-    for y in y_all:
-        print('\n-------------------------------------------------------')
-        print('class of y:', y)
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        dis_ks_set_median_list.append([])
-        sod_gs_list.append([])
-        g_best.append([])
-        sod_set_median_list.append([])
-        
-        for repeat in range(repeats):
-            # load median set.
-            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
-            Gn_median, _ = loadDataset(collection_file, extra_params=graph_dir)
-            Gn_candidate = [g.copy() for g in Gn_median]
-            
-            time0 = time.time()
-            G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
-            = iam_upgraded(Gn_median, 
-                Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
-                epsilon=epsilon_iam, node_label=node_label, edge_label=edge_label, 
-                connected=connected_iam, removeNodes=removeNodes, 
-                params_ged=params_ged)
-            time_total = time.time() - time0
-            print('\ntime: ', time_total)
-            time_list[-1].append(time_total)
-            g_best[-1].append(G_gen_median_list[0])
-            sod_set_median_list[-1].append(sod_set_median)
-            print('\nsmallest sod of the set median:', sod_set_median)
-            sod_gs_list[-1].append(sod_gen_median)
-            print('\nsmallest sod in graph space:', sod_gen_median)
-            sod_list_list.append(sod_list)
-            
-#            # show the best graph and save it to file.
-#            print('one of the possible corresponding pre-images is')
-#            nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), 
-#                    with_labels=True)
-##            plt.show()
-#    #        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + 
-##            plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) + 
-##                        '_repeat' + str(repeat) + '_' + str(time.time()) +
-##                        '.png', format="PNG")
-#            plt.clf()
-#    #        print(G_gen_median_list[0].nodes(data=True))
-#    #        print(G_gen_median_list[0].edges(data=True))
-            
-        print('\nsods of the set median for this class:', sod_set_median_list[-1])
-        print('\nsods in graph space for this class:', sod_gs_list[-1])
-#        print('\ndistance in kernel space of set median for this class:', 
-#              dis_ks_set_median_list[-1])
-#        print('\nsmallest distances in kernel space for this class:', 
-#              dis_ks_min_list[-1])   
-        print('\ntimes for this class:', time_list[-1])
-        
-        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
-        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
-#        dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
-#        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
-        time_list[-1] = np.mean(time_list[-1])
-        
-    print()
-    print('\nmean sods of the set median for each class:', sod_set_median_list)
-    print('\nmean sods in graph space for each class:', sod_gs_list)
-#    print('\ndistances in kernel space of set median for each class:', 
-#            dis_ks_set_median_list)
-#    print('\nmean smallest distances in kernel space for each class:', 
-#            dis_ks_min_list)
-    print('\nmean times for each class:', time_list)
-    
-    print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
-    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
-#    print('\nmean distances in kernel space of set median of all:', 
-#            np.mean(dis_ks_set_median_list))
-#    print('\nmean smallest distances in kernel space of all:', 
-#            np.mean(dis_ks_min_list))
-    print('\nmean times of all:', np.mean(time_list))
-
-
-
-
-def test_iam_monoterpenoides():
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    
-    # parameters for GED function from the IAM paper.
-    # fitted edit costs (Gaussian).
-    c_vi = 0.03620133402089074
-    c_vr = 0.0417574590207099
-    c_vs = 0.009992282328587499
-    c_ei = 0.08293120042342755
-    c_er = 0.09512220476358019
-    c_es = 0.09222529696841467
-#    # fitted edit costs (linear combinations).
-#    c_vi = 0.1749684054238749
-#    c_vr = 0.0734054228711457
-#    c_vs = 0.05017781726016715
-#    c_ei = 0.1869431164806936
-#    c_er = 0.32055856948274
-#    c_es = 0.2569469379247611
-#    # unfitted edit costs.
-#    c_vi = 3
-#    c_vr = 3
-#    c_vs = 1
-#    c_ei = 3
-#    c_er = 3
-#    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = False
-    connected_iam = False
-    # parameters for IAM function
-#    ged_cost = 'CONSTANT'
-    ged_cost = 'CONSTANT'
-    ged_method = 'IPFP'
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-#    edit_cost_constant = []
-    ged_stabilizer = 'min'
-    ged_repeat = 50
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # classify graphs according to letters.
-    time_list = []
-    dis_ks_min_list = []
-    dis_ks_set_median_list = []
-    sod_gs_list = []
-    g_best = []
-    sod_set_median_list = []
-    sod_list_list = []
-    idx_dict = get_same_item_indices(y_all)
-    for y_class in idx_dict:
-        print('\n-------------------------------------------------------')
-        print('class of y:', y_class)
-        Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        dis_ks_set_median_list.append([])
-        sod_gs_list.append([])
-        g_best.append([])
-        sod_set_median_list.append([])
-        
-        for repeat in range(50):
-            idx_rdm = random.sample(range(len(Gn_class)), 10)
-            print('graphs chosen:', idx_rdm)
-            Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
-            Gn_candidate = [g.copy() for g in Gn_median]
-        
-            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
-            time0 = time.time()
-            G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
-            = iam_upgraded(Gn_median, 
-                Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
-                epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, 
-                params_ged=params_ged)
-            time_total = time.time() - time0
-            print('\ntime: ', time_total)
-            time_list[-1].append(time_total)
-            g_best[-1].append(G_gen_median_list[0])
-            sod_set_median_list[-1].append(sod_set_median)
-            print('\nsmallest sod of the set median:', sod_set_median)
-            sod_gs_list[-1].append(sod_gen_median)
-            print('\nsmallest sod in graph space:', sod_gen_median)
-            sod_list_list.append(sod_list)
-            
-            # show the best graph and save it to file.
-            print('one of the possible corresponding pre-images is')
-            nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), 
-                    with_labels=True)
-#            plt.show()
-    #        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + 
-#            plt.savefig('results/iam/paper_compare/monoter_y' + str(y_class) + 
-#                        '_repeat' + str(repeat) + '_' + str(time.time()) +
-#                        '.png', format="PNG")
-            plt.clf()
-    #        print(G_gen_median_list[0].nodes(data=True))
-    #        print(G_gen_median_list[0].edges(data=True))
-            
-    
-            # compute distance between \psi and the set median graph.
-            knew_set_median = compute_kernel(G_set_median_list + Gn_median, 
-                gkernel, node_label, edge_label, False)
-            dhat_new_set_median_list = []
-            for idx, g_tmp in enumerate(G_set_median_list):
-                # @todo: the term3 below could use the one at the beginning of the function.
-                dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list), 
-                    len(G_set_median_list) + len(Gn_median) + 1), 
-                    alpha_range, knew_set_median, withterm3=False))
-                
-            print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0]) 
-            dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
-            
-            
-            # compute distance between \psi and the new generated graphs.
-            knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
-                              edge_label, False)
-            dhat_new_list = []
-            for idx, g_tmp in enumerate(G_gen_median_list):
-                # @todo: the term3 below could use the one at the beginning of the function.
-                dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), 
-                                    len(G_gen_median_list) + len(Gn_median) + 1), 
-                                    alpha_range, knew, withterm3=False))
-                
-            print('\nsmallest distance in kernel space: ', dhat_new_list[0]) 
-            dis_ks_min_list[-1].append(dhat_new_list[0])
-            
-
-        print('\nsods of the set median for this class:', sod_set_median_list[-1])
-        print('\nsods in graph space for this class:', sod_gs_list[-1])
-        print('\ndistance in kernel space of set median for this class:', 
-              dis_ks_set_median_list[-1])
-        print('\nsmallest distances in kernel space for this class:', 
-              dis_ks_min_list[-1])   
-        print('\ntimes for this class:', time_list[-1])
-        
-        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
-        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
-        dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
-        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
-        time_list[-1] = np.mean(time_list[-1])
-        
-    print()
-    print('\nmean sods of the set median for each class:', sod_set_median_list)
-    print('\nmean sods in graph space for each class:', sod_gs_list)
-    print('\ndistances in kernel space of set median for each class:', 
-            dis_ks_set_median_list)
-    print('\nmean smallest distances in kernel space for each class:', 
-            dis_ks_min_list)
-    print('\nmean times for each class:', time_list)
-    
-    print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
-    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
-    print('\nmean distances in kernel space of set median of all:', 
-            np.mean(dis_ks_set_median_list))
-    print('\nmean smallest distances in kernel space of all:', 
-            np.mean(dis_ks_min_list))
-    print('\nmean times of all:', np.mean(time_list))
-    
-    nb_better_sods = 0
-    nb_worse_sods = 0
-    nb_same_sods = 0
-    for sods in sod_list_list:
-        if sods[0] > sods[-1]:
-            nb_better_sods += 1
-        elif sods[0] < sods[-1]:
-            nb_worse_sods += 1
-        else:
-            nb_same_sods += 1
-    print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods), 
-          'are getting better,', str(nb_worse_sods), 'are getting worse,', 
-          str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
-          'sods are improved.')
-    
-    
-def test_iam_mutag():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    
-    # parameters for GED function from the IAM paper.
-    # fitted edit costs.
-    c_vi = 0.03523843108436513
-    c_vr = 0.03347339739350128
-    c_vs = 0.06871290673612238
-    c_ei = 0.08591999846720685
-    c_er = 0.07962086440894103
-    c_es = 0.08596855855478233
-    # unfitted edit costs.
-#    c_vi = 3
-#    c_vr = 3
-#    c_vs = 1
-#    c_ei = 3
-#    c_er = 3
-#    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = False
-    connected_iam = False
-    # parameters for IAM function
-#    ged_cost = 'CONSTANT'
-    ged_cost = 'CONSTANT'
-    ged_method = 'IPFP'
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-#    edit_cost_constant = []
-    ged_stabilizer = 'min'
-    ged_repeat = 50
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # classify graphs according to letters.
-    time_list = []
-    dis_ks_min_list = []
-    dis_ks_set_median_list = []
-    sod_gs_list = []
-    g_best = []
-    sod_set_median_list = []
-    sod_list_list = []
-    idx_dict = get_same_item_indices(y_all)
-    for y_class in idx_dict:
-        print('\n-------------------------------------------------------')
-        print('class of y:', y_class)
-        Gn_class = [Gn[i].copy() for i in idx_dict[y_class]]
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        dis_ks_set_median_list.append([])
-        sod_gs_list.append([])
-        g_best.append([])
-        sod_set_median_list.append([])
-        
-        for repeat in range(50):
-            idx_rdm = random.sample(range(len(Gn_class)), 10)
-            print('graphs chosen:', idx_rdm)
-            Gn_median = [Gn_class[idx].copy() for idx in idx_rdm]
-            Gn_candidate = [g.copy() for g in Gn_median]
-        
-            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
-            time0 = time.time()
-            G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
-            = iam_upgraded(Gn_median, 
-                Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
-                epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, 
-                params_ged=params_ged)
-            time_total = time.time() - time0
-            print('\ntime: ', time_total)
-            time_list[-1].append(time_total)
-            g_best[-1].append(G_gen_median_list[0])
-            sod_set_median_list[-1].append(sod_set_median)
-            print('\nsmallest sod of the set median:', sod_set_median)
-            sod_gs_list[-1].append(sod_gen_median)
-            print('\nsmallest sod in graph space:', sod_gen_median)
-            sod_list_list.append(sod_list)
-            
-            # show the best graph and save it to file.
-            print('one of the possible corresponding pre-images is')
-            nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), 
-                    with_labels=True)
-#            plt.show()
-    #        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + 
-#            plt.savefig('results/iam/paper_compare/mutag_y' + str(y_class) + 
-#                        '_repeat' + str(repeat) + '_' + str(time.time()) +
-#                        '.png', format="PNG")
-            plt.clf()
-    #        print(G_gen_median_list[0].nodes(data=True))
-    #        print(G_gen_median_list[0].edges(data=True))
-            
-    
-            # compute distance between \psi and the set median graph.
-            knew_set_median = compute_kernel(G_set_median_list + Gn_median, 
-                gkernel, node_label, edge_label, False)
-            dhat_new_set_median_list = []
-            for idx, g_tmp in enumerate(G_set_median_list):
-                # @todo: the term3 below could use the one at the beginning of the function.
-                dhat_new_set_median_list.append(dis_gstar(idx, range(len(G_set_median_list), 
-                    len(G_set_median_list) + len(Gn_median) + 1), 
-                    alpha_range, knew_set_median, withterm3=False))
-                
-            print('\ndistance in kernel space of set median: ', dhat_new_set_median_list[0]) 
-            dis_ks_set_median_list[-1].append(dhat_new_set_median_list[0])
-            
-            
-            # compute distance between \psi and the new generated graphs.
-            knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
-                              edge_label, False)
-            dhat_new_list = []
-            for idx, g_tmp in enumerate(G_gen_median_list):
-                # @todo: the term3 below could use the one at the beginning of the function.
-                dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), 
-                                    len(G_gen_median_list) + len(Gn_median) + 1), 
-                                    alpha_range, knew, withterm3=False))
-                
-            print('\nsmallest distance in kernel space: ', dhat_new_list[0]) 
-            dis_ks_min_list[-1].append(dhat_new_list[0])
-            
-
-        print('\nsods of the set median for this class:', sod_set_median_list[-1])
-        print('\nsods in graph space for this class:', sod_gs_list[-1])
-        print('\ndistance in kernel space of set median for this class:', 
-              dis_ks_set_median_list[-1])
-        print('\nsmallest distances in kernel space for this class:', 
-              dis_ks_min_list[-1])   
-        print('\ntimes for this class:', time_list[-1])
-        
-        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
-        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
-        dis_ks_set_median_list[-1] = np.mean(dis_ks_set_median_list[-1])
-        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
-        time_list[-1] = np.mean(time_list[-1])
-        
-    print()
-    print('\nmean sods of the set median for each class:', sod_set_median_list)
-    print('\nmean sods in graph space for each class:', sod_gs_list)
-    print('\ndistances in kernel space of set median for each class:', 
-            dis_ks_set_median_list)
-    print('\nmean smallest distances in kernel space for each class:', 
-            dis_ks_min_list)
-    print('\nmean times for each class:', time_list)
-    
-    print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
-    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
-    print('\nmean distances in kernel space of set median of all:', 
-            np.mean(dis_ks_set_median_list))
-    print('\nmean smallest distances in kernel space of all:', 
-            np.mean(dis_ks_min_list))
-    print('\nmean times of all:', np.mean(time_list))
-    
-    nb_better_sods = 0
-    nb_worse_sods = 0
-    nb_same_sods = 0
-    for sods in sod_list_list:
-        if sods[0] > sods[-1]:
-            nb_better_sods += 1
-        elif sods[0] < sods[-1]:
-            nb_worse_sods += 1
-        else:
-            nb_same_sods += 1
-    print('\n In', str(len(sod_list_list)), 'sod lists,', str(nb_better_sods), 
-          'are getting better,', str(nb_worse_sods), 'are getting worse,', 
-          str(nb_same_sods), 'are not changed; ', str(nb_better_sods / len(sod_list_list)),
-          'sods are improved.')
-    
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_iam_median_nb():
-    
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-#    # parameters for GED function
-#    c_vi = 0.037
-#    c_vr = 0.038
-#    c_vs = 0.075
-#    c_ei = 0.001
-#    c_er = 0.001
-#    c_es = 0.0
-#    ite_max_iam = 50
-#    epsilon_iam = 0.001
-#    removeNodes = False
-#    connected_iam = False
-#    # parameters for IAM function
-#    ged_cost = 'CONSTANT'
-#    ged_method = 'IPFP'
-#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-#    ged_stabilizer = 'min'
-#    ged_repeat = 50
-#    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-#                  'edit_cost_constant': edit_cost_constant, 
-#                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # parameters for GED function
-    c_vi = 4
-    c_vr = 4
-    c_vs = 2
-    c_ei = 1
-    c_er = 1
-    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = False
-    connected_iam = False
-    # parameters for IAM function
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    edit_cost_constant = []
-    ged_stabilizer = 'min'
-    ged_repeat = 50
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-    # number of graphs; we what to compute the median of these graphs. 
-#    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    nb_median_range = [len(Gn)]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-#    sod_gs_min_list = []
-#    nb_updated_list = []
-#    nb_updated_k_list = []
-    g_best = []
-    for nb_median in nb_median_range:
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        Gn_candidate = [g.copy() for g in Gn]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-#        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-#        km_tmp = gmfile['gm']
-#        time_km = gmfile['gmtime']
-#        # modify mixed gram matrix.
-#        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-#        for i in range(len(Gn)):
-#            for j in range(i, len(Gn)):
-#                km[i, j] = km_tmp[i, j]
-#                km[j, i] = km[i, j]
-#        for i in range(len(Gn)):
-#            for j, idx in enumerate(idx_rdm):
-#                km[i, len(Gn) + j] = km[i, idx]
-#                km[len(Gn) + j, i] = km[i, idx]
-#        for i, idx1 in enumerate(idx_rdm):
-#            for j, idx2 in enumerate(idx_rdm):
-#                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        time0 = time.time()
-        ghat_new_list, sod_min = iam_upgraded(Gn_median, Gn_candidate, 
-            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
-            epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, 
-            params_ged=params_ged)
-            
-        time_total = time.time() - time0
-        print('\ntime: ', time_total)
-        time_list.append(time_total)
-        
-        # compute distance between \psi and the new generated graphs.
-        knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
-        dhat_new_list = []
-        for idx, g_tmp in enumerate(ghat_new_list):
-            # @todo: the term3 below could use the one at the beginning of the function.
-            dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), 
-                                len(ghat_new_list) + len(Gn_median) + 1), 
-                                alpha_range, knew, withterm3=False))
-            
-        print('\nsmallest distance in kernel space: ', dhat_new_list[0]) 
-        dis_ks_min_list.append(dhat_new_list[0])
-        g_best.append(ghat_new_list[0])
-        
-        # show the best graph and save it to file.
-#        print('the shortest distance is', dhat)
-        print('one of the possible corresponding pre-images is')
-        nx.draw(ghat_new_list[0], labels=nx.get_node_attributes(ghat_new_list[0], 'atom'), 
-                with_labels=True)
-        plt.show()
-#        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + 
-        plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) + 
-                    '.png', format="PNG")
-        plt.clf()
-#        print(ghat_list[0].nodes(data=True))
-#        print(ghat_list[0].edges(data=True))
-    
-        sod_gs_list.append(sod_min)
-#        sod_gs_min_list.append(np.min(sod_min))
-        print('\nsmallest sod in graph space: ', sod_min)
-        
-    print('\nsods in graph space: ', sod_gs_list)
-#    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs: ', 
-          dis_ks_min_list) 
-#    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', 
-#          nb_updated_list)
-#    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', 
-#          nb_updated_k_list)
-    print('\ntimes:', time_list)
-    
-    
-def test_iam_letter_h():
-    from median import draw_Letter_graph
-    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-#          'extra_params': {}} # node nsymb
-#    Gn = Gn[0:50]
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    gkernel = 'structuralspkernel'
-    
-    # parameters for GED function from the IAM paper.
-    c_vi = 3
-    c_vr = 3
-    c_vs = 1
-    c_ei = 3
-    c_er = 3
-    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = False
-    connected_iam = False
-    # parameters for IAM function
-#    ged_cost = 'CONSTANT'
-    ged_cost = 'LETTER'
-    ged_method = 'IPFP'
-#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-    edit_cost_constant = []
-    ged_stabilizer = 'min'
-    ged_repeat = 50
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # classify graphs according to letters.
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    g_best = []
-    sod_set_median_list = []
-    idx_dict = get_same_item_indices(y_all)
-    for letter in idx_dict:
-        print('\n-------------------------------------------------------')
-        print('letter', letter)
-        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        sod_gs_list.append([])
-        g_best.append([])
-        sod_set_median_list.append([])
-        
-        for repeat in range(50):
-            idx_rdm = random.sample(range(len(Gn_let)), 50)
-            print('graphs chosen:', idx_rdm)
-            Gn_median = [Gn_let[idx].copy() for idx in idx_rdm]
-            Gn_candidate = [g.copy() for g in Gn_median]
-        
-            alpha_range = [1 / len(Gn_median)] * len(Gn_median)
-            time0 = time.time()
-            ghat_new_list, sod_min, sod_set_median = iam_upgraded(Gn_median, 
-                Gn_candidate, c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
-                epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, 
-                params_ged=params_ged)
-            time_total = time.time() - time0
-            print('\ntime: ', time_total)
-            time_list[-1].append(time_total)
-            g_best[-1].append(ghat_new_list[0])
-            sod_set_median_list[-1].append(sod_set_median)
-            print('\nsmallest sod of the set median:', sod_set_median)
-            sod_gs_list[-1].append(sod_min)
-            print('\nsmallest sod in graph space:', sod_min)
-            
-            # show the best graph and save it to file.
-            print('one of the possible corresponding pre-images is')
-            draw_Letter_graph(ghat_new_list[0], savepath='results/iam/paper_compare/')
-            
-            # compute distance between \psi and the new generated graphs.
-            knew = compute_kernel(ghat_new_list + Gn_median, gkernel, False)
-            dhat_new_list = []
-            for idx, g_tmp in enumerate(ghat_new_list):
-                # @todo: the term3 below could use the one at the beginning of the function.
-                dhat_new_list.append(dis_gstar(idx, range(len(ghat_new_list), 
-                                    len(ghat_new_list) + len(Gn_median) + 1), 
-                                    alpha_range, knew, withterm3=False))
-                
-            print('\nsmallest distance in kernel space: ', dhat_new_list[0]) 
-            dis_ks_min_list[-1].append(dhat_new_list[0])            
-        
-        print('\nsods of the set median for this letter:', sod_set_median_list[-1])
-        print('\nsods in graph space for this letter:', sod_gs_list[-1])
-        print('\nsmallest distances in kernel space for this letter:', 
-              dis_ks_min_list[-1])
-        print('\ntimes for this letter:', time_list[-1])
-        
-        sod_set_median_list[-1] = np.mean(sod_set_median_list[-1])
-        sod_gs_list[-1] = np.mean(sod_gs_list[-1])
-        dis_ks_min_list[-1] = np.mean(dis_ks_min_list[-1])
-        time_list[-1] = np.mean(time_list[-1])
-        
-    print('\nmean sods of the set median for each letter:', sod_set_median_list)
-    print('\nmean sods in graph space for each letter:', sod_gs_list)
-    print('\nmean smallest distances in kernel space for each letter:', 
-            dis_ks_min_list)
-    print('\nmean times for each letter:', time_list)
-    
-    print('\nmean sods of the set median of all:', np.mean(sod_set_median_list))
-    print('\nmean sods in graph space of all:', np.mean(sod_gs_list))
-    print('\nmean smallest distances in kernel space of all:', 
-            np.mean(dis_ks_min_list))
-    print('\nmean times of all:', np.mean(time_list))
-    
-    
-
-    
-
-
-    
-    
-
-def test_iam_fitdistance():
-    
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-#    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    
-#    lmbda = 0.03 # termination probalility
-#    # parameters for GED function
-#    c_vi = 0.037
-#    c_vr = 0.038
-#    c_vs = 0.075
-#    c_ei = 0.001
-#    c_er = 0.001
-#    c_es = 0.0
-#    ite_max_iam = 50
-#    epsilon_iam = 0.001
-#    removeNodes = False
-#    connected_iam = False
-#    # parameters for IAM function
-#    ged_cost = 'CONSTANT'
-#    ged_method = 'IPFP'
-#    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-#    ged_stabilizer = 'min'
-#    ged_repeat = 50
-#    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-#                  'edit_cost_constant': edit_cost_constant, 
-#                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # parameters for GED function
-    c_vi = 4
-    c_vr = 4
-    c_vs = 2
-    c_ei = 1
-    c_er = 1
-    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = False
-    connected_iam = False
-    # parameters for IAM function
-    ged_cost = 'CHEM_1'
-    ged_method = 'IPFP'
-    edit_cost_constant = []
-    ged_stabilizer = 'min'
-    ged_repeat = 50
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-    # number of graphs; we what to compute the median of these graphs. 
-#    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    nb_median_range = [10]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-    
-    time_list = []
-    dis_ks_min_list = []
-    dis_ks_gen_median_list = []
-    sod_gs_list = []
-#    sod_gs_min_list = []
-#    nb_updated_list = []
-#    nb_updated_k_list = []
-    g_best = []
-    for nb_median in nb_median_range:
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        Gn_candidate = [g.copy() for g in Gn_median]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-#        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-#        km_tmp = gmfile['gm']
-#        time_km = gmfile['gmtime']
-#        # modify mixed gram matrix.
-#        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-#        for i in range(len(Gn)):
-#            for j in range(i, len(Gn)):
-#                km[i, j] = km_tmp[i, j]
-#                km[j, i] = km[i, j]
-#        for i in range(len(Gn)):
-#            for j, idx in enumerate(idx_rdm):
-#                km[i, len(Gn) + j] = km[i, idx]
-#                km[len(Gn) + j, i] = km[i, idx]
-#        for i, idx1 in enumerate(idx_rdm):
-#            for j, idx2 in enumerate(idx_rdm):
-#                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        time0 = time.time()
-        G_gen_median_list, sod_gen_median, sod_list, G_set_median_list, sod_set_median \
-            = iam_upgraded(Gn_median, Gn_candidate, 
-            c_ei=c_ei, c_er=c_er, c_es=c_es, ite_max=ite_max_iam,
-            epsilon=epsilon_iam, connected=connected_iam, removeNodes=removeNodes, 
-            params_ged=params_ged)
-            
-        time_total = time.time() - time0
-        print('\ntime: ', time_total)
-        time_list.append(time_total)
-        
-        # compute distance between \psi and the new generated graphs.
-        knew = compute_kernel(G_gen_median_list + Gn_median, gkernel, node_label,
-                              edge_label, False)
-        dhat_new_list = []
-        for idx, g_tmp in enumerate(G_gen_median_list):
-            # @todo: the term3 below could use the one at the beginning of the function.
-            dhat_new_list.append(dis_gstar(idx, range(len(G_gen_median_list), 
-                                len(G_gen_median_list) + len(Gn_median) + 1), 
-                                alpha_range, knew, withterm3=False))
-            
-        print('\nsmallest distance in kernel space: ', dhat_new_list[0]) 
-        dis_ks_min_list.append(dhat_new_list[0])
-        g_best.append(G_gen_median_list[0])
-        
-        # show the best graph and save it to file.
-#        print('the shortest distance is', dhat)
-        print('one of the possible corresponding pre-images is')
-        nx.draw(G_gen_median_list[0], labels=nx.get_node_attributes(G_gen_median_list[0], 'atom'), 
-                with_labels=True)
-        plt.show()
-#        plt.savefig('results/iam/mutag_median.fit_costs2.001.nb' + str(nb_median) + 
-#        plt.savefig('results/iam/mutag_median_unfit2.nb' + str(nb_median) + 
-#                    '.png', format="PNG")
-        plt.clf()
-#        print(ghat_list[0].nodes(data=True))
-#        print(ghat_list[0].edges(data=True))
-    
-        sod_gs_list.append(sod_gen_median)
-#        sod_gs_min_list.append(np.min(sod_gen_median))
-        print('\nsmallest sod in graph space: ', sod_gen_median)
-        print('\nsmallest sod of set median in graph space: ', sod_set_median)
-        
-    print('\nsods in graph space: ', sod_gs_list)
-#    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs: ', 
-          dis_ks_min_list) 
-#    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', 
-#          nb_updated_list)
-#    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', 
-#          nb_updated_k_list)
-    print('\ntimes:', time_list)
-        
-    
-            
-    
-    
-###############################################################################
-
-    
-if __name__ == '__main__':
-###############################################################################
-# tests on different numbers of median-sets.
-#    test_iam_median_nb()
-#    test_iam_letter_h()
-#    test_iam_monoterpenoides()
-#    test_iam_mutag()
-    
-#    test_iam_fitdistance()
-#    print("test log")
-    
-    test_iam_monoterpenoides_with_init40()
diff --git a/gklearn/preimage/test_k_closest_graphs.py b/gklearn/preimage/test_k_closest_graphs.py
deleted file mode 100644
index 152deab..0000000
--- a/gklearn/preimage/test_k_closest_graphs.py
+++ /dev/null
@@ -1,462 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Dec 16 11:53:54 2019
-
-@author: ljia
-"""
-import numpy as np
-import math
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-from tqdm import tqdm
-from itertools import combinations, islice
-import multiprocessing
-from multiprocessing import Pool
-from functools import partial
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL
-#from gklearn.utils.logger2file import *
-from gklearn.preimage.iam import iam_upgraded, iam_bash
-from gklearn.preimage.utils import compute_kernel, dis_gstar, kernel_distance_matrix
-from gklearn.preimage.fitDistance import fit_GED_to_kernel_distance
-#from gklearn.preimage.ged import ged_median
-
-
-def fit_edit_cost_constants(fit_method, edit_cost_name, 
-                            edit_cost_constants=None, initial_solutions=1,
-                            Gn_median=None, node_label=None, edge_label=None,
-                            gkernel=None, dataset=None, init_ecc=None,
-                            Gn=None, Kmatrix_median=None):
-    """fit edit cost constants.    
-    """
-    if fit_method == 'random': # random
-        if edit_cost_name == 'LETTER':
-            edit_cost_constants = random.sample(range(1, 10), 3)
-            edit_cost_constants = [item * 0.1 for item in edit_cost_constants]
-        elif edit_cost_name == 'LETTER2':
-            random.seed(time.time())
-            edit_cost_constants = random.sample(range(1, 10), 5)
-#            edit_cost_constants = [item * 0.1 for item in edit_cost_constants]
-        elif edit_cost_name == 'NON_SYMBOLIC':
-            edit_cost_constants = random.sample(range(1, 10), 6)
-            if Gn_median[0].graph['node_attrs'] == []:
-                edit_cost_constants[2] = 0
-            if Gn_median[0].graph['edge_attrs'] == []:
-                edit_cost_constants[5] = 0
-        else:
-            edit_cost_constants = random.sample(range(1, 10), 6)
-        print('edit cost constants used:', edit_cost_constants)
-    elif fit_method == 'expert': # expert
-        if init_ecc is None:
-            if edit_cost_name == 'LETTER':
-                edit_cost_constants = [0.9, 1.7, 0.75] 
-            elif edit_cost_name == 'LETTER2':
-                edit_cost_constants = [0.675, 0.675, 0.75, 0.425, 0.425]
-            else:
-                edit_cost_constants = [3, 3, 1, 3, 3, 1] 
-        else:
-            edit_cost_constants = init_ecc
-    elif fit_method == 'k-graphs':
-        itr_max = 6
-        if init_ecc is None:
-            if edit_cost_name == 'LETTER':
-                init_costs = [0.9, 1.7, 0.75] 
-            elif edit_cost_name == 'LETTER2':
-                init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
-            elif edit_cost_name == 'NON_SYMBOLIC':
-                init_costs = [0, 0, 1, 1, 1, 0]
-                if Gn_median[0].graph['node_attrs'] == []:
-                    init_costs[2] = 0
-                if Gn_median[0].graph['edge_attrs'] == []:
-                    init_costs[5] = 0
-            else:
-                init_costs = [3, 3, 1, 3, 3, 1] 
-        else:
-            init_costs = init_ecc
-        algo_options = '--threads 1 --initial-solutions ' \
-                        + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
-        params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP', 
-                      'algo_options': algo_options, 'stabilizer': None}
-        # fit on k-graph subset
-        edit_cost_constants, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn_median, 
-                node_label, edge_label, gkernel, itr_max, params_ged=params_ged, 
-                init_costs=init_costs, dataset=dataset, Kmatrix=Kmatrix_median, 
-                parallel=True)
-    elif fit_method == 'whole-dataset':
-        itr_max = 6
-        if init_ecc is None:
-            if edit_cost_name == 'LETTER':
-                init_costs = [0.9, 1.7, 0.75] 
-            elif edit_cost_name == 'LETTER2':
-                init_costs = [0.675, 0.675, 0.75, 0.425, 0.425]
-            else:
-                init_costs = [3, 3, 1, 3, 3, 1] 
-        else:
-            init_costs = init_ecc
-        algo_options = '--threads 1 --initial-solutions ' \
-                        + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
-        params_ged = {'lib': 'gedlibpy', 'cost': edit_cost_name, 'method': 'IPFP', 
-                    'algo_options': algo_options, 'stabilizer': None}
-        # fit on all subset
-        edit_cost_constants, _, _, _, _, _, _ = fit_GED_to_kernel_distance(Gn, 
-                node_label, edge_label, gkernel, itr_max, params_ged=params_ged, 
-                init_costs=init_costs, dataset=dataset, parallel=True)
-    elif fit_method == 'precomputed':
-        pass
-    
-    return edit_cost_constants
-
-
-def compute_distances_to_true_median(Gn_median, fname_sm, fname_gm,
-                                     gkernel, edit_cost_name, 
-                                     Kmatrix_median=None):
-    # reform graphs.
-    set_median = loadGXL(fname_sm)
-    gen_median = loadGXL(fname_gm)
-#    print(gen_median.nodes(data=True))
-#    print(gen_median.edges(data=True))
-    if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2' or edit_cost_name == 'NON_SYMBOLIC':
-#        dataset == 'Fingerprint':
-#        for g in Gn_median:
-#            reform_attributes(g)
-        reform_attributes(set_median, Gn_median[0].graph['node_attrs'], 
-                          Gn_median[0].graph['edge_attrs'])
-        reform_attributes(gen_median, Gn_median[0].graph['node_attrs'], 
-                          Gn_median[0].graph['edge_attrs'])
-    
-    if edit_cost_name == 'LETTER' or edit_cost_name == 'LETTER2' or edit_cost_name == 'NON_SYMBOLIC':
-        node_label = None
-        edge_label = None
-    else:
-        node_label = 'chem'
-        edge_label = 'valence'
-        
-    # compute Gram matrix for median set.
-    if Kmatrix_median is None:
-        Kmatrix_median = compute_kernel(Gn_median, gkernel, node_label, edge_label, False)
-        
-    # compute distance in kernel space for set median.
-    kernel_sm = []
-    for G_median in Gn_median:
-        km_tmp = compute_kernel([set_median, G_median], gkernel, node_label, edge_label, False)
-        kernel_sm.append(km_tmp[0, 1])
-    Kmatrix_sm = np.concatenate((np.array([kernel_sm]), np.copy(Kmatrix_median)), axis=0)
-    Kmatrix_sm = np.concatenate((np.array([[km_tmp[0, 0]] + kernel_sm]).T, Kmatrix_sm), axis=1)
-#    Kmatrix_sm = compute_kernel([set_median] + Gn_median, gkernel, 
-#                                node_label, edge_label, False)
-    dis_k_sm = dis_gstar(0, range(1, 1+len(Gn_median)), 
-                         [1 / len(Gn_median)] * len(Gn_median), Kmatrix_sm, withterm3=False)
-#    print(gen_median.nodes(data=True))
-#    print(gen_median.edges(data=True))
-#    print(set_median.nodes(data=True))
-#    print(set_median.edges(data=True))
-    
-    # compute distance in kernel space for generalized median.
-    kernel_gm = []
-    for G_median in Gn_median:
-        km_tmp = compute_kernel([gen_median, G_median], gkernel, node_label, edge_label, False)
-        kernel_gm.append(km_tmp[0, 1])
-    Kmatrix_gm = np.concatenate((np.array([kernel_gm]), np.copy(Kmatrix_median)), axis=0)
-    Kmatrix_gm = np.concatenate((np.array([[km_tmp[0, 0]] + kernel_gm]).T, Kmatrix_gm), axis=1)
-#    Kmatrix_gm = compute_kernel([gen_median] + Gn_median, gkernel, 
-#                                node_label, edge_label, False)
-    dis_k_gm = dis_gstar(0, range(1, 1+len(Gn_median)), 
-                         [1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False)
-    
-    # compute distance in kernel space for each graph in median set.
-    dis_k_gi = []
-    for idx in range(len(Gn_median)):
-        dis_k_gi.append(dis_gstar(idx+1, range(1, 1+len(Gn_median)), 
-                             [1 / len(Gn_median)] * len(Gn_median), Kmatrix_gm, withterm3=False))
-
-    print('dis_k_sm:', dis_k_sm)
-    print('dis_k_gm:', dis_k_gm)
-    print('dis_k_gi:', dis_k_gi)
-    idx_dis_k_gi_min = np.argmin(dis_k_gi)
-    dis_k_gi_min = dis_k_gi[idx_dis_k_gi_min]
-    print('min dis_k_gi:', dis_k_gi_min)    
-    
-    return dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min
-
-
-def median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, fit_method,
-                               graph_dir=None, initial_solutions=1,
-                               edit_cost_constants=None, group_min=None, 
-                               dataset=None, edit_cost_name=None, init_ecc=None,
-                               Kmatrix=None, parallel=True):
-#    dataset = dataset.lower()
-    
-#    # compute distances in kernel space.
-#    dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, 
-#                                              Kmatrix=None, gkernel=gkernel)
-#    # ged.
-#    gmfile = np.load('results/test_k_closest_graphs/ged_mat.fit_on_whole_dataset.with_medians.gm.npz')
-#    ged_mat = gmfile['ged_mat']
-#    dis_mat = ged_mat[0:len(Gn), 0:len(Gn)]
-    
-#    # choose k closest graphs
-#    time0 = time.time()
-#    sod_ks_min, group_min = get_closest_k_graphs(dis_mat, k, parallel)
-#    time_spent = time.time() - time0
-#    print('closest graphs:', sod_ks_min, group_min)
-#    print('time spent:', time_spent)
-#    group_min = (12, 13, 22, 29) # closest w.r.t path kernel
-#    group_min = (77, 85, 160, 171) # closest w.r.t ged
-#    group_min = (0,1,2,3,4,5,6,7,8,9,10,11) # closest w.r.t treelet kernel
-    Gn_median = [Gn[g].copy() for g in group_min]
-    if Kmatrix is not None:
-        Kmatrix_median = np.copy(Kmatrix[group_min,:])
-        Kmatrix_median = Kmatrix_median[:,group_min]
-    else:
-        Kmatrix_median = None
-        
-
-    # 1. fit edit cost constants. 
-    time0 = time.time()
-    edit_cost_constants = fit_edit_cost_constants(fit_method, edit_cost_name,
-        edit_cost_constants=edit_cost_constants, initial_solutions=initial_solutions,
-        Gn_median=Gn_median, node_label=node_label, edge_label=edge_label,
-        gkernel=gkernel, dataset=dataset, init_ecc=init_ecc,
-        Gn=Gn, Kmatrix_median=Kmatrix_median)
-    time_fitting = time.time() - time0
-    
-    
-    # 2. compute set median and gen median using IAM (C++ through bash).
-    print('\nstart computing set median and gen median using IAM (C++ through bash)...\n')
-    group_fnames = [Gn[g].graph['filename'] for g in group_min]
-    time0 = time.time()
-    sod_sm, sod_gm, fname_sm, fname_gm = iam_bash(group_fnames, edit_cost_constants,
-            cost=edit_cost_name, initial_solutions=initial_solutions,
-            graph_dir=graph_dir, dataset=dataset)
-    time_generating = time.time() - time0
-    print('\nmedians computed.\n')
-    
-    
-    # 3. compute distances to real median.
-    print('\nstart computing distances to true median....\n')
-    Gn_median = [Gn[g].copy() for g in group_min]
-    dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min = \
-        compute_distances_to_true_median(Gn_median, fname_sm, fname_gm,
-                                         gkernel, edit_cost_name, 
-                                         Kmatrix_median=Kmatrix_median)
-    idx_dis_k_gi_min = group_min[idx_dis_k_gi_min]
-    print('index min dis_k_gi:', idx_dis_k_gi_min)
-    print('sod_sm:', sod_sm)
-    print('sod_gm:', sod_gm)
-    
-    # collect return values.
-    return (sod_sm, sod_gm), \
-           (dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min), \
-           (time_fitting, time_generating)
-
-
-def reform_attributes(G, na_names=[], ea_names=[]):
-    if not na_names == []: 
-        for node in G.nodes:
-            G.nodes[node]['attributes'] = [G.node[node][a_name] for a_name in na_names]
-    if not ea_names == []:
-        for edge in G.edges:
-            G.edges[edge]['attributes'] = [G.edge[edge][a_name] for a_name in ea_names]
-
-
-def get_closest_k_graphs(dis_mat, k, parallel):
-    k_graph_groups = combinations(range(0, len(dis_mat)), k)
-    sod_ks_min = np.inf
-    if parallel:
-        len_combination = get_combination_length(len(dis_mat), k)
-        len_itr_max = int(len_combination if len_combination < 1e7 else 1e7)
-#        pos_cur = 0
-        graph_groups_slices = split_iterable(k_graph_groups, len_itr_max, len_combination)
-        for graph_groups_cur in graph_groups_slices:
-#        while True:
-#            graph_groups_cur = islice(k_graph_groups, pos_cur, pos_cur + len_itr_max)
-            graph_groups_cur_list = list(graph_groups_cur) 
-            print('current position:', graph_groups_cur_list[0])
-            len_itr_cur = len(graph_groups_cur_list)
-#            if len_itr_cur < len_itr_max:
-#                break
-
-            itr = zip(graph_groups_cur_list, range(0, len_itr_cur))
-            sod_k_list = np.empty(len_itr_cur)
-            graphs_list = [None] * len_itr_cur
-            n_jobs = multiprocessing.cpu_count()
-            chunksize = int(len_itr_max / n_jobs + 1)
-            n_jobs = multiprocessing.cpu_count()
-            def init_worker(dis_mat_toshare):
-                global G_dis_mat
-                G_dis_mat = dis_mat_toshare
-            pool = Pool(processes=n_jobs, initializer=init_worker, initargs=(dis_mat,))
-#            iterator = tqdm(pool.imap_unordered(_get_closest_k_graphs_parallel, 
-#                                                itr, chunksize),
-#                            desc='Choosing k closest graphs', file=sys.stdout)
-            iterator = pool.imap_unordered(_get_closest_k_graphs_parallel, itr, chunksize)
-            for graphs, i, sod_ks in iterator:
-                sod_k_list[i] = sod_ks
-                graphs_list[i] = graphs
-            pool.close()
-            pool.join()
-            
-            arg_min = np.argmin(sod_k_list)
-            sod_ks_cur = sod_k_list[arg_min]
-            group_cur = graphs_list[arg_min]
-            if sod_ks_cur < sod_ks_min:
-                sod_ks_min = sod_ks_cur
-                group_min = group_cur
-                print('get closer graphs:', sod_ks_min, group_min)
-    else:        
-        for items in tqdm(k_graph_groups, desc='Choosing k closest graphs', file=sys.stdout):
-    #        if items[0] != itmp:
-    #            itmp = items[0]
-    #            print(items)
-            k_graph_pairs = combinations(items, 2)
-            sod_ks = 0
-            for i1, i2 in k_graph_pairs:
-                sod_ks += dis_mat[i1, i2]
-            if sod_ks < sod_ks_min:
-                sod_ks_min = sod_ks
-                group_min = items
-                print('get closer graphs:', sod_ks_min, group_min)
-                
-    return sod_ks_min, group_min
-
-
-def _get_closest_k_graphs_parallel(itr):
-    k_graph_pairs = combinations(itr[0], 2)
-    sod_ks = 0
-    for i1, i2 in k_graph_pairs:
-        sod_ks += G_dis_mat[i1, i2]
-
-    return itr[0], itr[1], sod_ks
-    
-
-def split_iterable(iterable, n, len_iter):
-    it = iter(iterable)
-    for i in range(0, len_iter, n):
-        piece = islice(it, n)
-        yield piece
-
-
-def get_combination_length(n, k):
-    len_combination = 1
-    for i in range(n, n - k, -1):
-        len_combination *= i
-    return int(len_combination / math.factorial(k))
-
-
-###############################################################################
-
-def test_k_closest_graphs():
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-#    gkernel = 'untilhpathkernel'
-#    gkernel = 'weisfeilerlehmankernel'
-    gkernel = 'treeletkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    
-    k = 5
-    edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
-    
-#    sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-#        = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, 
-#                                     'precomputed', edit_costs=edit_costs, 
-##                                     'k-graphs',
-#                                     parallel=False)
-#        
-#    sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-#        = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, 
-#                                     'expert', parallel=False)
-        
-    sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-        = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, k, 
-                                     'expert', parallel=False)
-    return
-
-
-def test_k_closest_graphs_with_cv():
-    gkernel = 'untilhpathkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    
-    k = 4
-    
-    y_all = ['3', '1', '4', '6', '7', '8', '9', '2']
-    repeats = 50
-    collection_path = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/monoterpenoides/'
-    graph_dir = collection_path + 'gxl/'
-    
-    sod_sm_list = []
-    sod_gm_list = []
-    dis_k_sm_list = []
-    dis_k_gm_list = []
-    dis_k_gi_min_list = []
-    for y in y_all:
-        print('\n-------------------------------------------------------')
-        print('class of y:', y)
-        
-        sod_sm_list.append([])
-        sod_gm_list.append([])
-        dis_k_sm_list.append([])
-        dis_k_gm_list.append([])
-        dis_k_gi_min_list.append([])
-    
-        for repeat in range(repeats):
-            print('\nrepeat ', repeat)
-            collection_file = collection_path + 'monoterpenoides_' + y + '_' + str(repeat) + '.xml'
-            Gn, _ = loadDataset(collection_file, extra_params=graph_dir)
-            sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min \
-                = median_on_k_closest_graphs(Gn, node_label, edge_label, gkernel, 
-                                             k, 'whole-dataset', graph_dir=graph_dir,
-                                             parallel=False)
-            
-            sod_sm_list[-1].append(sod_sm)
-            sod_gm_list[-1].append(sod_gm)
-            dis_k_sm_list[-1].append(dis_k_sm)
-            dis_k_gm_list[-1].append(dis_k_gm)
-            dis_k_gi_min_list[-1].append(dis_k_gi_min)
-            
-        print('\nsods of the set median for this class:', sod_sm_list[-1])
-        print('\nsods of the gen median for this class:', sod_gm_list[-1])
-        print('\ndistances in kernel space of set median for this class:', 
-              dis_k_sm_list[-1])
-        print('\ndistances in kernel space of gen median for this class:', 
-              dis_k_gm_list[-1])
-        print('\ndistances in kernel space of min graph for this class:', 
-              dis_k_gi_min_list[-1])
-        
-        sod_sm_list[-1] = np.mean(sod_sm_list[-1])
-        sod_gm_list[-1] = np.mean(sod_gm_list[-1])
-        dis_k_sm_list[-1] = np.mean(dis_k_sm_list[-1])
-        dis_k_gm_list[-1] = np.mean(dis_k_gm_list[-1])
-        dis_k_gi_min_list[-1] = np.mean(dis_k_gi_min_list[-1])
-        
-    print()
-    print('\nmean sods of the set median for each class:', sod_sm_list)
-    print('\nmean sods of the gen median for each class:', sod_gm_list)
-    print('\nmean distance in kernel space of set median for each class:', 
-          dis_k_sm_list)
-    print('\nmean distances in kernel space of gen median for each class:', 
-          dis_k_gm_list)
-    print('\nmean distances in kernel space of min graph for each class:', 
-          dis_k_gi_min_list)
-    
-    print('\nmean sods of the set median of all:', np.mean(sod_sm_list))
-    print('\nmean sods of the gen median of all:', np.mean(sod_gm_list))
-    print('\nmean distances in kernel space of set median of all:', 
-            np.mean(dis_k_sm_list))
-    print('\nmean distances in kernel space of gen median of all:', 
-            np.mean(dis_k_gm_list))
-    print('\nmean distances in kernel space of min graph of all:', 
-            np.mean(dis_k_gi_min_list))
-    
-    return
-    
-
-if __name__ == '__main__':
-    test_k_closest_graphs()
-#    test_k_closest_graphs_with_cv()
\ No newline at end of file
diff --git a/gklearn/preimage/test_median_graph_estimator.py b/gklearn/preimage/test_median_graph_estimator.py
deleted file mode 100644
index 2a930df..0000000
--- a/gklearn/preimage/test_median_graph_estimator.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Mar 16 17:26:40 2020
-
-@author: ljia
-"""
-	
-def test_median_graph_estimator():
-	from gklearn.utils.graphfiles import loadDataset
-	from gklearn.preimage.median_graph_estimator import MedianGraphEstimator
-	from gklearn.gedlib import librariesImport, gedlibpy
-	from gklearn.preimage.utils import get_same_item_indices
-	from gklearn.preimage.ged import convertGraph
-	import multiprocessing
-
-	# estimator parameters.
-	init_type = 'MEDOID'
-	num_inits = 1
-	threads = multiprocessing.cpu_count()
-	time_limit = 60000
-	
-	# algorithm parameters.
-	algo = 'IPFP'
-	initial_solutions = 40
-	algo_options_suffix = ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1'
-
-	edit_cost_name = 'LETTER2'
-	edit_cost_constants = [0.02987291, 0.0178211, 0.01431966, 0.001, 0.001]
-	ds_name = 'COIL-DEL'
-	
-	# Load dataset.
-	# dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
-	dataset = '../../datasets/Letter-high/Letter-high_A.txt'
-	Gn, y_all = loadDataset(dataset)
-	y_idx = get_same_item_indices(y_all)
-	for i, (y, values) in enumerate(y_idx.items()):
-		Gn_i = [Gn[val] for val in values]
-		break
-	
-	# Set up the environment.
-	ged_env = gedlibpy.GEDEnv()
-	# gedlibpy.restart_env()
-	ged_env.set_edit_cost(edit_cost_name, edit_cost_constant=edit_cost_constants)
-	for G in Gn_i:
-		ged_env.add_nx_graph(convertGraph(G, edit_cost_name), '')
-	graph_ids = ged_env.get_all_graph_ids()
-	set_median_id = ged_env.add_graph('set_median')
-	gen_median_id = ged_env.add_graph('gen_median')
-	ged_env.init(init_option='EAGER_WITHOUT_SHUFFLED_COPIES')
-	
-	# Set up the estimator.
-	mge = MedianGraphEstimator(ged_env, constant_node_costs(edit_cost_name))
-	mge.set_refine_method(algo, '--threads ' + str(threads) + ' --initial-solutions ' + str(initial_solutions) + ' --ratio-runs-from-initial-solutions 1')
-	
-	mge_options = '--time-limit ' + str(time_limit) + ' --stdout 2 --init-type ' + init_type
-	mge_options += ' --random-inits ' + str(num_inits) + ' --seed ' + '1'  + ' --refine FALSE'# @todo: std::to_string(rng())
-	
-	# Select the GED algorithm.
-	algo_options = '--threads ' + str(threads) + algo_options_suffix
-	mge.set_options(mge_options)
-	mge.set_init_method(algo, algo_options)
-	mge.set_descent_method(algo, algo_options)
-	
-	# Run the estimator.
-	mge.run(graph_ids, set_median_id, gen_median_id)
-	
-	# Get SODs.
-	sod_sm = mge.get_sum_of_distances('initialized')
-	sod_gm = mge.get_sum_of_distances('converged')
-	print('sod_sm, sod_gm: ', sod_sm, sod_gm)
-	
-	# Get median graphs.
-	set_median = ged_env.get_nx_graph(set_median_id)
-	gen_median = ged_env.get_nx_graph(gen_median_id)
-	
-	return set_median, gen_median
-	
-
-
-def constant_node_costs(edit_cost_name):
-	if edit_cost_name == 'NON_SYMBOLIC' or edit_cost_name == 'LETTER2' or edit_cost_name == 'LETTER':
-		return False
-#	 elif edit_cost_name != '':
-# # 		throw ged::Error("Invalid dataset " + dataset + ". Usage: ./median_tests <AIDS|Mutagenicity|Letter-high|Letter-med|Letter-low|monoterpenoides|SYNTHETICnew|Fingerprint|COIL-DEL>");
-#		 return False
-	# return True
-
-
-if __name__ == '__main__':
-	set_median, gen_median = test_median_graph_estimator()
\ No newline at end of file
diff --git a/gklearn/preimage/test_others.py b/gklearn/preimage/test_others.py
deleted file mode 100644
index a277a17..0000000
--- a/gklearn/preimage/test_others.py
+++ /dev/null
@@ -1,686 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jul  4 12:20:16 2019
-
-@author: ljia
-"""
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.median import draw_Letter_graph
-from gklearn.preimage.ged import GED, ged_median
-from gklearn.preimage.utils import get_same_item_indices, compute_kernel, gram2distances, \
-    dis_gstar, remove_edges
-
-
-# --------------------------- These are tests --------------------------------#
-    
-def test_who_is_the_closest_in_kernel_space(Gn):
-    idx_gi = [0, 6]
-    g1 = Gn[idx_gi[0]]
-    g2 = Gn[idx_gi[1]]
-    # create the "median" graph.
-    gnew = g2.copy()
-    gnew.remove_node(0)
-    nx.draw_networkx(gnew)
-    plt.show()
-    print(gnew.nodes(data=True))
-    Gn = [gnew] + Gn
-    
-    # compute gram matrix
-    Kmatrix = compute_kernel(Gn, 'untilhpathkernel', True)
-    # the distance matrix
-    dmatrix = gram2distances(Kmatrix)
-    print(np.sort(dmatrix[idx_gi[0] + 1]))
-    print(np.argsort(dmatrix[idx_gi[0] + 1]))
-    print(np.sort(dmatrix[idx_gi[1] + 1]))
-    print(np.argsort(dmatrix[idx_gi[1] + 1]))
-    # for all g in Gn, compute (d(g1, g) + d(g2, g)) / 2
-    dis_median = [(dmatrix[i, idx_gi[0] + 1] + dmatrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
-    print(np.sort(dis_median))
-    print(np.argsort(dis_median))
-    return
-
-
-def test_who_is_the_closest_in_GED_space(Gn):
-    idx_gi = [0, 6]
-    g1 = Gn[idx_gi[0]]
-    g2 = Gn[idx_gi[1]]
-    # create the "median" graph.
-    gnew = g2.copy()
-    gnew.remove_node(0)
-    nx.draw_networkx(gnew)
-    plt.show()
-    print(gnew.nodes(data=True))
-    Gn = [gnew] + Gn
-    
-    # compute GEDs
-    ged_matrix = np.zeros((len(Gn), len(Gn)))
-    for i1 in tqdm(range(len(Gn)), desc='computing GEDs', file=sys.stdout):
-        for i2 in range(len(Gn)):
-            dis, _, _ = GED(Gn[i1], Gn[i2], lib='gedlib')
-            ged_matrix[i1, i2] = dis
-    print(np.sort(ged_matrix[idx_gi[0] + 1]))
-    print(np.argsort(ged_matrix[idx_gi[0] + 1]))
-    print(np.sort(ged_matrix[idx_gi[1] + 1]))
-    print(np.argsort(ged_matrix[idx_gi[1] + 1]))
-    # for all g in Gn, compute (GED(g1, g) + GED(g2, g)) / 2
-    dis_median = [(ged_matrix[i, idx_gi[0] + 1] + ged_matrix[i, idx_gi[1] + 1]) / 2 for i in range(len(Gn))]
-    print(np.sort(dis_median))
-    print(np.argsort(dis_median))
-    return
-
-
-def test_will_IAM_give_the_median_graph_we_wanted(Gn):
-    idx_gi = [0, 6]
-    g1 = Gn[idx_gi[0]].copy()
-    g2 = Gn[idx_gi[1]].copy()
-#    del Gn[idx_gi[0]]
-#    del Gn[idx_gi[1] - 1]
-    g_median = test_iam_with_more_graphs_as_init([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
-#    g_median = test_iam_with_more_graphs_as_init(Gn, Gn, c_ei=1, c_er=1, c_es=1)
-    nx.draw_networkx(g_median)
-    plt.show()
-    print(g_median.nodes(data=True))
-    print(g_median.edges(data=True))
-    
-    
-def test_new_IAM_allGraph_deleteNodes(Gn):
-    idx_gi = [0, 6]
-#    g1 = Gn[idx_gi[0]].copy()
-#    g2 = Gn[idx_gi[1]].copy()
-
-#    g1 = nx.Graph(name='haha')
-#    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
-#    g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
-#    g2 = nx.Graph(name='hahaha')
-#    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
-#                       (3, {'atom': 'O'}), (4, {'atom': 'C'})])
-#    g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-#                       (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
-    
-    g1 = nx.Graph(name='haha')
-    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
-                       (3, {'atom': 'S'}), (4, {'atom': 'S'})])
-    g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-                       (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
-    g2 = nx.Graph(name='hahaha')
-    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
-                       (3, {'atom': 'O'}), (4, {'atom': 'O'})])
-    g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-                       (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
-
-#    g2 = g1.copy()
-#    g2.add_nodes_from([(3, {'atom': 'O'})])
-#    g2.add_nodes_from([(4, {'atom': 'C'})])
-#    g2.add_edges_from([(1, 3, {'bond_type': '1'})])
-#    g2.add_edges_from([(3, 4, {'bond_type': '1'})])
-
-#    del Gn[idx_gi[0]]
-#    del Gn[idx_gi[1] - 1]
-    
-    nx.draw_networkx(g1)
-    plt.show()
-    print(g1.nodes(data=True))
-    print(g1.edges(data=True))
-    nx.draw_networkx(g2)
-    plt.show()
-    print(g2.nodes(data=True))
-    print(g2.edges(data=True))
-    
-    g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations([g1, g2], [g1, g2], c_ei=1, c_er=1, c_es=1)
-#    g_median = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(Gn, Gn, c_ei=1, c_er=1, c_es=1)
-    nx.draw_networkx(g_median)
-    plt.show()
-    print(g_median.nodes(data=True))
-    print(g_median.edges(data=True))
-    
-    
-def test_the_simple_two(Gn, gkernel):
-    from gk_iam import gk_iam_nearest_multi
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # recursions
-    l = 500
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 2 # k nearest neighbors
-    
-    # randomly select two molecules
-    np.random.seed(1)
-    idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
-    g1 = Gn[idx_gi[0]]
-    g2 = Gn[idx_gi[1]]
-    Gn_mix = [g.copy() for g in Gn]
-    Gn_mix.append(g1.copy())
-    Gn_mix.append(g2.copy())
-    
-#    g_tmp = iam([g1, g2])
-#    nx.draw_networkx(g_tmp)
-#    plt.show()
-    
-    # compute 
-#    k_list = [] # kernel between each graph and itself.
-#    k_g1_list = [] # kernel between each graph and g1
-#    k_g2_list = [] # kernel between each graph and g2
-#    for ig, g in tqdm(enumerate(Gn), desc='computing self kernels', file=sys.stdout): 
-#        ktemp = compute_kernel([g, g1, g2], 'marginalizedkernel', False)
-#        k_list.append(ktemp[0][0, 0])
-#        k_g1_list.append(ktemp[0][0, 1])
-#        k_g2_list.append(ktemp[0][0, 2])
-        
-    km = compute_kernel(Gn_mix, gkernel, True)
-#    k_list = np.diag(km) # kernel between each graph and itself.
-#    k_g1_list = km[idx_gi[0]] # kernel between each graph and g1
-#    k_g2_list = km[idx_gi[1]] # kernel between each graph and g2    
-
-    g_best = []
-    dis_best = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('alpha =', alpha)
-        dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha], 
-                                               range(len(Gn), len(Gn) + 2), km,
-                                               k, r_max,gkernel)
-        dis_best.append(dhat)
-        g_best.append(ghat_list)
-        
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-        print('the corresponding pre-images are')
-        for g in g_best[idx]:
-            nx.draw_networkx(g)
-            plt.show()
-            print(g.nodes(data=True))
-            print(g.edges(data=True))
-            
-    
-def test_remove_bests(Gn, gkernel):
-    from gk_iam import gk_iam_nearest_multi
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # recursions
-    l = 500
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 20 # k nearest neighbors
-    
-    # randomly select two molecules
-    np.random.seed(1)
-    idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
-    g1 = Gn[idx_gi[0]]
-    g2 = Gn[idx_gi[1]]
-    # remove the best 2 graphs.
-    del Gn[idx_gi[0]]
-    del Gn[idx_gi[1] - 1]
-#    del Gn[8]
-    
-    Gn_mix = [g.copy() for g in Gn]
-    Gn_mix.append(g1.copy())
-    Gn_mix.append(g2.copy())
-
-    
-    # compute
-    km = compute_kernel(Gn_mix, gkernel, True)
-    g_best = []
-    dis_best = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('alpha =', alpha)
-        dhat, ghat_list = gk_iam_nearest_multi(Gn, [g1, g2], [alpha, 1 - alpha], 
-                                               range(len(Gn), len(Gn) + 2), km, 
-                                               k, r_max, gkernel)
-        dis_best.append(dhat)
-        g_best.append(ghat_list)
-        
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-        print('the corresponding pre-images are')
-        for g in g_best[idx]:
-            draw_Letter_graph(g)
-#            nx.draw_networkx(g)
-#            plt.show()
-            print(g.nodes(data=True))
-            print(g.edges(data=True))
-            
-            
-###############################################################################
-# Tests on dataset Letter-H.
-            
-def test_gkiam_letter_h():
-    from gk_iam import gk_iam_nearest_multi
-    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-#          'extra_params': {}} # node nsymb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    gkernel = 'structuralspkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 3 # recursions
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 10 # k nearest neighbors
-    
-    # classify graphs according to letters.
-    idx_dict = get_same_item_indices(y_all)
-    time_list = []
-    sod_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []
-    for letter in idx_dict:
-        print('\n-------------------------------------------------------\n')
-        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-        Gn_mix = Gn_let + [g.copy() for g in Gn_let]
-        
-        alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-        
-        # compute
-        time0 = time.time()
-        km = compute_kernel(Gn_mix, gkernel, True)
-        g_best = []
-        dis_best = []
-        # for each alpha
-        for alpha in alpha_range:
-            print('alpha =', alpha)
-            dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, 
-                Gn_let, [alpha] * len(Gn_let), range(len(Gn_let), len(Gn_mix)), 
-                km, k, r_max, gkernel, c_ei=1.7, c_er=1.7, c_es=1.7,
-                ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
-            dis_best.append(dhat)
-            g_best.append(ghat_list)
-        time_list.append(time.time() - time0)
-            
-        # show best graphs and save them to file.
-        for idx, item in enumerate(alpha_range):
-            print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-            print('the corresponding pre-images are')
-            for g in g_best[idx]:
-                draw_Letter_graph(g, savepath='results/gk_iam/')
-#            nx.draw_networkx(g)
-#            plt.show()
-                print(g.nodes(data=True))
-                print(g.edges(data=True))
-                
-        # compute the corresponding sod in graph space. (alpha range not considered.)
-        sod_tmp, _ = ged_median(g_best[0], Gn_let, ged_cost='LETTER', 
-                                     ged_method='IPFP', saveGXL='gedlib-letter')
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        sod_ks_min_list.append(sod_ks)
-        nb_updated_list.append(nb_updated)
-        
-                
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)  
-    print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) 
-    print('\nnumber of updates for each letter: ', nb_updated_list)             
-    print('\ntimes:', time_list)
-
-#def compute_letter_median_by_average(Gn):
-#    return g_median
-    
-
-def test_iam_letter_h():
-    from iam import test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations
-    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-#          'extra_params': {}} # node nsymb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    
-    lmbda = 0.03 # termination probalility
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    
-    # classify graphs according to letters.
-    idx_dict = get_same_item_indices(y_all)
-    time_list = []
-    sod_list = []
-    sod_min_list = []
-    for letter in idx_dict:        
-        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-        
-        alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-        
-        # compute
-        g_best = []
-        dis_best = []
-        time0 = time.time()
-        # for each alpha
-        for alpha in alpha_range:
-            print('alpha =', alpha)
-            ghat_list, dhat = test_iam_moreGraphsAsInit_tryAllPossibleBestGraphs_deleteNodesInIterations(
-                Gn_let, Gn_let, c_ei=1.7, c_er=1.7, c_es=1.7,
-                ged_cost='LETTER', ged_method='IPFP', saveGXL='gedlib-letter')
-            dis_best.append(dhat)
-            g_best.append(ghat_list)
-        time_list.append(time.time() - time0)
-            
-        # show best graphs and save them to file.
-        for idx, item in enumerate(alpha_range):
-            print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-            print('the corresponding pre-images are')
-            for g in g_best[idx]:
-                draw_Letter_graph(g, savepath='results/iam/')
-#            nx.draw_networkx(g)
-#            plt.show()
-                print(g.nodes(data=True))
-                print(g.edges(data=True))
-                
-        # compute the corresponding sod in kernel space. (alpha range not considered.)
-        gkernel = 'structuralspkernel'        
-        sod_tmp = []
-        Gn_mix = g_best[0] + Gn_let
-        km = compute_kernel(Gn_mix, gkernel, True)
-        for ig, g in tqdm(enumerate(g_best[0]), desc='computing kernel sod', file=sys.stdout):
-            dtemp = dis_gstar(ig, range(len(g_best[0]), len(Gn_mix)), 
-                              [alpha_range[0]] * len(Gn_let), km, withterm3=False)
-            sod_tmp.append(dtemp)
-        sod_list.append(sod_tmp)
-        sod_min_list.append(np.min(sod_tmp))
-        
-                
-    print('\nsods in kernel space: ', sod_list)
-    print('\nsmallest sod in kernel space for each letter: ', sod_min_list)
-    print('\ntimes:', time_list)
-    
-    
-def test_random_preimage_letter_h():
-    from preimage_random import preimage_random
-    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-#          'extra_params': {}} # node nsymb
-    #    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-#          'extra_params': {}}  # node/edge symb
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-#          'extra_params': {}}
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-#            'extra_params': {}} # node symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    gkernel = 'structuralspkernel'
-    
-#    lmbda = 0.03 # termination probalility
-    r_max = 3 # 10 # recursions
-    l = 500
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    #alpha_range = np.linspace(0.1, 0.9, 9)
-    k = 10 # 5 # k nearest neighbors
-    
-    # classify graphs according to letters.
-    idx_dict = get_same_item_indices(y_all)
-    time_list = []
-    sod_list = []
-    sod_min_list = []
-    for letter in idx_dict:
-        print('\n-------------------------------------------------------\n')
-        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-        Gn_mix = Gn_let + [g.copy() for g in Gn_let]
-        
-        alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-        
-        # compute
-        time0 = time.time()
-        km = compute_kernel(Gn_mix, gkernel, True)
-        g_best = []
-        dis_best = []
-        # for each alpha
-        for alpha in alpha_range:
-            print('alpha =', alpha)
-            dhat, ghat_list = preimage_random(Gn_let, Gn_let, [alpha] * len(Gn_let), 
-                                                   range(len(Gn_let), len(Gn_mix)), km, 
-                                                   k, r_max, gkernel, c_ei=1.7, 
-                                                   c_er=1.7, c_es=1.7)
-            dis_best.append(dhat)
-            g_best.append(ghat_list)
-        time_list.append(time.time() - time0)
-            
-        # show best graphs and save them to file.
-        for idx, item in enumerate(alpha_range):
-            print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-            print('the corresponding pre-images are')
-            for g in g_best[idx]:
-                draw_Letter_graph(g, savepath='results/gk_iam/')
-#            nx.draw_networkx(g)
-#            plt.show()
-                print(g.nodes(data=True))
-                print(g.edges(data=True))
-                
-        # compute the corresponding sod in graph space. (alpha range not considered.)
-        sod_tmp, _ = ged_median(g_best[0], Gn_let)
-        sod_list.append(sod_tmp)
-        sod_min_list.append(np.min(sod_tmp))
-        
-                
-    print('\nsods in graph space: ', sod_list)
-    print('\nsmallest sod in graph space for each letter: ', sod_min_list)               
-    print('\ntimes:', time_list)
-    
-    
-
-    
-    
-    
-    
-def test_gkiam_mutag():
-    from gk_iam import gk_iam_nearest_multi
-    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Letter-med', 'dataset': '../datasets/Letter-med/Letter-med_A.txt',
-#          'extra_params': {}} # node nsymb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-    gkernel = 'structuralspkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 3 # recursions
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 20 # k nearest neighbors
-    
-    # classify graphs according to letters.
-    idx_dict = get_same_item_indices(y_all)
-    time_list = []
-    sod_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []
-    for letter in idx_dict:
-        print('\n-------------------------------------------------------\n')
-        Gn_let = [Gn[i].copy() for i in idx_dict[letter]]
-        Gn_mix = Gn_let + [g.copy() for g in Gn_let]
-        
-        alpha_range = np.linspace(1 / len(Gn_let), 1 / len(Gn_let), 1)
-        
-        # compute
-        time0 = time.time()
-        km = compute_kernel(Gn_mix, gkernel, True)
-        g_best = []
-        dis_best = []
-        # for each alpha
-        for alpha in alpha_range:
-            print('alpha =', alpha)
-            dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn_let, Gn_let, [alpha] * len(Gn_let), 
-                                                   range(len(Gn_let), len(Gn_mix)), km, 
-                                                   k, r_max, gkernel, c_ei=1.7, 
-                                                   c_er=1.7, c_es=1.7)
-            dis_best.append(dhat)
-            g_best.append(ghat_list)
-        time_list.append(time.time() - time0)
-            
-        # show best graphs and save them to file.
-        for idx, item in enumerate(alpha_range):
-            print('when alpha is', item, 'the shortest distance is', dis_best[idx])
-            print('the corresponding pre-images are')
-            for g in g_best[idx]:
-                draw_Letter_graph(g, savepath='results/gk_iam/')
-#            nx.draw_networkx(g)
-#            plt.show()
-                print(g.nodes(data=True))
-                print(g.edges(data=True))
-                
-        # compute the corresponding sod in graph space. (alpha range not considered.)
-        sod_tmp, _ = ged_median(g_best[0], Gn_let)
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        sod_ks_min_list.append(sod_ks)
-        nb_updated_list.append(nb_updated)
-        
-                
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each letter: ', sod_gs_min_list)  
-    print('\nsmallest sod in kernel space for each letter: ', sod_ks_min_list) 
-    print('\nnumber of updates for each letter: ', nb_updated_list)             
-    print('\ntimes:', time_list)
-    
-    
-###############################################################################
-# Re-test.
-    
-def retest_the_simple_two():
-    from gk_iam import gk_iam_nearest_multi
-    
-    # The two simple graphs.
-#    g1 = nx.Graph(name='haha')
-#    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'})])
-#    g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'})])
-#    g2 = nx.Graph(name='hahaha')
-#    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'O'}), (2, {'atom': 'C'}),
-#                       (3, {'atom': 'O'}), (4, {'atom': 'C'})])
-#    g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-#                       (2, 3, {'bond_type': '1'}), (3, 4, {'bond_type': '1'})])
-    
-    g1 = nx.Graph(name='haha')
-    g1.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
-                       (3, {'atom': 'S'}), (4, {'atom': 'S'})])
-    g1.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-                       (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
-    g2 = nx.Graph(name='hahaha')
-    g2.add_nodes_from([(0, {'atom': 'C'}), (1, {'atom': 'C'}), (2, {'atom': 'C'}),
-                       (3, {'atom': 'O'}), (4, {'atom': 'O'})])
-    g2.add_edges_from([(0, 1, {'bond_type': '1'}), (1, 2, {'bond_type': '1'}),
-                       (2, 3, {'bond_type': '1'}), (2, 4, {'bond_type': '1'})])
-    
-#    # randomly select two molecules
-#    np.random.seed(1)
-#    idx_gi = [0, 6] # np.random.randint(0, len(Gn), 2)
-#    g1 = Gn[idx_gi[0]]
-#    g2 = Gn[idx_gi[1]]
-#    Gn_mix = [g.copy() for g in Gn]
-#    Gn_mix.append(g1.copy())
-#    Gn_mix.append(g2.copy())
-    
-    Gn = [g1.copy(), g2.copy()]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # recursions
-#    l = 500
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 2 # k nearest neighbors
-    epsilon = 1e-6
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    c_ei=1
-    c_er=1
-    c_es=1
-    
-    Gn_mix = Gn + [g1.copy(), g2.copy()]
-    
-    # compute         
-    time0 = time.time()
-    km = compute_kernel(Gn_mix, gkernel, True)
-    time_km = time.time() - time0
-
-    time_list = []
-    sod_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []       
-    g_best = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('\n-------------------------------------------------------\n')
-        print('alpha =', alpha)
-        time0 = time.time()
-        dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
-            [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, 
-            gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon, 
-            ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
-        time_total = time.time() - time0 + time_km
-        print('time: ', time_total)
-        time_list.append(time_total)
-        sod_ks_min_list.append(dhat)
-        g_best.append(ghat_list)
-        nb_updated_list.append(nb_updated)       
-        
-    # show best graphs and save them to file.
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', sod_ks_min_list[idx])
-        print('one of the possible corresponding pre-images is')
-        nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), 
-                with_labels=True)
-        plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
-        plt.show()
-        print(g_best[idx][0].nodes(data=True))
-        print(g_best[idx][0].edges(data=True))
-        
-#        for g in g_best[idx]:
-#            draw_Letter_graph(g, savepath='results/gk_iam/')
-##            nx.draw_networkx(g)
-##            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-            
-    # compute the corresponding sod in graph space.
-    for idx, item in enumerate(alpha_range):
-        sod_tmp, _ = ged_median(g_best[0], [g1, g2], ged_cost=ged_cost, 
-                                     ged_method=ged_method, saveGXL=saveGXL)
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
-    print('\nsmallest sod in kernel space for each alpha: ', sod_ks_min_list) 
-    print('\nnumber of updates for each alpha: ', nb_updated_list)             
-    print('\ntimes:', time_list)
-            
-        
-
-if __name__ == '__main__':
-#    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-#          'extra_params': {}}  # node/edge symb
-#    ds = {'name': 'Letter-high', 'dataset': '../datasets/Letter-high/Letter-high_A.txt',
-#          'extra_params': {}} # node nsymb
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/monoterpenoides/trainset_9.ds',
-#          'extra_params': {}}
-#    ds = {'name': 'Acyclic', 'dataset': '../datasets/acyclic/dataset_bps.ds',
-#        'extra_params': {}} # node symb
-#    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:20]
-    
-#    import networkx.algorithms.isomorphism as iso
-#    G1 = nx.MultiDiGraph()
-#    G2 = nx.MultiDiGraph()
-#    G1.add_nodes_from([1,2,3], fill='red')
-#    G2.add_nodes_from([10,20,30,40], fill='red')
-#    nx.add_path(G1, [1,2,3,4], weight=3, linewidth=2.5)
-#    nx.add_path(G2, [10,20,30,40], weight=3)
-#    nm = iso.categorical_node_match('fill', 'red')
-#    print(nx.is_isomorphic(G1, G2, node_match=nm))
-#    
-#    test_new_IAM_allGraph_deleteNodes(Gn)
-#    test_will_IAM_give_the_median_graph_we_wanted(Gn)
-#    test_who_is_the_closest_in_GED_space(Gn)
-#    test_who_is_the_closest_in_kernel_space(Gn)
-    
-#    test_the_simple_two(Gn, 'untilhpathkernel')
-#    test_remove_bests(Gn, 'untilhpathkernel')
-#    test_gkiam_letter_h()
-#    test_iam_letter_h()
-#    test_random_preimage_letter_h
-    
-###############################################################################
-# retests.
-    retest_the_simple_two()
\ No newline at end of file
diff --git a/gklearn/preimage/test_preimage_iam.py b/gklearn/preimage/test_preimage_iam.py
deleted file mode 100644
index 9b05dd9..0000000
--- a/gklearn/preimage/test_preimage_iam.py
+++ /dev/null
@@ -1,620 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep  5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.utils import remove_edges, compute_kernel, get_same_item_indices
-from gklearn.preimage.ged import ged_median
-
-from gklearn.preimage.preimage_iam import preimage_iam 
-
-
-###############################################################################
-# tests on different values on grid of median-sets and k.
-
-def test_preimage_iam_grid_k_median_nb():       
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 5 # iteration limit for pre-image.
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-#    k = 5 # k nearest neighbors
-    epsilon = 1e-6
-    InitIAMWithAllDk = True
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    # parameters for IAM function
-    c_ei=1
-    c_er=1
-    c_es=1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = True
-    connected_iam = False
-    
-    # number of graphs; we what to compute the median of these graphs. 
-    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    # number of nearest neighbors.
-    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-        
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []
-    nb_updated_k_list = []
-    g_best = []
-    for idx_nb, nb_median in enumerate(nb_median_range):
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-        km_tmp = gmfile['gm']
-        time_km = gmfile['gmtime']
-        # modify mixed gram matrix.
-        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                km[i, j] = km_tmp[i, j]
-                km[j, i] = km[i, j]
-        for i in range(len(Gn)):
-            for j, idx in enumerate(idx_rdm):
-                km[i, len(Gn) + j] = km[i, idx]
-                km[len(Gn) + j, i] = km[i, idx]
-        for i, idx1 in enumerate(idx_rdm):
-            for j, idx2 in enumerate(idx_rdm):
-                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        sod_gs_list.append([])
-        sod_gs_min_list.append([])
-        nb_updated_list.append([])
-        nb_updated_k_list.append([])
-        g_best.append([])   
-        
-        for k in k_range:
-            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
-            print('k =', k)
-            time0 = time.time()
-            dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
-                preimage_iam(Gn, Gn_median,
-                alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
-                gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
-                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
-                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                            'removeNodes': removeNodes, 'connected': connected_iam},
-                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
-                            'saveGXL': saveGXL})
-                
-            time_total = time.time() - time0 + time_km
-            print('time: ', time_total)
-            time_list[idx_nb].append(time_total)
-            print('\nsmallest distance in kernel space: ', dhat) 
-            dis_ks_min_list[idx_nb].append(dhat)
-            g_best[idx_nb].append(ghat_list)
-            print('\nnumber of updates of the best graph by IAM: ', nb_updated)
-            nb_updated_list[idx_nb].append(nb_updated)
-            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k)
-            nb_updated_k_list[idx_nb].append(nb_updated_k)
-            
-            # show the best graph and save it to file.
-            print('the shortest distance is', dhat)
-            print('one of the possible corresponding pre-images is')
-            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
-                    with_labels=True)
-            plt.savefig('results/preimage_iam/mutag_median_nb' + str(nb_median) + 
-                        '_k' + str(k) + '.png', format="PNG")
-    #        plt.show()
-            plt.clf()
-    #        print(ghat_list[0].nodes(data=True))
-    #        print(ghat_list[0].edges(data=True))
-        
-            # compute the corresponding sod in graph space.
-            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
-                                         ged_method=ged_method, saveGXL=saveGXL)
-            sod_gs_list[idx_nb].append(sod_tmp)
-            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
-            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each set of median graphs and k: ', 
-          sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs and k: ', 
-          dis_ks_min_list) 
-    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', 
-          nb_updated_list)
-    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', 
-          nb_updated_k_list)
-    print('\ntimes:', time_list)
-    
-    
-    
-    
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_preimage_iam_median_nb():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 3 # iteration limit for pre-image.
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 5 # k nearest neighbors
-    epsilon = 1e-6
-    InitIAMWithAllDk = True
-    # parameters for IAM function
-#    c_vi = 0.037
-#    c_vr = 0.038
-#    c_vs = 0.075
-#    c_ei = 0.001
-#    c_er = 0.001
-#    c_es = 0.0
-    c_vi = 4
-    c_vr = 4
-    c_vs = 2
-    c_ei = 1
-    c_er = 1
-    c_es = 1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = True
-    connected_iam = False
-    # parameters for GED function
-#    ged_cost='CHEM_1'
-    ged_cost = 'CONSTANT'
-    ged_method = 'IPFP'
-    edit_cost_constant = [c_vi, c_vr, c_vs, c_ei, c_er, c_es]
-    ged_stabilizer = 'min'
-    ged_repeat = 50
-    params_ged = {'lib': 'gedlibpy', 'cost': ged_cost, 'method': ged_method, 
-                  'edit_cost_constant': edit_cost_constant, 
-                  'stabilizer': ged_stabilizer, 'repeat': ged_repeat}
-    
-    # number of graphs; we what to compute the median of these graphs. 
-#    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    nb_median_range = [2]
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-        
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []
-    nb_updated_k_list = []
-    g_best = []
-    for nb_median in nb_median_range:
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-        km_tmp = gmfile['gm']
-        time_km = gmfile['gmtime']
-        # modify mixed gram matrix.
-        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                km[i, j] = km_tmp[i, j]
-                km[j, i] = km[i, j]
-        for i in range(len(Gn)):
-            for j, idx in enumerate(idx_rdm):
-                km[i, len(Gn) + j] = km[i, idx]
-                km[len(Gn) + j, i] = km[i, idx]
-        for i, idx1 in enumerate(idx_rdm):
-            for j, idx2 in enumerate(idx_rdm):
-                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        time0 = time.time()
-        dhat, ghat_list, dis_of_each_itr, nb_updated, nb_updated_k = \
-            preimage_iam(Gn, Gn_median,
-            alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
-            gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
-            params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
-                        'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                        'removeNodes': removeNodes, 'connected': connected_iam},
-            params_ged=params_ged)
-            
-        time_total = time.time() - time0 + time_km
-        print('\ntime: ', time_total)
-        time_list.append(time_total)
-        print('\nsmallest distance in kernel space: ', dhat) 
-        dis_ks_min_list.append(dhat)
-        g_best.append(ghat_list)
-        print('\nnumber of updates of the best graph: ', nb_updated)
-        nb_updated_list.append(nb_updated)
-        print('\nnumber of updates of k nearest graphs: ', nb_updated_k)
-        nb_updated_k_list.append(nb_updated_k)
-        
-        # show the best graph and save it to file.
-        print('the shortest distance is', dhat)
-        print('one of the possible corresponding pre-images is')
-        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
-                with_labels=True)
-        plt.show()
-#        plt.savefig('results/preimage_iam/mutag_median_cs.001_nb' + str(nb_median) + 
-#                    '.png', format="PNG")
-        plt.clf()
-#        print(ghat_list[0].nodes(data=True))
-#        print(ghat_list[0].edges(data=True))
-    
-        # compute the corresponding sod in graph space.
-        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, params_ged=params_ged)
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs: ', 
-          dis_ks_min_list) 
-    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', 
-          nb_updated_list)
-    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', 
-          nb_updated_k_list)
-    print('\ntimes:', time_list)
-    
-    
-    
-    
-    
-
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-
-def test_gkiam_2combination_all_pairs():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # iteration limit for pre-image.
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 5 # k nearest neighbors
-    epsilon = 1e-6
-    InitIAMWithAllDk = False
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    # parameters for IAM function
-    c_ei=1
-    c_er=1
-    c_es=1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = True
-    connected_iam = False
-    
-    nb_update_mat = np.full((len(Gn), len(Gn)), np.inf)
-    # test on each pair of graphs.
-#    for idx1 in range(len(Gn) - 1, -1, -1):
-#        for idx2 in range(idx1, -1, -1):
-    for idx1 in range(187, 188):
-        for idx2 in range(167, 168):
-            g1 = Gn[idx1].copy()
-            g2 = Gn[idx2].copy()
-        #    Gn[10] = []
-        #    Gn[10] = []
-            
-            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-            plt.savefig("results/gk_iam/all_pairs/mutag187.png", format="PNG")
-            plt.show()
-            plt.clf()
-            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-            plt.savefig("results/gk_iam/all_pairs/mutag167.png", format="PNG")
-            plt.show()
-            plt.clf()
-
-            ###################################################################            
-#            Gn_mix = [g.copy() for g in Gn]
-#            Gn_mix.append(g1.copy())
-#            Gn_mix.append(g2.copy())
-#            
-#            # compute
-#            time0 = time.time()
-#            km = compute_kernel(Gn_mix, gkernel, True)
-#            time_km = time.time() - time0
-#            
-#            # write Gram matrix to file and read it.
-#            np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
-            
-            ###################################################################
-            gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
-            km = gmfile['gm']
-            time_km = gmfile['gmtime']
-            # modify mixed gram matrix.
-            for i in range(len(Gn)):
-                km[i, len(Gn)] = km[i, idx1]
-                km[i, len(Gn) + 1] = km[i, idx2]
-                km[len(Gn), i] = km[i, idx1]
-                km[len(Gn) + 1, i] = km[i, idx2]
-            km[len(Gn), len(Gn)] = km[idx1, idx1]
-            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
-            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
-            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-            
-            ###################################################################
-#            # use only the two graphs in median set as candidates.
-#            Gn = [g1.copy(), g2.copy()]
-#            Gn_mix = Gn + [g1.copy(), g2.copy()]
-#            # compute         
-#            time0 = time.time()
-#            km = compute_kernel(Gn_mix, gkernel, True)
-#            time_km = time.time() - time0
-    
-            
-            time_list = []
-            dis_ks_min_list = []
-            sod_gs_list = []
-            sod_gs_min_list = []
-            nb_updated_list = []
-            nb_updated_k_list = [] 
-            g_best = []
-            # for each alpha
-            for alpha in alpha_range:
-                print('\n-------------------------------------------------------\n')
-                print('alpha =', alpha)
-                time0 = time.time()
-                dhat, ghat_list, sod_ks, nb_updated, nb_updated_k = \
-                    preimage_iam(Gn, [g1, g2],
-                    [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, 
-                    gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk,
-                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
-                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                                'removeNodes': removeNodes, 'connected': connected_iam},
-                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
-                                'saveGXL': saveGXL})
-                time_total = time.time() - time0 + time_km
-                print('time: ', time_total)
-                time_list.append(time_total)
-                dis_ks_min_list.append(dhat)
-                g_best.append(ghat_list)
-                nb_updated_list.append(nb_updated)
-                nb_updated_k_list.append(nb_updated_k)
-                
-            # show best graphs and save them to file.
-            for idx, item in enumerate(alpha_range):
-                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
-                print('one of the possible corresponding pre-images is')
-                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), 
-                        with_labels=True)
-                plt.savefig('results/gk_iam/mutag' + str(idx1) + '_' + str(idx2) 
-                            + '_alpha' + str(item) + '.png', format="PNG")
-#                plt.show()
-                plt.clf()
-#                print(g_best[idx][0].nodes(data=True))
-#                print(g_best[idx][0].edges(data=True))
-                
-        #        for g in g_best[idx]:
-        #            draw_Letter_graph(g, savepath='results/gk_iam/')
-        ##            nx.draw_networkx(g)
-        ##            plt.show()
-        #            print(g.nodes(data=True))
-        #            print(g.edges(data=True))
-                    
-            # compute the corresponding sod in graph space.
-            for idx, item in enumerate(alpha_range):
-                sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, 
-                                             ged_method=ged_method, saveGXL=saveGXL)
-                sod_gs_list.append(sod_tmp)
-                sod_gs_min_list.append(np.min(sod_tmp))
-                
-            print('\nsods in graph space: ', sod_gs_list)
-            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
-            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) 
-            print('\nnumber of updates of the best graph for each alpha: ', 
-                  nb_updated_list)
-            print('\nnumber of updates of the k nearest graphs for each alpha: ', 
-                  nb_updated_k_list)
-            print('\ntimes:', time_list)
-            nb_update_mat[idx1, idx2] = nb_updated_list[0]
-            
-            str_fw = 'graphs %d and %d: %d.\n' % (idx1, idx2, nb_updated_list[0])
-            with open('results/gk_iam/all_pairs/nb_updates.txt', 'r+') as file:
-                content = file.read()
-                file.seek(0, 0)
-                file.write(str_fw + content)
-    
-    
-
-def test_gkiam_2combination():
-    from gk_iam import gk_iam_nearest_multi
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # iteration limit for pre-image.
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 20 # k nearest neighbors
-    epsilon = 1e-6
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    c_ei=1
-    c_er=1
-    c_es=1
-    
-    # randomly select two molecules
-    np.random.seed(1)
-    idx_gi = [10, 11] # np.random.randint(0, len(Gn), 2)
-    g1 = Gn[idx_gi[0]].copy()
-    g2 = Gn[idx_gi[1]].copy()
-#    Gn[10] = []
-#    Gn[10] = []
-    
-#    nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-#    plt.savefig("results/random_preimage/mutag10.png", format="PNG")
-#    plt.show()
-#    nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-#    plt.savefig("results/random_preimage/mutag11.png", format="PNG")
-#    plt.show() 
-    
-    Gn_mix = [g.copy() for g in Gn]
-    Gn_mix.append(g1.copy())
-    Gn_mix.append(g2.copy())
-    
-    # compute
-#    time0 = time.time()
-#    km = compute_kernel(Gn_mix, gkernel, True)
-#    time_km = time.time() - time0
-    
-    # write Gram matrix to file and read it.
-#    np.savez('results/gram_matrix.gm', gm=km, gmtime=time_km)
-    gmfile = np.load('results/gram_matrix.gm.npz')
-    km = gmfile['gm']
-    time_km = gmfile['gmtime']
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []       
-    g_best = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('\n-------------------------------------------------------\n')
-        print('alpha =', alpha)
-        time0 = time.time()
-        dhat, ghat_list, sod_ks, nb_updated = gk_iam_nearest_multi(Gn, [g1, g2],
-            [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, 
-            gkernel, c_ei=c_ei, c_er=c_er, c_es=c_es, epsilon=epsilon, 
-            ged_cost=ged_cost, ged_method=ged_method, saveGXL=saveGXL)
-        time_total = time.time() - time0 + time_km
-        print('time: ', time_total)
-        time_list.append(time_total)
-        dis_ks_min_list.append(dhat)
-        g_best.append(ghat_list)
-        nb_updated_list.append(nb_updated)       
-        
-    # show best graphs and save them to file.
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
-        print('one of the possible corresponding pre-images is')
-        nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), 
-                with_labels=True)
-        plt.savefig('results/gk_iam/mutag_alpha' + str(item) + '.png', format="PNG")
-        plt.show()
-        print(g_best[idx][0].nodes(data=True))
-        print(g_best[idx][0].edges(data=True))
-        
-#        for g in g_best[idx]:
-#            draw_Letter_graph(g, savepath='results/gk_iam/')
-##            nx.draw_networkx(g)
-##            plt.show()
-#            print(g.nodes(data=True))
-#            print(g.edges(data=True))
-            
-    # compute the corresponding sod in graph space.
-    for idx, item in enumerate(alpha_range):
-        sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, 
-                                     ged_method=ged_method, saveGXL=saveGXL)
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) 
-    print('\nnumber of updates for each alpha: ', nb_updated_list)             
-    print('\ntimes:', time_list)
-    
-    
-###############################################################################
-
-    
-if __name__ == '__main__':
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-#    test_gkiam_2combination()
-#    test_gkiam_2combination_all_pairs()
-    
-###############################################################################
-# tests on different numbers of median-sets.
-    test_preimage_iam_median_nb()
-    
-###############################################################################
-# tests on different values on grid of median-sets and k.
-#    test_preimage_iam_grid_k_median_nb()
\ No newline at end of file
diff --git a/gklearn/preimage/test_preimage_mix.py b/gklearn/preimage/test_preimage_mix.py
deleted file mode 100644
index 888de86..0000000
--- a/gklearn/preimage/test_preimage_mix.py
+++ /dev/null
@@ -1,539 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep  5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.ged import ged_median
-from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
-from gklearn.preimage.preimage_iam import preimage_iam_random_mix
-
-###############################################################################
-# tests on different values on grid of median-sets and k.
-
-def test_preimage_mix_grid_k_median_nb():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 5 # iteration limit for pre-image.
-    l_max = 500 # update limit for random generation
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-#    k = 5 # k nearest neighbors
-    epsilon = 1e-6
-    InitIAMWithAllDk = True
-    InitRandomWithAllDk = True
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    # parameters for IAM function
-    c_ei=1
-    c_er=1
-    c_es=1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = True
-    connected_iam = False
-    
-    # number of graphs; we what to compute the median of these graphs. 
-    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    # number of nearest neighbors.
-    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-        
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list_iam = []
-    nb_updated_list_random = []
-    nb_updated_k_list_iam = []
-    nb_updated_k_list_random = []
-    g_best = []
-    for idx_nb, nb_median in enumerate(nb_median_range):
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-        km_tmp = gmfile['gm']
-        time_km = gmfile['gmtime']
-        # modify mixed gram matrix.
-        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                km[i, j] = km_tmp[i, j]
-                km[j, i] = km[i, j]
-        for i in range(len(Gn)):
-            for j, idx in enumerate(idx_rdm):
-                km[i, len(Gn) + j] = km[i, idx]
-                km[len(Gn) + j, i] = km[i, idx]
-        for i, idx1 in enumerate(idx_rdm):
-            for j, idx2 in enumerate(idx_rdm):
-                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        sod_gs_list.append([])
-        sod_gs_min_list.append([])
-        nb_updated_list_iam.append([])
-        nb_updated_list_random.append([])
-        nb_updated_k_list_iam.append([])
-        nb_updated_k_list_random.append([])
-        g_best.append([])   
-        
-        for k in k_range:
-            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
-            print('k =', k)
-            time0 = time.time()
-            dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
-                nb_updated_k_iam, nb_updated_k_random = \
-                preimage_iam_random_mix(Gn, Gn_median,
-                alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
-                l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
-                InitRandomWithAllDk=InitRandomWithAllDk,
-                params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
-                            'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                            'removeNodes': removeNodes, 'connected': connected_iam},
-                params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
-                            'saveGXL': saveGXL})
-                
-            time_total = time.time() - time0 + time_km
-            print('time: ', time_total)
-            time_list[idx_nb].append(time_total)
-            print('\nsmallest distance in kernel space: ', dhat) 
-            dis_ks_min_list[idx_nb].append(dhat)
-            g_best[idx_nb].append(ghat_list)
-            print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
-            nb_updated_list_iam[idx_nb].append(nb_updated_iam)
-            print('\nnumber of updates of the best graph by random generation: ', 
-                  nb_updated_random)
-            nb_updated_list_random[idx_nb].append(nb_updated_random)
-            print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
-            nb_updated_k_list_iam[idx_nb].append(nb_updated_k_iam)
-            print('\nnumber of updates of k nearest graphs by random generation: ', 
-                  nb_updated_k_random)
-            nb_updated_k_list_random[idx_nb].append(nb_updated_k_random) 
-            
-            # show the best graph and save it to file.
-            print('the shortest distance is', dhat)
-            print('one of the possible corresponding pre-images is')
-            nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
-                    with_labels=True)
-            plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + 
-                        '_k' + str(k) + '.png', format="PNG")
-    #        plt.show()
-            plt.clf()
-    #        print(ghat_list[0].nodes(data=True))
-    #        print(ghat_list[0].edges(data=True))
-        
-            # compute the corresponding sod in graph space.
-            sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
-                                         ged_method=ged_method, saveGXL=saveGXL)
-            sod_gs_list[idx_nb].append(sod_tmp)
-            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
-            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each set of median graphs and k: ', 
-          sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs and k: ', 
-          dis_ks_min_list) 
-    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', 
-          nb_updated_list_iam)
-    print('\nnumber of updates of the best graph for each set of median graphs and k by random generation: ', 
-          nb_updated_list_random)
-    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by IAM: ', 
-          nb_updated_k_list_iam)
-    print('\nnumber of updates of k nearest graphs for each set of median graphs and k by random generation: ', 
-          nb_updated_k_list_random)
-    print('\ntimes:', time_list)
-    
-    
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_preimage_mix_median_nb():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 5 # iteration limit for pre-image.
-    l_max = 500 # update limit for random generation
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 5 # k nearest neighbors
-    epsilon = 1e-6
-    InitIAMWithAllDk = True
-    InitRandomWithAllDk = True
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    # parameters for IAM function
-    c_ei=1
-    c_er=1
-    c_es=1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = True
-    connected_iam = False
-    
-    # number of graphs; we what to compute the median of these graphs. 
-    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-        
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list_iam = []
-    nb_updated_list_random = []
-    nb_updated_k_list_iam = []
-    nb_updated_k_list_random = []
-    g_best = []
-    for nb_median in nb_median_range:
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-        km_tmp = gmfile['gm']
-        time_km = gmfile['gmtime']
-        # modify mixed gram matrix.
-        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                km[i, j] = km_tmp[i, j]
-                km[j, i] = km[i, j]
-        for i in range(len(Gn)):
-            for j, idx in enumerate(idx_rdm):
-                km[i, len(Gn) + j] = km[i, idx]
-                km[len(Gn) + j, i] = km[i, idx]
-        for i, idx1 in enumerate(idx_rdm):
-            for j, idx2 in enumerate(idx_rdm):
-                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        time0 = time.time()
-        dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
-            nb_updated_k_iam, nb_updated_k_random = \
-            preimage_iam_random_mix(Gn, Gn_median,
-            alpha_range, range(len(Gn), len(Gn) + nb_median), km, k, r_max, 
-            l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
-            InitRandomWithAllDk=InitRandomWithAllDk,
-            params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
-                        'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                        'removeNodes': removeNodes, 'connected': connected_iam},
-            params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
-                        'saveGXL': saveGXL})
-            
-        time_total = time.time() - time0 + time_km
-        print('time: ', time_total)
-        time_list.append(time_total)
-        print('\nsmallest distance in kernel space: ', dhat) 
-        dis_ks_min_list.append(dhat)
-        g_best.append(ghat_list)
-        print('\nnumber of updates of the best graph by IAM: ', nb_updated_iam)
-        nb_updated_list_iam.append(nb_updated_iam)
-        print('\nnumber of updates of the best graph by random generation: ', 
-              nb_updated_random)
-        nb_updated_list_random.append(nb_updated_random)
-        print('\nnumber of updates of k nearest graphs by IAM: ', nb_updated_k_iam)
-        nb_updated_k_list_iam.append(nb_updated_k_iam)
-        print('\nnumber of updates of k nearest graphs by random generation: ', 
-              nb_updated_k_random)
-        nb_updated_k_list_random.append(nb_updated_k_random) 
-        
-        # show the best graph and save it to file.
-        print('the shortest distance is', dhat)
-        print('one of the possible corresponding pre-images is')
-        nx.draw(ghat_list[0], labels=nx.get_node_attributes(ghat_list[0], 'atom'), 
-                with_labels=True)
-        plt.savefig('results/preimage_mix/mutag_median_nb' + str(nb_median) + 
-                    '.png', format="PNG")
-#        plt.show()
-        plt.clf()
-#        print(ghat_list[0].nodes(data=True))
-#        print(ghat_list[0].edges(data=True))
-    
-        # compute the corresponding sod in graph space.
-        sod_tmp, _ = ged_median([ghat_list[0]], Gn_median, ged_cost=ged_cost, 
-                                     ged_method=ged_method, saveGXL=saveGXL)
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs: ', 
-          dis_ks_min_list) 
-    print('\nnumber of updates of the best graph for each set of median graphs by IAM: ', 
-          nb_updated_list_iam)
-    print('\nnumber of updates of the best graph for each set of median graphs by random generation: ', 
-          nb_updated_list_random)
-    print('\nnumber of updates of k nearest graphs for each set of median graphs by IAM: ', 
-          nb_updated_k_list_iam)
-    print('\nnumber of updates of k nearest graphs for each set of median graphs by random generation: ', 
-          nb_updated_k_list_random)
-    print('\ntimes:', time_list)
-    
-    
-
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-
-def test_preimage_mix_2combination_all_pairs():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # iteration limit for pre-image.
-    l_max = 500 # update limit for random generation
-    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 5 # k nearest neighbors
-    epsilon = 1e-6
-    InitIAMWithAllDk = True
-    InitRandomWithAllDk = True
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    # parameters for IAM function
-    c_ei=1
-    c_er=1
-    c_es=1
-    ite_max_iam = 50
-    epsilon_iam = 0.001
-    removeNodes = True
-    connected_iam = False
-    
-    nb_update_mat_iam = np.full((len(Gn), len(Gn)), np.inf)
-    nb_update_mat_random = np.full((len(Gn), len(Gn)), np.inf)
-    # test on each pair of graphs.
-#    for idx1 in range(len(Gn) - 1, -1, -1):
-#        for idx2 in range(idx1, -1, -1):
-    for idx1 in range(187, 188):
-        for idx2 in range(167, 168):
-            g1 = Gn[idx1].copy()
-            g2 = Gn[idx2].copy()
-        #    Gn[10] = []
-        #    Gn[10] = []
-            
-            nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-            plt.savefig("results/preimage_mix/mutag187.png", format="PNG")
-            plt.show()
-            plt.clf()
-            nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-            plt.savefig("results/preimage_mix/mutag167.png", format="PNG")
-            plt.show()
-            plt.clf()
-
-            ###################################################################            
-#            Gn_mix = [g.copy() for g in Gn]
-#            Gn_mix.append(g1.copy())
-#            Gn_mix.append(g2.copy())
-#            
-#            # compute
-#            time0 = time.time()
-#            km = compute_kernel(Gn_mix, gkernel, True)
-#            time_km = time.time() - time0
-#            
-#            # write Gram matrix to file and read it.
-#            np.savez('results/gram_matrix_uhpath_itr7_pq0.8.gm', gm=km, gmtime=time_km)
-            
-            ###################################################################
-            gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
-            km = gmfile['gm']
-            time_km = gmfile['gmtime']
-            # modify mixed gram matrix.
-            for i in range(len(Gn)):
-                km[i, len(Gn)] = km[i, idx1]
-                km[i, len(Gn) + 1] = km[i, idx2]
-                km[len(Gn), i] = km[i, idx1]
-                km[len(Gn) + 1, i] = km[i, idx2]
-            km[len(Gn), len(Gn)] = km[idx1, idx1]
-            km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
-            km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
-            km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-            
-            ###################################################################
-#            # use only the two graphs in median set as candidates.
-#            Gn = [g1.copy(), g2.copy()]
-#            Gn_mix = Gn + [g1.copy(), g2.copy()]
-#            # compute         
-#            time0 = time.time()
-#            km = compute_kernel(Gn_mix, gkernel, True)
-#            time_km = time.time() - time0
-    
-            
-            time_list = []
-            dis_ks_min_list = []
-            sod_gs_list = []
-            sod_gs_min_list = []
-            nb_updated_list_iam = []
-            nb_updated_list_random = []
-            nb_updated_k_list_iam = []
-            nb_updated_k_list_random = []
-            g_best = []
-            # for each alpha
-            for alpha in alpha_range:
-                print('\n-------------------------------------------------------\n')
-                print('alpha =', alpha)
-                time0 = time.time()
-                dhat, ghat_list, dis_of_each_itr, nb_updated_iam, nb_updated_random, \
-                    nb_updated_k_iam, nb_updated_k_random = \
-                    preimage_iam_random_mix(Gn, [g1, g2],
-                    [alpha, 1 - alpha], range(len(Gn), len(Gn) + 2), km, k, r_max, 
-                    l_max, gkernel, epsilon=epsilon, InitIAMWithAllDk=InitIAMWithAllDk, 
-                    InitRandomWithAllDk=InitRandomWithAllDk,
-                    params_iam={'c_ei': c_ei, 'c_er': c_er, 'c_es': c_es, 
-                                'ite_max': ite_max_iam, 'epsilon': epsilon_iam,
-                                'removeNodes': removeNodes, 'connected': connected_iam},
-                    params_ged={'ged_cost': ged_cost, 'ged_method': ged_method, 
-                                'saveGXL': saveGXL})
-                time_total = time.time() - time0 + time_km
-                print('time: ', time_total)
-                time_list.append(time_total)
-                dis_ks_min_list.append(dhat)
-                g_best.append(ghat_list)
-                nb_updated_list_iam.append(nb_updated_iam)       
-                nb_updated_list_random.append(nb_updated_random)
-                nb_updated_k_list_iam.append(nb_updated_k_iam)       
-                nb_updated_k_list_random.append(nb_updated_k_random) 
-                
-            # show best graphs and save them to file.
-            for idx, item in enumerate(alpha_range):
-                print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
-                print('one of the possible corresponding pre-images is')
-                nx.draw(g_best[idx][0], labels=nx.get_node_attributes(g_best[idx][0], 'atom'), 
-                        with_labels=True)
-                plt.savefig('results/preimage_mix/mutag' + str(idx1) + '_' + str(idx2) 
-                            + '_alpha' + str(item) + '.png', format="PNG")
-#                plt.show()
-                plt.clf()
-#                print(g_best[idx][0].nodes(data=True))
-#                print(g_best[idx][0].edges(data=True))
-                
-        #        for g in g_best[idx]:
-        #            draw_Letter_graph(g, savepath='results/gk_iam/')
-        ##            nx.draw_networkx(g)
-        ##            plt.show()
-        #            print(g.nodes(data=True))
-        #            print(g.edges(data=True))
-                    
-            # compute the corresponding sod in graph space.
-            for idx, item in enumerate(alpha_range):
-                sod_tmp, _ = ged_median([g_best[0]], [g1, g2], ged_cost=ged_cost, 
-                                             ged_method=ged_method, saveGXL=saveGXL)
-                sod_gs_list.append(sod_tmp)
-                sod_gs_min_list.append(np.min(sod_tmp))
-                
-            print('\nsods in graph space: ', sod_gs_list)
-            print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
-            print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) 
-            print('\nnumber of updates of the best graph for each alpha by IAM: ', nb_updated_list_iam)
-            print('\nnumber of updates of the best graph for each alpha by random generation: ', 
-                  nb_updated_list_random)
-            print('\nnumber of updates of k nearest graphs for each alpha by IAM: ', 
-                  nb_updated_k_list_iam)
-            print('\nnumber of updates of k nearest graphs for each alpha by random generation: ', 
-                  nb_updated_k_list_random)
-            print('\ntimes:', time_list)
-            nb_update_mat_iam[idx1, idx2] = nb_updated_list_iam[0]
-            nb_update_mat_random[idx1, idx2] = nb_updated_list_random[0]
-            
-            str_fw = 'graphs %d and %d: %d times by IAM, %d times by random generation.\n' \
-                % (idx1, idx2, nb_updated_list_iam[0], nb_updated_list_random[0])
-            with open('results/preimage_mix/nb_updates.txt', 'r+') as file:
-                content = file.read()
-                file.seek(0, 0)
-                file.write(str_fw + content)
-    
-###############################################################################
-
-    
-if __name__ == '__main__':
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-#    test_preimage_mix_2combination_all_pairs()
-    
-###############################################################################
-# tests on different numbers of median-sets.
-#    test_preimage_mix_median_nb()
-    
-###############################################################################
-# tests on different values on grid of median-sets and k.
-    test_preimage_mix_grid_k_median_nb()
\ No newline at end of file
diff --git a/gklearn/preimage/test_preimage_random.py b/gklearn/preimage/test_preimage_random.py
deleted file mode 100644
index bb77d2f..0000000
--- a/gklearn/preimage/test_preimage_random.py
+++ /dev/null
@@ -1,398 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Sep  5 15:59:00 2019
-
-@author: ljia
-"""
-
-import numpy as np
-import networkx as nx
-import matplotlib.pyplot as plt
-import time
-import random
-#from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset
-from gklearn.preimage.preimage_random import preimage_random
-from gklearn.preimage.ged import ged_median
-from gklearn.preimage.utils import compute_kernel, get_same_item_indices, remove_edges
-
-
-###############################################################################
-# tests on different values on grid of median-sets and k.
-
-def test_preimage_random_grid_k_median_nb():    
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 5 # iteration limit for pre-image.
-    l = 500 # update limit for random generation
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-#    k = 5 # k nearest neighbors
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    
-    # number of graphs; we what to compute the median of these graphs. 
-    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    # number of nearest neighbors.
-    k_range = [5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 100]
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-        
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []
-    g_best = []
-    for idx_nb, nb_median in enumerate(nb_median_range):
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-        km_tmp = gmfile['gm']
-        time_km = gmfile['gmtime']
-        # modify mixed gram matrix.
-        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                km[i, j] = km_tmp[i, j]
-                km[j, i] = km[i, j]
-        for i in range(len(Gn)):
-            for j, idx in enumerate(idx_rdm):
-                km[i, len(Gn) + j] = km[i, idx]
-                km[len(Gn) + j, i] = km[i, idx]
-        for i, idx1 in enumerate(idx_rdm):
-            for j, idx2 in enumerate(idx_rdm):
-                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        
-        time_list.append([])
-        dis_ks_min_list.append([])
-        sod_gs_list.append([])
-        sod_gs_min_list.append([])
-        nb_updated_list.append([])
-        g_best.append([])   
-        
-        for k in k_range:
-            print('\n++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n')
-            print('k =', k)
-            time0 = time.time()
-            dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, 
-                range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
-                
-            time_total = time.time() - time0 + time_km
-            print('time: ', time_total)
-            time_list[idx_nb].append(time_total)
-            print('\nsmallest distance in kernel space: ', dhat) 
-            dis_ks_min_list[idx_nb].append(dhat)
-            g_best[idx_nb].append(ghat)
-            print('\nnumber of updates of the best graph: ', nb_updated)
-            nb_updated_list[idx_nb].append(nb_updated)
-            
-            # show the best graph and save it to file.
-            print('the shortest distance is', dhat)
-            print('one of the possible corresponding pre-images is')
-            nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), 
-                    with_labels=True)
-            plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + 
-                        '_k' + str(k) + '.png', format="PNG")
-    #        plt.show()
-            plt.clf()
-    #        print(ghat_list[0].nodes(data=True))
-    #        print(ghat_list[0].edges(data=True))
-        
-            # compute the corresponding sod in graph space.
-            sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, 
-                                         ged_method=ged_method, saveGXL=saveGXL)
-            sod_gs_list[idx_nb].append(sod_tmp)
-            sod_gs_min_list[idx_nb].append(np.min(sod_tmp))
-            print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each set of median graphs and k: ', 
-          sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs and k: ', 
-          dis_ks_min_list) 
-    print('\nnumber of updates of the best graph for each set of median graphs and k by IAM: ', 
-          nb_updated_list)
-    print('\ntimes:', time_list)
-    
-
-
-
-###############################################################################
-# tests on different numbers of median-sets.
-
-def test_preimage_random_median_nb():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:50]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 5 # iteration limit for pre-image.
-    l = 500 # update limit for random generation
-#    alpha_range = np.linspace(0.5, 0.5, 1)
-    k = 5 # k nearest neighbors
-    # parameters for GED function
-    ged_cost='CHEM_1'
-    ged_method='IPFP'
-    saveGXL='gedlib'
-    
-    # number of graphs; we what to compute the median of these graphs. 
-    nb_median_range = [2, 3, 4, 5, 10, 20, 30, 40, 50, 100]
-    
-    # find out all the graphs classified to positive group 1.
-    idx_dict = get_same_item_indices(y_all)
-    Gn = [Gn[i] for i in idx_dict[1]]
-    
-#    # compute Gram matrix.
-#    time0 = time.time()
-#    km = compute_kernel(Gn, gkernel, True)
-#    time_km = time.time() - time0    
-#    # write Gram matrix to file.
-#    np.savez('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm', gm=km, gmtime=time_km)
-        
-    
-    time_list = []
-    dis_ks_min_list = []
-    sod_gs_list = []
-    sod_gs_min_list = []
-    nb_updated_list = []
-    g_best = []
-    for nb_median in nb_median_range:
-        print('\n-------------------------------------------------------')
-        print('number of median graphs =', nb_median)
-        random.seed(1)
-        idx_rdm = random.sample(range(len(Gn)), nb_median)
-        print('graphs chosen:', idx_rdm)
-        Gn_median = [Gn[idx].copy() for idx in idx_rdm]
-        
-#        for g in Gn_median:
-#            nx.draw(g, labels=nx.get_node_attributes(g, 'atom'), with_labels=True)
-##            plt.savefig("results/preimage_mix/mutag.png", format="PNG")
-#            plt.show()
-#            plt.clf()                         
-                    
-        ###################################################################
-        gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03_mutag_positive.gm.npz')
-        km_tmp = gmfile['gm']
-        time_km = gmfile['gmtime']
-        # modify mixed gram matrix.
-        km = np.zeros((len(Gn) + nb_median, len(Gn) + nb_median))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                km[i, j] = km_tmp[i, j]
-                km[j, i] = km[i, j]
-        for i in range(len(Gn)):
-            for j, idx in enumerate(idx_rdm):
-                km[i, len(Gn) + j] = km[i, idx]
-                km[len(Gn) + j, i] = km[i, idx]
-        for i, idx1 in enumerate(idx_rdm):
-            for j, idx2 in enumerate(idx_rdm):
-                km[len(Gn) + i, len(Gn) + j] = km[idx1, idx2]
-                
-        ###################################################################
-        alpha_range = [1 / nb_median] * nb_median
-        time0 = time.time()
-        dhat, ghat, nb_updated = preimage_random(Gn, Gn_median, alpha_range, 
-            range(len(Gn), len(Gn) + nb_median), km, k, r_max, l, gkernel)
-            
-        time_total = time.time() - time0 + time_km
-        print('time: ', time_total)
-        time_list.append(time_total)
-        print('\nsmallest distance in kernel space: ', dhat) 
-        dis_ks_min_list.append(dhat)
-        g_best.append(ghat)
-        print('\nnumber of updates of the best graph: ', nb_updated)
-        nb_updated_list.append(nb_updated)
-        
-        # show the best graph and save it to file.
-        print('the shortest distance is', dhat)
-        print('one of the possible corresponding pre-images is')
-        nx.draw(ghat, labels=nx.get_node_attributes(ghat, 'atom'), 
-                with_labels=True)
-        plt.savefig('results/preimage_random/mutag_median_nb' + str(nb_median) + 
-                    '.png', format="PNG")
-#        plt.show()
-        plt.clf()
-#        print(ghat_list[0].nodes(data=True))
-#        print(ghat_list[0].edges(data=True))
-    
-        # compute the corresponding sod in graph space.
-        sod_tmp, _ = ged_median([ghat], Gn_median, ged_cost=ged_cost, 
-                                     ged_method=ged_method, saveGXL=saveGXL)
-        sod_gs_list.append(sod_tmp)
-        sod_gs_min_list.append(np.min(sod_tmp))
-        print('\nsmallest sod in graph space: ', np.min(sod_tmp))
-        
-    print('\nsods in graph space: ', sod_gs_list)
-    print('\nsmallest sod in graph space for each set of median graphs: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each set of median graphs: ', 
-          dis_ks_min_list) 
-    print('\nnumber of updates of the best graph for each set of median graphs: ', 
-          nb_updated_list)
-    print('\ntimes:', time_list)
-    
-    
-
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-    
-def test_random_preimage_2combination():
-    ds = {'name': 'MUTAG', 'dataset': '../datasets/MUTAG/MUTAG_A.txt',
-          'extra_params': {}}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['extra_params'])
-#    Gn = Gn[0:12]
-    remove_edges(Gn)
-    gkernel = 'marginalizedkernel'
-    
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, gkernel=gkernel)
-#    print(dis_max, dis_min, dis_mean)
-    
-    lmbda = 0.03 # termination probalility
-    r_max = 10 # iteration limit for pre-image.
-    l = 500
-    alpha_range = np.linspace(0, 1, 11)
-    k = 5 # k nearest neighbors
-    
-    # randomly select two molecules
-    np.random.seed(1)
-    idx_gi = [187, 167] # np.random.randint(0, len(Gn), 2)
-    g1 = Gn[idx_gi[0]].copy()
-    g2 = Gn[idx_gi[1]].copy()
-    
-#    nx.draw(g1, labels=nx.get_node_attributes(g1, 'atom'), with_labels=True)
-#    plt.savefig("results/random_preimage/mutag10.png", format="PNG")
-#    plt.show()
-#    nx.draw(g2, labels=nx.get_node_attributes(g2, 'atom'), with_labels=True)
-#    plt.savefig("results/random_preimage/mutag11.png", format="PNG")
-#    plt.show()    
-    
-    ######################################################################
-#    Gn_mix = [g.copy() for g in Gn]
-#    Gn_mix.append(g1.copy())
-#    Gn_mix.append(g2.copy())
-#    
-##    g_tmp = iam([g1, g2])
-##    nx.draw_networkx(g_tmp)
-##    plt.show()
-#    
-#    # compute 
-#    time0 = time.time()
-#    km = compute_kernel(Gn_mix, gkernel, True)
-#    time_km = time.time() - time0
-    
-    ###################################################################
-    idx1 = idx_gi[0]
-    idx2 = idx_gi[1]
-    gmfile = np.load('results/gram_matrix_marg_itr10_pq0.03.gm.npz')
-    km = gmfile['gm']
-    time_km = gmfile['gmtime']
-    # modify mixed gram matrix.
-    for i in range(len(Gn)):
-        km[i, len(Gn)] = km[i, idx1]
-        km[i, len(Gn) + 1] = km[i, idx2]
-        km[len(Gn), i] = km[i, idx1]
-        km[len(Gn) + 1, i] = km[i, idx2]
-    km[len(Gn), len(Gn)] = km[idx1, idx1]
-    km[len(Gn), len(Gn) + 1] = km[idx1, idx2]
-    km[len(Gn) + 1, len(Gn)] = km[idx2, idx1]
-    km[len(Gn) + 1, len(Gn) + 1] = km[idx2, idx2]
-            
-    ###################################################################
-
-    time_list = []
-    nb_updated_list = []
-    g_best = []
-    dis_ks_min_list = []
-    # for each alpha
-    for alpha in alpha_range:
-        print('\n-------------------------------------------------------\n')
-        print('alpha =', alpha)
-        time0 = time.time()
-        dhat, ghat, nb_updated = preimage_random(Gn, [g1, g2], [alpha, 1 - alpha], 
-                                          range(len(Gn), len(Gn) + 2), km,
-                                          k, r_max, l, gkernel)
-        time_total = time.time() - time0 + time_km
-        print('time: ', time_total)
-        time_list.append(time_total)
-        dis_ks_min_list.append(dhat)
-        g_best.append(ghat)
-        nb_updated_list.append(nb_updated)
-        
-    # show best graphs and save them to file.
-    for idx, item in enumerate(alpha_range):
-        print('when alpha is', item, 'the shortest distance is', dis_ks_min_list[idx])
-        print('one of the possible corresponding pre-images is')
-        nx.draw(g_best[idx], labels=nx.get_node_attributes(g_best[idx], 'atom'), 
-                with_labels=True)
-        plt.show()
-        plt.savefig('results/random_preimage/mutag_alpha' + str(item) + '.png', format="PNG")
-        plt.clf()
-        print(g_best[idx].nodes(data=True))
-        print(g_best[idx].edges(data=True))
-            
-#        # compute the corresponding sod in graph space. (alpha range not considered.)
-#        sod_tmp, _ = median_distance(g_best[0], Gn_let)
-#        sod_gs_list.append(sod_tmp)
-#        sod_gs_min_list.append(np.min(sod_tmp))
-#        sod_ks_min_list.append(sod_ks)
-#        nb_updated_list.append(nb_updated)
-                      
-#    print('\nsmallest sod in graph space for each alpha: ', sod_gs_min_list)  
-    print('\nsmallest distance in kernel space for each alpha: ', dis_ks_min_list) 
-    print('\nnumber of updates for each alpha: ', nb_updated_list)             
-    print('\ntimes:', time_list)
-    
-###############################################################################
-
-    
-if __name__ == '__main__':
-###############################################################################
-# test on the combination of the two randomly chosen graphs. (the same as in the
-# random pre-image paper.)
-#    test_random_preimage_2combination()
-    
-###############################################################################
-# tests all algorithms on different numbers of median-sets.
-    test_preimage_random_median_nb()
-    
-###############################################################################
-# tests all algorithms on different values on grid of median-sets and k.
-#    test_preimage_random_grid_k_median_nb()
\ No newline at end of file
diff --git a/gklearn/preimage/timer.py b/gklearn/preimage/timer.py
deleted file mode 100644
index b1cecec..0000000
--- a/gklearn/preimage/timer.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Mon Mar 23 09:52:50 2020
-
-@author: ljia
-"""
-import time
-
-class Timer(object):
-	"""A timer class that can be used by methods that support time limits.
-	
-	Note
-	----
-	This is the Python implementation of `the C++ code in GEDLIB <https://github.com/dbblumenthal/gedlib/blob/master/src/env/timer.hpp>`__.
-	"""
-	
-	def __init__(self, time_limit_in_sec):
-		"""Constructs a timer for a given time limit.
-		
-		Parameters
-		----------
-		time_limit_in_sec : string
-			The time limit in seconds.
-		"""		
-		self.__time_limit_in_sec = time_limit_in_sec
-		self.__start_time = time.time()
-	
-	
-	def expired(self):
-		"""Checks if the time limit has expired. 
-		
-		Return
-		------
-		Boolean true if the time limit has expired and false otherwise.
-"""
-		if self.__time_limit_in_sec > 0:
-			runtime = time.time() - self.__start_time
-			return runtime >= self.__time_limit_in_sec
-		return False
\ No newline at end of file
diff --git a/gklearn/preimage/utils.py b/gklearn/preimage/utils.py
deleted file mode 100644
index b91e4c0..0000000
--- a/gklearn/preimage/utils.py
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Oct 17 19:05:07 2019
-
-Useful functions.
-@author: ljia
-"""
-#import networkx as nx
-
-import multiprocessing
-import numpy as np
-
-from gklearn.kernels.marginalizedKernel import marginalizedkernel
-from gklearn.kernels.untilHPathKernel import untilhpathkernel
-from gklearn.kernels.spKernel import spkernel
-import functools
-from gklearn.utils.kernels import deltakernel, gaussiankernel, kernelproduct, polynomialkernel
-from gklearn.kernels.structuralspKernel import structuralspkernel
-from gklearn.kernels.treeletKernel import treeletkernel
-from gklearn.kernels.weisfeilerLehmanKernel import weisfeilerlehmankernel
-
-
-def remove_edges(Gn):
-    for G in Gn:
-        for _, _, attrs in G.edges(data=True):
-            attrs.clear()
-            
-def dis_gstar(idx_g, idx_gi, alpha, Kmatrix, term3=0, withterm3=True):
-    term1 = Kmatrix[idx_g, idx_g]
-    term2 = 0
-    for i, a in enumerate(alpha):
-        term2 += a * Kmatrix[idx_g, idx_gi[i]]
-    term2 *= 2
-    if withterm3 == False:
-        for i1, a1 in enumerate(alpha):
-            for i2, a2 in enumerate(alpha):
-                term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    return np.sqrt(term1 - term2 + term3)
-
-
-def compute_kernel(Gn, graph_kernel, node_label, edge_label, verbose, parallel='imap_unordered'):
-    if graph_kernel == 'marginalizedkernel':
-        Kmatrix, _ = marginalizedkernel(Gn, node_label=node_label, edge_label=edge_label,
-                                  p_quit=0.03, n_iteration=10, remove_totters=False,
-                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'untilhpathkernel':
-        Kmatrix, _ = untilhpathkernel(Gn, node_label=node_label, edge_label=edge_label,
-                                  depth=7, k_func='MinMax', compute_method='trie',
-                                  parallel=parallel,
-                                  n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'spkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix = np.empty((len(Gn), len(Gn)))
-#        Kmatrix[:] = np.nan
-        Kmatrix, _, idx = spkernel(Gn, node_label=node_label, node_kernels=
-                              {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel},
-                              n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-#        for i, row in enumerate(idx):
-#            for j, col in enumerate(idx):
-#                Kmatrix[row, col] = Kmatrix_tmp[i, j]
-    elif graph_kernel == 'structuralspkernel':
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        sub_kernels = {'symb': deltakernel, 'nsymb': gaussiankernel, 'mix': mixkernel}
-        Kmatrix, _ = structuralspkernel(Gn, node_label=node_label, 
-                              edge_label=edge_label, node_kernels=sub_kernels,
-                              edge_kernels=sub_kernels,
-                              parallel=parallel, n_jobs=multiprocessing.cpu_count(), 
-                              verbose=verbose)
-    elif graph_kernel == 'treeletkernel':
-        pkernel = functools.partial(polynomialkernel, d=2, c=1e5)
-#        pkernel = functools.partial(gaussiankernel, gamma=1e-6)
-        mixkernel = functools.partial(kernelproduct, deltakernel, gaussiankernel)
-        Kmatrix, _ = treeletkernel(Gn, node_label=node_label, edge_label=edge_label,
-                                   sub_kernel=pkernel, parallel=parallel,
-                                   n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-    elif graph_kernel == 'weisfeilerlehmankernel':
-        Kmatrix, _ = weisfeilerlehmankernel(Gn, node_label=node_label, edge_label=edge_label,
-                                   height=4, base_kernel='subtree', parallel=None,
-                                   n_jobs=multiprocessing.cpu_count(), verbose=verbose)
-        
-    # normalization
-    Kmatrix_diag = Kmatrix.diagonal().copy()
-    for i in range(len(Kmatrix)):
-        for j in range(i, len(Kmatrix)):
-            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-            Kmatrix[j][i] = Kmatrix[i][j]
-    return Kmatrix
-            
-
-def gram2distances(Kmatrix):
-    dmatrix = np.zeros((len(Kmatrix), len(Kmatrix)))
-    for i1 in range(len(Kmatrix)):
-        for i2 in range(len(Kmatrix)):
-            dmatrix[i1, i2] = Kmatrix[i1, i1] + Kmatrix[i2, i2] - 2 * Kmatrix[i1, i2]
-    dmatrix = np.sqrt(dmatrix)
-    return dmatrix
-
-
-def kernel_distance_matrix(Gn, node_label, edge_label, Kmatrix=None, 
-                           gkernel=None, verbose=True):
-    dis_mat = np.empty((len(Gn), len(Gn)))
-    if Kmatrix is None:
-        Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, verbose)
-    for i in range(len(Gn)):
-        for j in range(i, len(Gn)):
-            dis = Kmatrix[i, i] + Kmatrix[j, j] - 2 * Kmatrix[i, j]
-            if dis < 0:
-                if dis > -1e-10:
-                    dis = 0
-                else:
-                    raise ValueError('The distance is negative.')
-            dis_mat[i, j] = np.sqrt(dis)
-            dis_mat[j, i] = dis_mat[i, j]
-    dis_max = np.max(np.max(dis_mat))
-    dis_min = np.min(np.min(dis_mat[dis_mat != 0]))
-    dis_mean = np.mean(np.mean(dis_mat))
-    return dis_mat, dis_max, dis_min, dis_mean
-
-
-def get_same_item_indices(ls):
-    """Get the indices of the same items in a list. Return a dict keyed by items.
-    """
-    idx_dict = {}
-    for idx, item in enumerate(ls):
-        if item in idx_dict:
-            idx_dict[item].append(idx)
-        else:
-            idx_dict[item] = [idx]
-    return idx_dict
-
-
-def k_nearest_neighbors_to_median_in_kernel_space(Gn, Kmatrix=None, gkernel=None,
-                                                  node_label=None, edge_label=None):
-    dis_k_all = [] # distance between g_star and each graph.
-    alpha = [1 / len(Gn)] * len(Gn)
-    if Kmatrix is None:
-        Kmatrix = compute_kernel(Gn, gkernel, node_label, edge_label, True)
-    term3 = 0
-    for i1, a1 in enumerate(alpha):
-        for i2, a2 in enumerate(alpha):
-            term3 += a1 * a2 * Kmatrix[idx_gi[i1], idx_gi[i2]]
-    for ig, g in tqdm(enumerate(Gn_init), desc='computing distances', file=sys.stdout):
-        dtemp = dis_gstar(ig, idx_gi, alpha, Kmatrix, term3=term3)
-        dis_all.append(dtemp)
-
-
-def normalize_distance_matrix(D):
-    max_value = np.amax(D)
-    min_value = np.amin(D)
-    return (D - min_value) / (max_value - min_value)
\ No newline at end of file
diff --git a/gklearn/preimage/visualization.py b/gklearn/preimage/visualization.py
deleted file mode 100644
index 81b814b..0000000
--- a/gklearn/preimage/visualization.py
+++ /dev/null
@@ -1,585 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Dec 19 17:16:23 2019
-
-@author: ljia
-"""
-import numpy as np
-from sklearn.manifold import TSNE, Isomap
-import matplotlib.pyplot as plt
-from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
-from tqdm import tqdm
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL
-from gklearn.preimage.utils import kernel_distance_matrix, compute_kernel, dis_gstar, get_same_item_indices
-
-
-def visualize_graph_dataset(dis_measure, visual_method, draw_figure, 
-                            draw_params={}, dis_mat=None, Gn=None, 
-                            median_set=None):
-    
-    
-    def draw_zoomed_axes(Gn_embedded, ax):
-        margin = 0.01
-        if dis_measure == 'graph-kernel':
-            index = -2
-        elif dis_measure == 'ged':
-            index = -1
-        x1 = np.min(Gn_embedded[median_set + [index], 0]) - margin * np.max(Gn_embedded)
-        x2 = np.max(Gn_embedded[median_set + [index], 0]) + margin * np.max(Gn_embedded)
-        y1 = np.min(Gn_embedded[median_set + [index], 1]) - margin * np.max(Gn_embedded)
-        y2 = np.max(Gn_embedded[median_set + [index], 1]) + margin * np.max(Gn_embedded)
-        if (x1 < 0 and y1 < 0) or ((x1 > 0 and y1 > 0)):
-            loc = 2
-        else:
-            loc = 3
-        axins = zoomed_inset_axes(ax, 4, loc=loc) # zoom-factor: 2.5, location: upper-left
-        draw_figure(axins, Gn_embedded, dis_measure=dis_measure, 
-                    median_set=median_set, **draw_params)
-        axins.set_xlim(x1, x2) # apply the x-limits
-        axins.set_ylim(y1, y2) # apply the y-limits
-        plt.yticks(visible=False)
-        plt.xticks(visible=False)
-        loc1 = 1 if loc == 2 else 3
-        mark_inset(ax, axins, loc1=2, loc2=4, fc="none", ec="0.5")  
-        
-        
-    if dis_mat is None:
-        if dis_measure == 'graph-kernel':
-            gkernel = 'untilhpathkernel'
-            node_label = 'atom'
-            edge_label = 'bond_type'
-            dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, 
-                                                      Kmatrix=None, gkernel=gkernel)
-        elif dis_measure == 'ged':
-            pass
-        
-    if visual_method == 'tsne':
-        Gn_embedded = TSNE(n_components=2, metric='precomputed').fit_transform(dis_mat)
-    elif visual_method == 'isomap':
-        Gn_embedded = Isomap(n_components=2, metric='precomputed').fit_transform(dis_mat)
-    print(Gn_embedded.shape)
-    fig, ax = plt.subplots()
-    draw_figure(plt, Gn_embedded, dis_measure=dis_measure, legend=True, 
-                median_set=median_set, **draw_params)        
-#    draw_zoomed_axes(Gn_embedded, ax)
-    plt.show()
-    plt.clf()
-    
-    return
-
-
-def draw_figure(ax, Gn_embedded, dis_measure=None, y_idx=None, legend=False,
-                median_set=None):
-    from matplotlib import colors as mcolors
-    colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
-#    colors = ['#08306b', '#08519c', '#2171b5', '#4292c6', '#6baed6', '#9ecae1',
-#              '#c6dbef', '#deebf7']
-#    for i, values in enumerate(y_idx.values()):
-#        for item in values:
-##            ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c=colors[i]) # , c='b')        
-#            ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c='b')
-#    ax.scatter(Gn_embedded[:,0], Gn_embedded[:,1], c='b')        
-    h1 = ax.scatter(Gn_embedded[median_set, 0], Gn_embedded[median_set, 1], c='b')
-    if dis_measure == 'graph-kernel':
-        h2 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='darkorchid') # \psi
-        h3 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='gold') # gen median
-        h4 = ax.scatter(Gn_embedded[-3, 0], Gn_embedded[-3, 1], c='r') #c='g', marker='+') # set median
-    elif dis_measure == 'ged':
-        h3 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='gold') # gen median
-        h4 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='r') #c='g', marker='+') # set median        
-    if legend:
-#    fig.subplots_adjust(bottom=0.17)
-        if dis_measure == 'graph-kernel':
-            ax.legend([h1, h2, h3, h4], 
-                      ['k closest graphs', 'true median', 'gen median', 'set median'])
-        elif dis_measure == 'ged':       
-            ax.legend([h1, h3, h4], ['k closest graphs', 'gen median', 'set median'])
-#    fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
-#    plt.savefig('symbolic_and_non_comparison_vertical_short.eps', format='eps', dpi=300, transparent=True,
-#            bbox_inches='tight')
-#    plt.show()
-            
-    
-###############################################################################
-    
-def visualize_distances_in_kernel():
-    
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    fname_medians = 'expert.treelet'
-    # add set median.
-    fname_sm = 'results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
-    set_median = loadGXL(fname_sm)
-    Gn.append(set_median)
-    # add generalized median (estimated pre-image.)
-    fname_gm = 'results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
-    gen_median = loadGXL(fname_gm)
-    Gn.append(gen_median)
-    
-    # compute distance matrix
-    median_set = [22, 29, 54, 74]
-    gkernel = 'treeletkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    Gn_median_set = [Gn[i].copy() for i in median_set]
-    Kmatrix_median = compute_kernel(Gn + Gn_median_set, gkernel, node_label, 
-                                    edge_label, True)
-    Kmatrix = Kmatrix_median[0:len(Gn), 0:len(Gn)]
-    dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, 
-                                              Kmatrix=Kmatrix, gkernel=gkernel)
-    print('average distances: ', np.mean(np.mean(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-    print('min distances: ', np.min(np.min(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-    print('max distances: ', np.max(np.max(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-
-    # add distances for the image of exact median \psi.
-    dis_k_median_list = []
-    for idx, g in enumerate(Gn):
-        dis_k_median_list.append(dis_gstar(idx, range(len(Gn), len(Gn) + len(Gn_median_set)), 
-                                           [1 / len(Gn_median_set)] * len(Gn_median_set),
-                                           Kmatrix_median, withterm3=False))
-    dis_mat_median = np.zeros((len(Gn) + 1, len(Gn) + 1))
-    for i in range(len(Gn)):
-        for j in range(i, len(Gn)):
-            dis_mat_median[i, j] = dis_mat[i, j]
-            dis_mat_median[j, i] = dis_mat_median[i, j]
-    for i in range(len(Gn)):
-        dis_mat_median[i, -1] = dis_k_median_list[i]
-        dis_mat_median[-1, i] = dis_k_median_list[i]
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    
-    # visualization.
-#    visualize_graph_dataset('graph-kernel', 'tsne', Gn)
-#    visualize_graph_dataset('graph-kernel', 'tsne', draw_figure, 
-#                            draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median)
-    visualize_graph_dataset('graph-kernel', 'tsne', draw_figure, 
-                            draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median,
-                            median_set=median_set)
-        
-    
-def visualize_distances_in_ged():
-    from gklearn.preimage.fitDistance import compute_geds
-    from gklearn.preimage.ged import GED
-    ds = {'name': 'monoterpenoides', 
-          'dataset': '../datasets/monoterpenoides/dataset_10+.ds'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    # add set median.
-    fname_medians = 'expert.treelet'
-    fname_sm = 'preimage/results/test_k_closest_graphs/set_median.' + fname_medians + '.gxl'
-    set_median = loadGXL(fname_sm)
-    Gn.append(set_median)
-    # add generalized median (estimated pre-image.)
-    fname_gm = 'preimage/results/test_k_closest_graphs/gen_median.' + fname_medians + '.gxl'
-    gen_median = loadGXL(fname_gm)
-    Gn.append(gen_median)
-    
-    # compute/load ged matrix.
-#    # compute.
-##    k = 4
-##    edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
-#    edit_costs = [3, 3, 1, 3, 3, 1]
-##    edit_costs = [7, 3, 5, 9, 2, 6]
-#    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-#    params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', 
-#                'algo_options': algo_options, 'stabilizer': None, 
-#                'edit_cost_constant': edit_costs}    
-#    _, ged_mat, _ = compute_geds(Gn, params_ged=params_ged, parallel=True)
-#    np.savez('results/test_k_closest_graphs/ged_mat.' + fname_medians + '.with_medians.gm', ged_mat=ged_mat)
-    # load from file.
-    gmfile = np.load('results/test_k_closest_graphs/ged_mat.' + fname_medians + '.with_medians.gm.npz')
-    ged_mat = gmfile['ged_mat']
-#    # change medians.
-#    edit_costs = [3, 3, 1, 3, 3, 1]
-#    algo_options = '--threads 8 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-#    params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', 
-#                'algo_options': algo_options, 'stabilizer': None, 
-#                'edit_cost_constant': edit_costs}
-#    for idx in tqdm(range(len(Gn) - 2), desc='computing GEDs', file=sys.stdout):
-#        dis, _, _ = GED(Gn[idx], set_median, **params_ged)
-#        ged_mat[idx, -2] = dis
-#        ged_mat[-2, idx] = dis
-#        dis, _, _ = GED(Gn[idx], gen_median, **params_ged)
-#        ged_mat[idx, -1] = dis
-#        ged_mat[-1, idx] = dis
-#    np.savez('results/test_k_closest_graphs/ged_mat.' + fname_medians + '.with_medians.gm', 
-#             ged_mat=ged_mat)
-
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-
-    # visualization.
-    median_set = [22, 29, 54, 74]
-    visualize_graph_dataset('ged', 'tsne', draw_figure, 
-                            draw_params={'y_idx': y_idx}, dis_mat=ged_mat,
-                            median_set=median_set)
-    
-###############################################################################
-    
-    
-def visualize_distances_in_kernel_monoterpenoides():
-    import os
-
-    ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__))  + '../../datasets/monoterpenoides/'}  # node/edge symb
-    Gn_original, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    
-    # compute distance matrix
-#    median_set = [22, 29, 54, 74]
-    gkernel = 'treeletkernel'
-    fit_method = 'expert'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    ds_name = 'monoterpenoides'
-    fname_medians = fit_method + '.' + gkernel
-    dir_output = 'results/xp_monoterpenoides/'
-    repeat = 0
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    for i, (y, values) in enumerate(y_idx.items()):
-        print('\ny =', y)
-        k = len(values)
-        
-        Gn = [Gn_original[g].copy() for g in values]
-        # add set median.
-        fname_sm = dir_output + 'medians/' + str(int(y)) + '/set_median.k' + str(int(k)) \
-            + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
-        set_median = loadGXL(fname_sm)
-        Gn.append(set_median)
-        # add generalized median (estimated pre-image.)
-        fname_gm = dir_output + 'medians/' + str(int(y)) + '/gen_median.k' + str(int(k)) \
-            + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
-        gen_median = loadGXL(fname_gm)
-        Gn.append(gen_median)
-    
-        # compute distance matrix
-        median_set = range(0, len(values))
-    
-        Gn_median_set = [Gn[i].copy() for i in median_set]
-        Kmatrix_median = compute_kernel(Gn + Gn_median_set, gkernel, node_label, 
-                                        edge_label, False)
-        Kmatrix = Kmatrix_median[0:len(Gn), 0:len(Gn)]
-        dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, 
-                                                  Kmatrix=Kmatrix, gkernel=gkernel)
-        print('average distances: ', np.mean(np.mean(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-        print('min distances: ', np.min(np.min(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-        print('max distances: ', np.max(np.max(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-
-        # add distances for the image of exact median \psi.
-        dis_k_median_list = []
-        for idx, g in enumerate(Gn):
-            dis_k_median_list.append(dis_gstar(idx, range(len(Gn), len(Gn) + len(Gn_median_set)), 
-                                               [1 / len(Gn_median_set)] * len(Gn_median_set),
-                                               Kmatrix_median, withterm3=False))
-        dis_mat_median = np.zeros((len(Gn) + 1, len(Gn) + 1))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                dis_mat_median[i, j] = dis_mat[i, j]
-                dis_mat_median[j, i] = dis_mat_median[i, j]
-        for i in range(len(Gn)):
-            dis_mat_median[i, -1] = dis_k_median_list[i]
-            dis_mat_median[-1, i] = dis_k_median_list[i]
-            
-    
-        # visualization.
-#    visualize_graph_dataset('graph-kernel', 'tsne', Gn)
-#    visualize_graph_dataset('graph-kernel', 'tsne', draw_figure, 
-#                            draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median)
-        visualize_graph_dataset('graph-kernel', 'tsne', draw_figure, 
-                                draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median,
-                                median_set=median_set)
-        
-    
-def visualize_distances_in_ged_monoterpenoides():
-    from gklearn.preimage.fitDistance import compute_geds
-    from gklearn.preimage.ged import GED
-    import os
-    
-    ds = {'dataset': '../datasets/monoterpenoides/dataset_10+.ds',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'}  # node/edge symb
-    Gn_original, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    
-    # compute distance matrix
-#    median_set = [22, 29, 54, 74]
-    gkernel = 'treeletkernel'
-    fit_method = 'expert'
-    ds_name = 'monoterpenoides'
-    fname_medians = fit_method + '.' + gkernel
-    dir_output = 'results/xp_monoterpenoides/'
-    repeat = 0
-#    edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
-    edit_costs = [3, 3, 1, 3, 3, 1]
-#    edit_costs = [7, 3, 5, 9, 2, 6]
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    for i, (y, values) in enumerate(y_idx.items()):
-        print('\ny =', y)
-        k = len(values)
-        
-        Gn = [Gn_original[g].copy() for g in values]
-        # add set median.
-        fname_sm = dir_output + 'medians/' + str(int(y)) + '/set_median.k' + str(int(k)) \
-            + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
-        set_median = loadGXL(fname_sm)
-        Gn.append(set_median)
-        # add generalized median (estimated pre-image.)
-        fname_gm = dir_output + 'medians/' + str(int(y)) + '/gen_median.k' + str(int(k)) \
-            + '.y' + str(int(y)) + '.repeat' + str(repeat) + '.gxl'
-        gen_median = loadGXL(fname_gm)
-        Gn.append(gen_median)
-    
-    
-        # compute/load ged matrix.
-        # compute.
-        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-        params_ged = {'dataset': ds_name, 'lib': 'gedlibpy', 'cost': 'CONSTANT', 
-                      'method': 'IPFP', 'algo_options': algo_options, 
-                      'stabilizer': None, 'edit_cost_constant': edit_costs}    
-        _, ged_mat, _ = compute_geds(Gn, params_ged=params_ged, parallel=True)
-        np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + str(int(y)) \
-            + '.with_medians.gm', ged_mat=ged_mat)
-#        # load from file.
-#        gmfile = np.load('dir_output + 'ged_mat.' + fname_medians + '.y' + str(int(y)) + '.with_medians.gm.npz')
-#        ged_mat = gmfile['ged_mat']
-#        # change medians.
-#        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-#        params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', 
-#                    'algo_options': algo_options, 'stabilizer': None, 
-#                    'edit_cost_constant': edit_costs}
-#        for idx in tqdm(range(len(Gn) - 2), desc='computing GEDs', file=sys.stdout):
-#            dis, _, _ = GED(Gn[idx], set_median, **params_ged)
-#            ged_mat[idx, -2] = dis
-#            ged_mat[-2, idx] = dis
-#            dis, _, _ = GED(Gn[idx], gen_median, **params_ged)
-#            ged_mat[idx, -1] = dis
-#            ged_mat[-1, idx] = dis
-#        np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + str(int(y)) + '.with_medians.gm', 
-#                 ged_mat=ged_mat)
-
-        # visualization.
-        median_set = range(0, len(values))
-        visualize_graph_dataset('ged', 'tsne', draw_figure, 
-                                draw_params={'y_idx': y_idx}, dis_mat=ged_mat,
-                                median_set=median_set)
-        
-        
-###############################################################################
-    
-    
-def visualize_distances_in_kernel_letter_h():
-    
-    ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'}  # node/edge symb
-    Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-#    Gn = Gn[0:50]
-    
-    # compute distance matrix
-#    median_set = [22, 29, 54, 74]
-    gkernel = 'structuralspkernel'
-    fit_method = 'expert'
-    node_label = None
-    edge_label = None
-    ds_name = 'letter-h'
-    fname_medians = fit_method + '.' + gkernel
-    dir_output = 'results/xp_letter_h/'
-    k = 150
-    repeat = 0
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    for i, (y, values) in enumerate(y_idx.items()):
-        print('\ny =', y)
-        
-        Gn = [Gn_original[g].copy() for g in values]
-        # add set median.
-        fname_sm = dir_output + 'medians/' + y + '/set_median.k' + str(int(k)) \
-            + '.y' + y + '.repeat' + str(repeat) + '.gxl'
-        set_median = loadGXL(fname_sm)
-        Gn.append(set_median)
-        # add generalized median (estimated pre-image.)
-        fname_gm = dir_output + 'medians/' + y + '/gen_median.k' + str(int(k)) \
-            + '.y' + y + '.repeat' + str(repeat) + '.gxl'
-        gen_median = loadGXL(fname_gm)
-        Gn.append(gen_median)
-    
-        # compute distance matrix
-        median_set = range(0, len(values))
-    
-        Gn_median_set = [Gn[i].copy() for i in median_set]
-        Kmatrix_median = compute_kernel(Gn + Gn_median_set, gkernel, node_label, 
-                                        edge_label, False)
-        Kmatrix = Kmatrix_median[0:len(Gn), 0:len(Gn)]
-        dis_mat, _, _, _ = kernel_distance_matrix(Gn, node_label, edge_label, 
-                                                  Kmatrix=Kmatrix, gkernel=gkernel)
-        print('average distances: ', np.mean(np.mean(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-        print('min distances: ', np.min(np.min(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-        print('max distances: ', np.max(np.max(dis_mat[0:len(Gn)-2, 0:len(Gn)-2])))
-
-        # add distances for the image of exact median \psi.
-        dis_k_median_list = []
-        for idx, g in enumerate(Gn):
-            dis_k_median_list.append(dis_gstar(idx, range(len(Gn), len(Gn) + len(Gn_median_set)), 
-                                               [1 / len(Gn_median_set)] * len(Gn_median_set),
-                                               Kmatrix_median, withterm3=False))
-        dis_mat_median = np.zeros((len(Gn) + 1, len(Gn) + 1))
-        for i in range(len(Gn)):
-            for j in range(i, len(Gn)):
-                dis_mat_median[i, j] = dis_mat[i, j]
-                dis_mat_median[j, i] = dis_mat_median[i, j]
-        for i in range(len(Gn)):
-            dis_mat_median[i, -1] = dis_k_median_list[i]
-            dis_mat_median[-1, i] = dis_k_median_list[i]
-            
-    
-        # visualization.
-#    visualize_graph_dataset('graph-kernel', 'tsne', Gn)
-#    visualize_graph_dataset('graph-kernel', 'tsne', draw_figure, 
-#                            draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median)
-        visualize_graph_dataset('graph-kernel', 'tsne', draw_figure, 
-                                draw_params={'y_idx': y_idx}, dis_mat=dis_mat_median,
-                                median_set=median_set)
-        
-    
-def visualize_distances_in_ged_letter_h():
-    from fitDistance import compute_geds
-    from preimage.test_k_closest_graphs import reform_attributes
-    
-    ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'}  # node/edge symb
-    Gn_original, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-#    Gn = Gn[0:50]
-    
-    # compute distance matrix
-#    median_set = [22, 29, 54, 74]
-    gkernel = 'structuralspkernel'
-    fit_method = 'expert'
-    ds_name = 'letter-h'
-    fname_medians = fit_method + '.' + gkernel
-    dir_output = 'results/xp_letter_h/'
-    k = 150
-    repeat = 0
-#    edit_costs = [0.16229209837639536, 0.06612870523413916, 0.04030113378793905, 0.20723547009415202, 0.3338607220394598, 0.27054392518077297]
-    edit_costs = [3, 3, 1, 3, 3, 1]
-#    edit_costs = [7, 3, 5, 9, 2, 6]
-    
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    for i, (y, values) in enumerate(y_idx.items()):
-        print('\ny =', y)
-        
-        Gn = [Gn_original[g].copy() for g in values]
-        # add set median.
-        fname_sm = dir_output + 'medians/' + y + '/set_median.k' + str(int(k)) \
-            + '.y' + y + '.repeat' + str(repeat) + '.gxl'
-        set_median = loadGXL(fname_sm)
-        Gn.append(set_median)
-        # add generalized median (estimated pre-image.)
-        fname_gm = dir_output + 'medians/' + y + '/gen_median.k' + str(int(k)) \
-            + '.y' + y + '.repeat' + str(repeat) + '.gxl'
-        gen_median = loadGXL(fname_gm)
-        Gn.append(gen_median)
-    
-    
-        # compute/load ged matrix.
-        # compute.
-        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-        params_ged = {'dataset': 'Letter', 'lib': 'gedlibpy', 'cost': 'CONSTANT', 
-                      'method': 'IPFP', 'algo_options': algo_options, 
-                      'stabilizer': None, 'edit_cost_constant': edit_costs}    
-        for g in Gn:
-            reform_attributes(g)
-        _, ged_mat, _ = compute_geds(Gn, params_ged=params_ged, parallel=True)
-        np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + y + '.with_medians.gm', ged_mat=ged_mat)
-#        # load from file.
-#        gmfile = np.load('dir_output + 'ged_mat.' + fname_medians + '.y' + y + '.with_medians.gm.npz')
-#        ged_mat = gmfile['ged_mat']
-#        # change medians.
-#        algo_options = '--threads 1 --initial-solutions 40 --ratio-runs-from-initial-solutions 1'
-#        params_ged = {'lib': 'gedlibpy', 'cost': 'CONSTANT', 'method': 'IPFP', 
-#                    'algo_options': algo_options, 'stabilizer': None, 
-#                    'edit_cost_constant': edit_costs}
-#        for idx in tqdm(range(len(Gn) - 2), desc='computing GEDs', file=sys.stdout):
-#            dis, _, _ = GED(Gn[idx], set_median, **params_ged)
-#            ged_mat[idx, -2] = dis
-#            ged_mat[-2, idx] = dis
-#            dis, _, _ = GED(Gn[idx], gen_median, **params_ged)
-#            ged_mat[idx, -1] = dis
-#            ged_mat[-1, idx] = dis
-#        np.savez(dir_output + 'ged_mat.' + fname_medians + '.y' + y + '.with_medians.gm', 
-#                 ged_mat=ged_mat)
-
-    
-        # visualization.
-        median_set = range(0, len(values))
-        visualize_graph_dataset('ged', 'tsne', draw_figure, 
-                                draw_params={'y_idx': y_idx}, dis_mat=ged_mat,
-                                median_set=median_set)
-
-
-if __name__ == '__main__':
-    visualize_distances_in_kernel_letter_h()
-#    visualize_distances_in_ged_letter_h()
-#    visualize_distances_in_kernel_monoterpenoides()
-#    visualize_distances_in_kernel_monoterpenoides()
-#    visualize_distances_in_kernel()
-#    visualize_distances_in_ged()
-    
-    
-    
-    
-    
-    
-    
-#def draw_figure_dis_k(ax, Gn_embedded, y_idx=None, legend=False):
-#    from matplotlib import colors as mcolors
-#    colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
-##    colors = ['#08306b', '#08519c', '#2171b5', '#4292c6', '#6baed6', '#9ecae1',
-##              '#c6dbef', '#deebf7']
-#    for i, values in enumerate(y_idx.values()):
-#        for item in values:
-##            ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c=colors[i]) # , c='b')        
-#            ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c='b')        
-#    h1 = ax.scatter(Gn_embedded[[12, 13, 22, 29], 0], Gn_embedded[[12, 13, 22, 29], 1], c='r')
-#    h2 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='darkorchid') # \psi
-#    h3 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='gold') # gen median
-#    h4 = ax.scatter(Gn_embedded[-3, 0], Gn_embedded[-3, 1], c='r', marker='+') # set median
-#    if legend:
-##    fig.subplots_adjust(bottom=0.17)
-#        ax.legend([h1, h2, h3, h4], ['k clostest graphs', 'true median', 'gen median', 'set median'])
-##    fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
-##    plt.savefig('symbolic_and_non_comparison_vertical_short.eps', format='eps', dpi=300, transparent=True,
-##            bbox_inches='tight')
-##    plt.show()
-    
-    
-    
-#def draw_figure_ged(ax, Gn_embedded, y_idx=None, legend=False):
-#    from matplotlib import colors as mcolors
-#    colors = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS))
-##    colors = ['#08306b', '#08519c', '#2171b5', '#4292c6', '#6baed6', '#9ecae1',
-##              '#c6dbef', '#deebf7']
-#    for i, values in enumerate(y_idx.values()):
-#        for item in values:
-##            ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c=colors[i]) # , c='b')        
-#            ax.scatter(Gn_embedded[item,0], Gn_embedded[item,1], c='b')        
-#    h1 = ax.scatter(Gn_embedded[[12, 13, 22, 29], 0], Gn_embedded[[12, 13, 22, 29], 1], c='r')
-##    h2 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='darkorchid') # \psi
-#    h3 = ax.scatter(Gn_embedded[-1, 0], Gn_embedded[-1, 1], c='gold') # gen median
-#    h4 = ax.scatter(Gn_embedded[-2, 0], Gn_embedded[-2, 1], c='r', marker='+') # set median
-#    if legend:
-##    fig.subplots_adjust(bottom=0.17)
-#        ax.legend([h1, h3, h4], ['k clostest graphs', 'gen median', 'set median'])
-##    fig.legend(handles, labels, loc='lower center', ncol=2, frameon=False) # , ncol=5, labelspacing=0.1, handletextpad=0.4, columnspacing=0.6)
-##    plt.savefig('symbolic_and_non_comparison_vertical_short.eps', format='eps', dpi=300, transparent=True,
-##            bbox_inches='tight')
-##    plt.show()
\ No newline at end of file
diff --git a/gklearn/preimage/xp_fit_method.py b/gklearn/preimage/xp_fit_method.py
deleted file mode 100644
index ead2786..0000000
--- a/gklearn/preimage/xp_fit_method.py
+++ /dev/null
@@ -1,935 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jan 14 15:39:29 2020
-
-@author: ljia
-"""
-import numpy as np
-import random
-import csv
-from shutil import copyfile
-import networkx as nx
-import matplotlib.pyplot as plt
-import os
-import time
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
-from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix, compute_kernel
-from gklearn.preimage.find_best_k import getRelations
-
-
-def get_dataset(ds_name):
-    if ds_name == 'Letter-high': # node non-symb
-        dataset = 'cpp_ext/data/collections/Letter.xml'
-        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/' 
-        Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
-        for G in Gn:
-            reform_attributes(G, na_names=['x', 'y'])
-            G.graph['node_labels'] = []
-            G.graph['edge_labels'] = []
-            G.graph['node_attrs'] = ['x', 'y']
-            G.graph['edge_attrs'] = []
-    elif ds_name == 'Letter-med': # node non-symb
-        dataset = 'cpp_ext/data/collections/Letter.xml'
-        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/MED/' 
-        Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
-        for G in Gn:
-            reform_attributes(G, na_names=['x', 'y'])
-            G.graph['node_labels'] = []
-            G.graph['edge_labels'] = []
-            G.graph['node_attrs'] = ['x', 'y']
-            G.graph['edge_attrs'] = []
-    elif ds_name == 'Letter-low': # node non-symb
-        dataset = 'cpp_ext/data/collections/Letter.xml'
-        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/LOW/' 
-        Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
-        for G in Gn:
-            reform_attributes(G, na_names=['x', 'y'])
-            G.graph['node_labels'] = []
-            G.graph['edge_labels'] = []
-            G.graph['node_attrs'] = ['x', 'y']
-            G.graph['edge_attrs'] = []
-    elif ds_name == 'Fingerprint':
-#        dataset = 'cpp_ext/data/collections/Fingerprint.xml'
-#        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
-#        Gn, y_all = loadDataset(dataset, extra_params=graph_dir)
-#        for G in Gn:
-#            reform_attributes(G)
-        dataset = '../../datasets/Fingerprint/Fingerprint_A.txt'
-        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/Fingerprint/node_attrs/'
-        Gn, y_all = loadDataset(dataset)
-    elif ds_name == 'SYNTHETIC':
-        pass
-    elif ds_name == 'SYNTHETICnew':
-        dataset = '../../datasets/SYNTHETICnew/SYNTHETICnew_A.txt'
-        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/SYNTHETICnew'
-#        dataset = '../../datasets/Letter-high/Letter-high_A.txt'
-#        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'
-        Gn, y_all = loadDataset(dataset)
-    elif ds_name == 'Synthie':
-        pass
-    elif ds_name == 'COIL-DEL':
-        dataset = '../../datasets/COIL-DEL/COIL-DEL_A.txt'
-        graph_dir = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/generated_datsets/COIL-DEL/'
-        Gn, y_all = loadDataset(dataset)
-    elif ds_name == 'COIL-RAG':
-        pass
-    elif ds_name == 'COLORS-3':
-        pass
-    elif ds_name == 'FRANKENSTEIN':
-        pass
-    
-    return Gn, y_all, graph_dir
-
-
-def init_output_file(ds_name, gkernel, fit_method, dir_output):
-#    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.csv'
-    f_detail = open(dir_output + fn_output_detail, 'a')
-    csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'edit cost', 
-              'GED method', 'attr distance', 'fit method', 'k', 
-              'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-              'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
-              'median set'])
-    f_detail.close()
-    
-#    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.csv'
-    f_summary = open(dir_output + fn_output_summary, 'a')
-    csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'edit cost', 
-              'GED method', 'attr distance', 'fit method', 'k', 
-              'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-              'dis_k gi -> GM', 'fitting time', 'generating time', 'total time',
-              '# SOD SM -> GM', '# dis_k SM -> GM', 
-              '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', 
-              'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', 
-              'repeats better dis_k gi -> GM'])
-    f_summary.close()
-    
-    return fn_output_detail, fn_output_summary
-
-
-def xp_fit_method_for_non_symbolic(parameters, save_results=True, initial_solutions=1,
-                                   Gn_data=None, k_dis_data=None, Kmatrix=None,
-                                   is_separate=False):
-    
-    # 1. set parameters.
-    print('1. setting parameters...')
-    ds_name = parameters['ds_name']
-    gkernel = parameters['gkernel']
-    edit_cost_name = parameters['edit_cost_name']
-    ged_method = parameters['ged_method']
-    attr_distance = parameters['attr_distance']
-    fit_method = parameters['fit_method']
-    init_ecc = parameters['init_ecc']
-
-    node_label = None
-    edge_label = None
-    dir_output = 'results/xp_fit_method/'    
-      
-    
-    # 2. get dataset.
-    print('2. getting dataset...')
-    if Gn_data is None:
-        Gn, y_all, graph_dir = get_dataset(ds_name)
-    else:
-        Gn = Gn_data[0]
-        y_all = Gn_data[1]
-        graph_dir = Gn_data[2]
-        
-    
-    # 3. compute kernel distance matrix.
-    print('3. computing kernel distance matrix...')
-    if k_dis_data is None:
-        dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, 
-            None, Kmatrix=Kmatrix, gkernel=gkernel)
-    else:
-#        dis_mat = k_dis_data[0]
-#        dis_max = k_dis_data[1]
-#        dis_min = k_dis_data[2]
-#        dis_mean = k_dis_data[3]
-#        print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min, dis_mean)
-        pass
-
-
-    if save_results:
-        # create result files.
-        print('creating output files...')
-        fn_output_detail, fn_output_summary = init_output_file(ds_name, gkernel, 
-                                                               fit_method, dir_output)
-
-            
-    # start repeats.    
-    repeats = 1
-#    k_list = range(2, 11)
-    k_list = [0]
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    random.seed(1)
-    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-    
-    for k in k_list:
-#        print('\n--------- k =', k, '----------')
-        
-        sod_sm_mean_list = []
-        sod_gm_mean_list = []
-        dis_k_sm_mean_list = []
-        dis_k_gm_mean_list = []
-        dis_k_gi_min_mean_list = []
-        time_fitting_mean_list = []
-        time_generating_mean_list = []
-        time_total_mean_list = []
-        
-        # 3. start generating and computing over targets.
-        print('4. starting generating and computing over targets......')
-        for i, (y, values) in enumerate(y_idx.items()):
-#            y = 'I'
-#            values = y_idx[y]
-#            values = values[0:10]            
-            print('\ny =', y)
-#            if y.strip() == 'A':
-#                continue
-            
-            k = len(values)
-            print('\n--------- k =', k, '----------')
-            
-            if k < 2:
-                print('\nk = ', k, ', skip.\n')
-                continue
-            
-            sod_sm_list = []
-            sod_gm_list = []
-            dis_k_sm_list = []
-            dis_k_gm_list = []
-            dis_k_gi_min_list = []
-            time_fitting_list = []
-            time_generating_list = []
-            time_total_list = []
-            nb_sod_sm2gm = [0, 0, 0]
-            nb_dis_k_sm2gm = [0, 0, 0]
-            nb_dis_k_gi2sm = [0, 0, 0]
-            nb_dis_k_gi2gm = [0, 0, 0]
-            repeats_better_sod_sm2gm = []
-            repeats_better_dis_k_sm2gm = []
-            repeats_better_dis_k_gi2sm = []
-            repeats_better_dis_k_gi2gm = []
-            
-            # get Gram matrix for this part of data.
-            if Kmatrix is not None:
-                if is_separate:
-                    Kmatrix_sub = Kmatrix[i].copy()
-                else:
-                    Kmatrix_sub = Kmatrix[values,:]
-                    Kmatrix_sub = Kmatrix_sub[:,values]
-            else:
-                Kmatrix_sub = None
-            
-            for repeat in range(repeats):
-                print('\nrepeat =', repeat)
-                random.seed(rdn_seed_list[repeat])
-                median_set_idx_idx = random.sample(range(0, len(values)), k)
-                median_set_idx = [values[idx] for idx in median_set_idx_idx]
-                print('median set: ', median_set_idx)
-                Gn_median = [Gn[g] for g in values]
-#                from notebooks.utils.plot_all_graphs import draw_Fingerprint_graph
-#                for Gn in Gn_median:
-#                    draw_Fingerprint_graph(Gn, save=None)
-                
-                # GENERATING & COMPUTING!!
-                res_sods, res_dis_ks, res_times = median_on_k_closest_graphs(Gn_median, 
-                        node_label, edge_label, 
-                        gkernel, k, fit_method=fit_method, graph_dir=graph_dir,
-                        edit_cost_constants=None, group_min=median_set_idx_idx, 
-                        dataset=ds_name, initial_solutions=initial_solutions,
-                        edit_cost_name=edit_cost_name, init_ecc=init_ecc,
-                        Kmatrix=Kmatrix_sub, parallel=False)
-                sod_sm = res_sods[0]
-                sod_gm = res_sods[1] 
-                dis_k_sm = res_dis_ks[0]
-                dis_k_gm = res_dis_ks[1]
-                dis_k_gi = res_dis_ks[2]
-                dis_k_gi_min = res_dis_ks[3]
-                idx_dis_k_gi_min = res_dis_ks[4]
-                time_fitting = res_times[0]
-                time_generating = res_times[1]                    
-                
-                # write result detail.
-                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
-                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
-                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
-                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
-                if save_results:
-                    f_detail = open(dir_output + fn_output_detail, 'a')
-                    csv.writer(f_detail).writerow([ds_name, gkernel, 
-                              edit_cost_name, ged_method, attr_distance,
-                              fit_method, k, y, repeat,
-                              sod_sm, sod_gm, dis_k_sm, dis_k_gm, 
-                              dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
-                              dis_k_gi2gm, time_fitting, time_generating,
-                              time_fitting + time_generating, median_set_idx])
-                    f_detail.close()
-                
-                # compute result summary.
-                sod_sm_list.append(sod_sm)
-                sod_gm_list.append(sod_gm)
-                dis_k_sm_list.append(dis_k_sm)
-                dis_k_gm_list.append(dis_k_gm)
-                dis_k_gi_min_list.append(dis_k_gi_min)
-                time_fitting_list.append(time_fitting)
-                time_generating_list.append(time_generating)
-                time_total_list.append(time_fitting + time_generating)
-                # # SOD SM -> GM
-                if sod_sm > sod_gm:
-                    nb_sod_sm2gm[0] += 1
-                    repeats_better_sod_sm2gm.append(repeat)
-                elif sod_sm == sod_gm:
-                    nb_sod_sm2gm[1] += 1
-                elif sod_sm < sod_gm:
-                    nb_sod_sm2gm[2] += 1
-                # # dis_k SM -> GM
-                if dis_k_sm > dis_k_gm:
-                    nb_dis_k_sm2gm[0] += 1
-                    repeats_better_dis_k_sm2gm.append(repeat)
-                elif dis_k_sm == dis_k_gm:
-                    nb_dis_k_sm2gm[1] += 1
-                elif dis_k_sm < dis_k_gm:
-                    nb_dis_k_sm2gm[2] += 1
-                # # dis_k gi -> SM
-                if dis_k_gi_min > dis_k_sm:
-                    nb_dis_k_gi2sm[0] += 1
-                    repeats_better_dis_k_gi2sm.append(repeat)
-                elif dis_k_gi_min == dis_k_sm:
-                    nb_dis_k_gi2sm[1] += 1
-                elif dis_k_gi_min < dis_k_sm:
-                    nb_dis_k_gi2sm[2] += 1
-                # # dis_k gi -> GM
-                if dis_k_gi_min > dis_k_gm:
-                    nb_dis_k_gi2gm[0] += 1
-                    repeats_better_dis_k_gi2gm.append(repeat)
-                elif dis_k_gi_min == dis_k_gm:
-                    nb_dis_k_gi2gm[1] += 1
-                elif dis_k_gi_min < dis_k_gm:
-                    nb_dis_k_gi2gm[2] += 1
-                    
-                # save median graphs.
-                fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
-                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
-                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
-                fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
-                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
-                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
-#                reform_attributes(G_best_kernel)
-                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + str(y) + '.repeat' + str(repeat)
-                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='default')
-                
-                # plot median graphs.
-                if ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low':
-                    set_median = loadGXL(fn_pre_sm_new + '.gxl')
-                    gen_median = loadGXL(fn_pre_gm_new + '.gxl')                
-                    draw_Letter_graph(set_median, fn_pre_sm_new)
-                    draw_Letter_graph(gen_median, fn_pre_gm_new)
-                    draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-                    
-            # write result summary for each letter. 
-            sod_sm_mean_list.append(np.mean(sod_sm_list))
-            sod_gm_mean_list.append(np.mean(sod_gm_list))
-            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
-            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
-            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
-            time_fitting_mean_list.append(np.mean(time_fitting_list))
-            time_generating_mean_list.append(np.mean(time_generating_list))
-            time_total_mean_list.append(np.mean(time_total_list))
-            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
-            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
-            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            if save_results:
-                f_summary = open(dir_output + fn_output_summary, 'a')
-                csv.writer(f_summary).writerow([ds_name, gkernel, 
-                          edit_cost_name, ged_method, attr_distance,
-                          fit_method, k, y,
-                          sod_sm_mean_list[-1], sod_gm_mean_list[-1], 
-                          dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
-                          dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                          dis_k_gi2sm_mean, dis_k_gi2gm_mean, 
-                          time_fitting_mean_list[-1], time_generating_mean_list[-1],
-                          time_total_mean_list[-1], nb_sod_sm2gm, 
-                          nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, 
-                          repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, 
-                          repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
-                f_summary.close()
-            
-
-        # write result summary for each letter. 
-        sod_sm_mean = np.mean(sod_sm_mean_list)
-        sod_gm_mean = np.mean(sod_gm_mean_list)
-        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
-        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
-        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
-        time_fitting_mean = np.mean(time_fitting_list)
-        time_generating_mean = np.mean(time_generating_list)
-        time_total_mean = np.mean(time_total_list)
-        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
-        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
-        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
-        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
-        if save_results:
-            f_summary = open(dir_output + fn_output_summary, 'a')
-            csv.writer(f_summary).writerow([ds_name, gkernel, 
-                      edit_cost_name, ged_method, attr_distance,
-                      fit_method, k, 'all',
-                      sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
-                      dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                      dis_k_gi2sm_mean, dis_k_gi2gm_mean,
-                      time_fitting_mean, time_generating_mean, time_total_mean])
-            f_summary.close()
-        
-    print('\ncomplete.')
-    
-    
-#Dessin median courrant
-def draw_Letter_graph(graph, file_prefix):
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
-    nx.draw_networkx(graph, pos)
-    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-#    plt.show()
-    plt.clf()
-    
-    
-def compute_gm_for_each_class(Gn, y_all, gkernel, parallel='imap_unordered', is_separate=True):
-    
-    if is_separate:
-        print('the Gram matrix is computed for each class.')
-        y_idx = get_same_item_indices(y_all)
-        Kmatrix = []
-        run_time = []
-        k_dis_data = []
-        for i, (y, values) in enumerate(y_idx.items()):
-            print('The ', str(i), ' class:')
-            Gn_i = [Gn[val] for val in values]
-            time0 = time.time()            
-            Kmatrix.append(compute_kernel(Gn_i, gkernel, None, None, True, parallel=parallel))
-            run_time.append(time.time() - time0)
-            k_dis_data.append(kernel_distance_matrix(Gn_i, None, None, 
-                Kmatrix=Kmatrix[i], gkernel=gkernel, verbose=True))
-        np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-                 Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
-        dis_max = np.max([item[1] for item in k_dis_data])
-        dis_min = np.min([item[2] for item in k_dis_data])
-        dis_mean = np.mean([item[3] for item in k_dis_data])
-        print('pair distances - dis_max, dis_min, dis_mean:', dis_max, dis_min,
-              dis_mean)
-
-    else:
-        time0 = time.time()
-        Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel=parallel)
-        run_time = time.time() - time0
-        np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-                 Kmatrix=Kmatrix, run_time=run_time, is_separate=is_separate)
-        k_dis_data = kernel_distance_matrix(Gn, None, None, 
-            Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-        print('the Gram matrix is computed for the whole dataset.')
-        print('pair distances - dis_max, dis_min, dis_mean:', k_dis_data[1], 
-              k_dis_data[2], k_dis_data[3])
-    
-    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    k_dis_data = [dis_mat, dis_max, dis_min, dis_mean]
-    return Kmatrix, run_time, k_dis_data
-    
-
-if __name__ == "__main__":
-#    #### xp 1: Letter-high, spkernel.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Letter-high'
-#    gkernel = 'spkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-#    # remove graphs without edges.
-#    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_edges(G) != 0]
-#    idx = [G[0] for G in Gn]
-#    Gn = [G[1] for G in Gn]
-#    y_all = [y_all[i] for i in idx]
-##    Gn = Gn[0:50]
-##    y_all = y_all[0:50]
-#    # compute pair distances.
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=None, gkernel=gkernel, verbose=True)
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    # fitting and computing.
-#    fit_methods = ['random', 'expert', 'k-graphs']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method}
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
-        
-        
-#    #### xp 2: Letter-high, sspkernel.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Letter-high'
-#    gkernel = 'structuralspkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-##    Gn = Gn[0:50]
-##    y_all = y_all[0:50]
-#    # compute pair distances.
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=None, gkernel=gkernel, verbose=True)
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    # fitting and computing.
-#    fit_methods = ['random', 'expert', 'k-graphs']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method}
-#        print('parameters: ', parameters)
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean])
-        
-        
-#    #### xp 3: SYNTHETICnew, sspkernel, using NON_SYMBOLIC.
-#    gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.structuralspkernel.gm.npz')
-#    Kmatrix = gmfile['Kmatrix']
-#    run_time = gmfile['run_time']
-#    # normalization
-#    Kmatrix_diag = Kmatrix.diagonal().copy()
-#    for i in range(len(Kmatrix)):
-#        for j in range(i, len(Kmatrix)):
-#            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-#            Kmatrix[j][i] = Kmatrix[i][j]
-##    np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
-##             Kmatrix=Kmatrix, run_time=run_time)
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'SYNTHETICnew'
-#    gkernel = 'structuralspkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-#    # remove graphs without nodes and edges.
-#    Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-#          and nx.number_of_edges(G) != 0)]
-#    idx = [G[0] for G in Gn]
-#    Gn = [G[1] for G in Gn]
-#    y_all = [y_all[i] for i in idx]
-##    Gn = Gn[0:10]
-##    y_all = y_all[0:10]
-#    for G in Gn:
-#        G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-#    # compute pair distances.
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'NON_SYMBOLIC',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method}
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=1,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-        
-        
-#    ### xp 4: SYNTHETICnew, spkernel, using NON_SYMBOLIC.
-#    gmfile = np.load('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm.npz')
-#    Kmatrix = gmfile['Kmatrix']
-#    # normalization
-#    Kmatrix_diag = Kmatrix.diagonal().copy()
-#    for i in range(len(Kmatrix)):
-#        for j in range(i, len(Kmatrix)):
-#            Kmatrix[i][j] /= np.sqrt(Kmatrix_diag[i] * Kmatrix_diag[j])
-#            Kmatrix[j][i] = Kmatrix[i][j]
-#    run_time = 21821.35
-#    np.savez('results/xp_fit_method/Kmatrix.SYNTHETICnew.spkernel.gm',
-#             Kmatrix=Kmatrix, run_time=run_time)
-#    
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'SYNTHETICnew'
-#    gkernel = 'spkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-##    # remove graphs without nodes and edges.
-##    Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_node(G) != 0
-##          and nx.number_of_edges(G) != 0)]
-##    idx = [G[0] for G in Gn]
-##    Gn = [G[1] for G in Gn]
-##    y_all = [y_all[i] for i in idx]
-##    Gn = Gn[0:5]
-##    y_all = y_all[0:5]
-#    for G in Gn:
-#        G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-#    
-#    # compute/read Gram matrix and pair distances.
-##    Kmatrix = compute_kernel(Gn, gkernel, None, None, True)
-##    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-##         Kmatrix=Kmatrix)
-#    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-#    Kmatrix = gmfile['Kmatrix']
-#    run_time = gmfile['run_time']
-##    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-##    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-#    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    Kmatrix = np.zeros((len(Gn), len(Gn)))
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'NON_SYMBOLIC',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method}
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=1,
-#                                       Gn_data=[Gn, y_all, graph_dir],
-#                                       k_dis_data=[dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-    
-    
-#    #### xp 5: Fingerprint, sspkernel, using LETTER2, only node attrs.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Fingerprint'
-#    gkernel = 'structuralspkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-#    # remove graphs without nodes and edges.
-#    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
-##          and nx.number_of_edges(G) != 0)]
-#    idx = [G[0] for G in Gn]
-#    Gn = [G[1] for G in Gn]
-#    y_all = [y_all[i] for i in idx]
-#    y_idx = get_same_item_indices(y_all)
-#    # remove unused labels.
-#    for G in Gn:
-#        G.graph['edge_attrs'] = []
-#        for edge in G.edges:
-#            del G.edges[edge]['attributes']
-#            del G.edges[edge]['orient']
-#            del G.edges[edge]['angle']
-##    Gn = Gn[805:815]
-##    y_all = y_all[805:815]
-#    for G in Gn:
-#        G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-#            
-#    # compute/read Gram matrix and pair distances.
-##    Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-##    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-##         Kmatrix=Kmatrix)
-#    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-#    Kmatrix = gmfile['Kmatrix']
-##    run_time = gmfile['run_time']
-##    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-##    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-##    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    Kmatrix = np.zeros((len(Gn), len(Gn)))
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method,
-#                      'init_ecc': [1,1,1,1,1]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-        
-        
-#    #### xp 6: Letter-med, sspkernel.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Letter-med'
-#    gkernel = 'structuralspkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-##    Gn = Gn[0:50]
-##    y_all = y_all[0:50]
-#    
-#    # compute/read Gram matrix and pair distances.
-#    Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-#    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-#         Kmatrix=Kmatrix)
-##    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-##    Kmatrix = gmfile['Kmatrix']
-##    run_time = gmfile['run_time']
-##    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-##    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-##    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    Kmatrix = np.zeros((len(Gn), len(Gn)))
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method,
-#                      'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
-#        print('parameters: ', parameters)
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-        
-        
-#    #### xp 7: Letter-low, sspkernel.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Letter-low'
-#    gkernel = 'structuralspkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-##    Gn = Gn[0:50]
-##    y_all = y_all[0:50]
-#    
-#    # compute/read Gram matrix and pair distances.
-#    Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-#    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-#         Kmatrix=Kmatrix)
-##    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-##    Kmatrix = gmfile['Kmatrix']
-##    run_time = gmfile['run_time']
-##    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-##    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-##    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    Kmatrix = np.zeros((len(Gn), len(Gn)))
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method,
-#                      'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
-#        print('parameters: ', parameters)
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-        
-    
-#    #### xp 8: Letter-med, spkernel.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Letter-med'
-#    gkernel = 'spkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-#    # remove graphs without nodes and edges.
-#    Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-#          and nx.number_of_edges(G) != 0)]
-#    idx = [G[0] for G in Gn]
-#    Gn = [G[1] for G in Gn]
-#    y_all = [y_all[i] for i in idx]
-##    Gn = Gn[0:50]
-##    y_all = y_all[0:50]
-#    
-#    # compute/read Gram matrix and pair distances.
-#    Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-#    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-#         Kmatrix=Kmatrix)
-##    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-##    Kmatrix = gmfile['Kmatrix']
-##    run_time = gmfile['run_time']
-##    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-##    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-##    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    Kmatrix = np.zeros((len(Gn), len(Gn)))
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method,
-#                      'init_ecc': [0.525, 0.525, 0.75, 0.475, 0.475]}
-#        print('parameters: ', parameters)
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-        
-
-#    #### xp 9: Letter-low, spkernel.
-#    # load dataset.
-#    print('getting dataset and computing kernel distance matrix first...')
-#    ds_name = 'Letter-low'
-#    gkernel = 'spkernel'
-#    Gn, y_all, graph_dir = get_dataset(ds_name)
-#    # remove graphs without nodes and edges.
-#    Gn = [(idx, G) for idx, G in enumerate(Gn) if (nx.number_of_nodes(G) != 0
-#          and nx.number_of_edges(G) != 0)]
-#    idx = [G[0] for G in Gn]
-#    Gn = [G[1] for G in Gn]
-#    y_all = [y_all[i] for i in idx]
-##    Gn = Gn[0:50]
-##    y_all = y_all[0:50]
-#    
-#    # compute/read Gram matrix and pair distances.
-#    Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-#    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-#         Kmatrix=Kmatrix)
-##    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-##    Kmatrix = gmfile['Kmatrix']
-##    run_time = gmfile['run_time']
-##    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-##    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-##    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-##    Kmatrix = np.zeros((len(Gn), len(Gn)))
-##    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-#    
-#    # fitting and computing.
-#    fit_methods = ['k-graphs', 'expert', 'random', 'random', 'random']
-#    for fit_method in fit_methods:
-#        print('\n-------------------------------------')
-#        print('fit method:', fit_method)
-#        parameters = {'ds_name': ds_name,
-#                      'gkernel': gkernel,
-#                      'edit_cost_name': 'LETTER2',
-#                      'ged_method': 'mIPFP',
-#                      'attr_distance': 'euclidean',
-#                      'fit_method': fit_method,
-#                      'init_ecc': [0.075, 0.075, 0.25, 0.075, 0.075]}
-#        print('parameters: ', parameters)
-#        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-#                                       initial_solutions=40,
-#                                       Gn_data = [Gn, y_all, graph_dir],
-#                                       k_dis_data = [dis_mat, dis_max, dis_min, dis_mean],
-#                                       Kmatrix=Kmatrix)
-        
-        
-    #### xp 5: COIL-DEL, sspkernel, using LETTER2, only node attrs.
-    # load dataset.
-    print('getting dataset and computing kernel distance matrix first...')
-    ds_name = 'COIL-DEL'
-    gkernel = 'structuralspkernel'
-    Gn, y_all, graph_dir = get_dataset(ds_name)
-    # remove graphs without nodes and edges.
-    Gn = [(idx, G) for idx, G in enumerate(Gn) if nx.number_of_nodes(G) != 0]
-#          and nx.number_of_edges(G) != 0)]
-    idx = [G[0] for G in Gn]
-    Gn = [G[1] for G in Gn]
-    y_all = [y_all[i] for i in idx]
-    # remove unused labels.
-    for G in Gn:
-        G.graph['edge_labels'] = []
-        for edge in G.edges:
-            del G.edges[edge]['bond_type']
-            del G.edges[edge]['valence']
-#    Gn = Gn[805:815]
-#    y_all = y_all[805:815]
-    for G in Gn:
-        G.graph['filename'] = 'graph' + str(G.graph['name']) + '.gxl'
-            
-    # compute/read Gram matrix and pair distances.
-    is_separate = True
-    Kmatrix, run_time, k_dis_data = compute_gm_for_each_class(Gn, 
-                                                              y_all, 
-                                                              gkernel, 
-                                                              parallel='imap_unordered',
-                                                              is_separate=is_separate)
-#    Kmatrix = compute_kernel(Gn, gkernel, None, None, True, parallel='imap_unordered')
-#    np.savez('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm', 
-#         Kmatrix=Kmatrix)
-#    gmfile = np.load('results/xp_fit_method/Kmatrix.' + ds_name + '.' + gkernel + '.gm.npz')
-#    Kmatrix = gmfile['Kmatrix']
-#    run_time = gmfile['run_time']
-#    Kmatrix = Kmatrix[[0,1,2,3,4],:]
-#    Kmatrix = Kmatrix[:,[0,1,2,3,4]]
-#    print('\nTime to compute Gram matrix for the whole dataset: ', run_time)
-#    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, 
-#        Kmatrix=Kmatrix, gkernel=gkernel, verbose=True)
-#    Kmatrix = np.zeros((len(Gn), len(Gn)))
-#    dis_mat, dis_max, dis_min, dis_mean = 0, 0, 0, 0
-    
-    # fitting and computing.
-    fit_methods = ['k-graphs', 'random', 'random', 'random']
-    for fit_method in fit_methods:
-        print('\n-------------------------------------')
-        print('fit method:', fit_method)
-        parameters = {'ds_name': ds_name,
-                      'gkernel': gkernel,
-                      'edit_cost_name': 'LETTER2',
-                      'ged_method': 'mIPFP',
-                      'attr_distance': 'euclidean',
-                      'fit_method': fit_method,
-                      'init_ecc': [3,3,1,3,3]} # [0.525, 0.525, 0.001, 0.125, 0.125]}
-        xp_fit_method_for_non_symbolic(parameters, save_results=True, 
-                                       initial_solutions=40,
-                                       Gn_data=[Gn, y_all, graph_dir],
-                                       k_dis_data=k_dis_data,
-                                       Kmatrix=Kmatrix, 
-                                       is_separate=is_separate)
\ No newline at end of file
diff --git a/gklearn/preimage/xp_letter_h.py b/gklearn/preimage/xp_letter_h.py
deleted file mode 100644
index 1e16fcf..0000000
--- a/gklearn/preimage/xp_letter_h.py
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Tue Jan 14 15:39:29 2020
-
-@author: ljia
-"""
-import numpy as np
-import random
-import csv
-from shutil import copyfile
-import networkx as nx
-import matplotlib.pyplot as plt
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
-from gklearn.preimage.utils import get_same_item_indices, kernel_distance_matrix
-from gklearn.preimage.find_best_k import getRelations
-
-
-def xp_letter_h_LETTER2_cost():
-    ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-    
-    dis_mat, dis_max, dis_min, dis_mean = kernel_distance_matrix(Gn, None, None, Kmatrix=None, gkernel='structuralspkernel')
-    for G in Gn:
-        reform_attributes(G)
-#    ds = {'name': 'Letter-high', 
-#          'dataset': '../datasets/Letter-high/Letter-high_A.txt'}  # node/edge symb
-#    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    gkernel = 'structuralspkernel'
-    node_label = None
-    edge_label = None
-    ds_name = 'letter-h'
-    dir_output = 'results/xp_letter_h/'
-    save_results = True
-    cost = 'LETTER2'
-    
-    repeats = 1
-#    k_list = range(2, 11)
-    k_list = [150]
-    fit_method = 'k-graphs'
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    
-    if save_results:
-        # create result files.
-        fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-        f_detail = open(dir_output + fn_output_detail, 'a')
-        csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-                  'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-                  'dis_k gi -> GM', 'median set'])
-        f_detail.close()
-        fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-        f_summary = open(dir_output + fn_output_summary, 'a')
-        csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-                  'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-                  'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', 
-                  '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', 
-                  'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', 
-                  'repeats better dis_k gi -> GM'])
-        f_summary.close()
-    
-    random.seed(1)
-    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-    
-    for k in k_list:
-        print('\n--------- k =', k, '----------')
-        
-        sod_sm_mean_list = []
-        sod_gm_mean_list = []
-        dis_k_sm_mean_list = []
-        dis_k_gm_mean_list = []
-        dis_k_gi_min_mean_list = []
-#        nb_sod_sm2gm = [0, 0, 0]
-#        nb_dis_k_sm2gm = [0, 0, 0]
-#        nb_dis_k_gi2sm = [0, 0, 0]
-#        nb_dis_k_gi2gm = [0, 0, 0]
-#        repeats_better_sod_sm2gm = []
-#        repeats_better_dis_k_sm2gm = []
-#        repeats_better_dis_k_gi2sm = []
-#        repeats_better_dis_k_gi2gm = []
-        
-        for i, (y, values) in enumerate(y_idx.items()):
-            print('\ny =', y)
-#            y = 'F'
-#            values = y_idx[y]
-#            values = values[0:10]
-            
-            k = len(values)
-            
-            sod_sm_list = []
-            sod_gm_list = []
-            dis_k_sm_list = []
-            dis_k_gm_list = []
-            dis_k_gi_min_list = []
-            nb_sod_sm2gm = [0, 0, 0]
-            nb_dis_k_sm2gm = [0, 0, 0]
-            nb_dis_k_gi2sm = [0, 0, 0]
-            nb_dis_k_gi2gm = [0, 0, 0]
-            repeats_better_sod_sm2gm = []
-            repeats_better_dis_k_sm2gm = []
-            repeats_better_dis_k_gi2sm = []
-            repeats_better_dis_k_gi2gm = []
-            
-            for repeat in range(repeats):
-                print('\nrepeat =', repeat)
-                random.seed(rdn_seed_list[repeat])
-                median_set_idx_idx = random.sample(range(0, len(values)), k)
-                median_set_idx = [values[idx] for idx in median_set_idx_idx]
-                print('median set: ', median_set_idx)
-                Gn_median = [Gn[g] for g in values]
-        
-                sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
-                    = median_on_k_closest_graphs(Gn_median, node_label, edge_label, 
-                        gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
-                        edit_costs=None, group_min=median_set_idx_idx, 
-                        dataset='Letter', cost=cost, parallel=False)
-                    
-                # write result detail.
-                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
-                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
-                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
-                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
-                if save_results:
-                    f_detail = open(dir_output + fn_output_detail, 'a')
-                    csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, 
-                              y, repeat,
-                              sod_sm, sod_gm, dis_k_sm, dis_k_gm, 
-                              dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
-                              dis_k_gi2gm, median_set_idx])
-                    f_detail.close()
-                
-                # compute result summary.
-                sod_sm_list.append(sod_sm)
-                sod_gm_list.append(sod_gm)
-                dis_k_sm_list.append(dis_k_sm)
-                dis_k_gm_list.append(dis_k_gm)
-                dis_k_gi_min_list.append(dis_k_gi_min)
-                # # SOD SM -> GM
-                if sod_sm > sod_gm:
-                    nb_sod_sm2gm[0] += 1
-                    repeats_better_sod_sm2gm.append(repeat)
-                elif sod_sm == sod_gm:
-                    nb_sod_sm2gm[1] += 1
-                elif sod_sm < sod_gm:
-                    nb_sod_sm2gm[2] += 1
-                # # dis_k SM -> GM
-                if dis_k_sm > dis_k_gm:
-                    nb_dis_k_sm2gm[0] += 1
-                    repeats_better_dis_k_sm2gm.append(repeat)
-                elif dis_k_sm == dis_k_gm:
-                    nb_dis_k_sm2gm[1] += 1
-                elif dis_k_sm < dis_k_gm:
-                    nb_dis_k_sm2gm[2] += 1
-                # # dis_k gi -> SM
-                if dis_k_gi_min > dis_k_sm:
-                    nb_dis_k_gi2sm[0] += 1
-                    repeats_better_dis_k_gi2sm.append(repeat)
-                elif dis_k_gi_min == dis_k_sm:
-                    nb_dis_k_gi2sm[1] += 1
-                elif dis_k_gi_min < dis_k_sm:
-                    nb_dis_k_gi2sm[2] += 1
-                # # dis_k gi -> GM
-                if dis_k_gi_min > dis_k_gm:
-                    nb_dis_k_gi2gm[0] += 1
-                    repeats_better_dis_k_gi2gm.append(repeat)
-                elif dis_k_gi_min == dis_k_gm:
-                    nb_dis_k_gi2gm[1] += 1
-                elif dis_k_gi_min < dis_k_gm:
-                    nb_dis_k_gi2gm[2] += 1
-                    
-                # save median graphs.
-                fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
-                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
-                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
-                fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
-                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
-                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
-                reform_attributes(G_best_kernel)
-                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
-                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
-                
-                # plot median graphs.
-                set_median = loadGXL(fn_pre_sm_new + '.gxl')
-                gen_median = loadGXL(fn_pre_gm_new + '.gxl')
-                draw_Letter_graph(set_median, fn_pre_sm_new)
-                draw_Letter_graph(gen_median, fn_pre_gm_new)
-                draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-                    
-            # write result summary for each letter. 
-            sod_sm_mean_list.append(np.mean(sod_sm_list))
-            sod_gm_mean_list.append(np.mean(sod_gm_list))
-            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
-            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
-            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
-            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
-            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
-            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            if save_results:
-                f_summary = open(dir_output + fn_output_summary, 'a')
-                csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
-                          sod_sm_mean_list[-1], sod_gm_mean_list[-1], 
-                          dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
-                          dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                          dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, 
-                          nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, 
-                          repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, 
-                          repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
-                f_summary.close()
-            
-
-        # write result summary for each letter. 
-        sod_sm_mean = np.mean(sod_sm_mean_list)
-        sod_gm_mean = np.mean(sod_gm_mean_list)
-        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
-        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
-        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
-        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
-        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
-        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
-        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
-        if save_results:
-            f_summary = open(dir_output + fn_output_summary, 'a')
-            csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
-                      sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
-                      dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                      dis_k_gi2sm_mean, dis_k_gi2gm_mean])
-            f_summary.close()
-        
-    print('\ncomplete.')
-
-
-def xp_letter_h():
-    ds = {'dataset': 'cpp_ext/data/collections/Letter.xml',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/data/datasets/Letter/HIGH/'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'], extra_params=ds['graph_dir'])
-    for G in Gn:
-        reform_attributes(G)
-#    ds = {'name': 'Letter-high', 
-#          'dataset': '../datasets/Letter-high/Letter-high_A.txt'}  # node/edge symb
-#    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    gkernel = 'structuralspkernel'
-    node_label = None
-    edge_label = None
-    ds_name = 'letter-h'
-    dir_output = 'results/xp_letter_h/'
-    save_results = False
-    
-    repeats = 1
-#    k_list = range(2, 11)
-    k_list = [150]
-    fit_method = 'k-graphs'
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    
-    if save_results:
-        # create result files.
-        fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-        f_detail = open(dir_output + fn_output_detail, 'a')
-        csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-                  'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-                  'dis_k gi -> GM', 'median set'])
-        f_detail.close()
-        fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-        f_summary = open(dir_output + fn_output_summary, 'a')
-        csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-                  'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-                  'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-                  'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', 
-                  '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', 
-                  'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', 
-                  'repeats better dis_k gi -> GM'])
-        f_summary.close()
-    
-    random.seed(1)
-    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-    
-    for k in k_list:
-        print('\n--------- k =', k, '----------')
-        
-        sod_sm_mean_list = []
-        sod_gm_mean_list = []
-        dis_k_sm_mean_list = []
-        dis_k_gm_mean_list = []
-        dis_k_gi_min_mean_list = []
-#        nb_sod_sm2gm = [0, 0, 0]
-#        nb_dis_k_sm2gm = [0, 0, 0]
-#        nb_dis_k_gi2sm = [0, 0, 0]
-#        nb_dis_k_gi2gm = [0, 0, 0]
-#        repeats_better_sod_sm2gm = []
-#        repeats_better_dis_k_sm2gm = []
-#        repeats_better_dis_k_gi2sm = []
-#        repeats_better_dis_k_gi2gm = []
-        
-        for i, (y, values) in enumerate(y_idx.items()):
-            print('\ny =', y)
-#            y = 'N'
-#            values = y_idx[y]
-#            values = values[0:10]
-            
-            k = len(values)
-            
-            sod_sm_list = []
-            sod_gm_list = []
-            dis_k_sm_list = []
-            dis_k_gm_list = []
-            dis_k_gi_min_list = []
-            nb_sod_sm2gm = [0, 0, 0]
-            nb_dis_k_sm2gm = [0, 0, 0]
-            nb_dis_k_gi2sm = [0, 0, 0]
-            nb_dis_k_gi2gm = [0, 0, 0]
-            repeats_better_sod_sm2gm = []
-            repeats_better_dis_k_sm2gm = []
-            repeats_better_dis_k_gi2sm = []
-            repeats_better_dis_k_gi2gm = []
-            
-            for repeat in range(repeats):
-                print('\nrepeat =', repeat)
-                random.seed(rdn_seed_list[repeat])
-                median_set_idx_idx = random.sample(range(0, len(values)), k)
-                median_set_idx = [values[idx] for idx in median_set_idx_idx]
-                print('median set: ', median_set_idx)
-                Gn_median = [Gn[g] for g in values]
-        
-                sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
-                    = median_on_k_closest_graphs(Gn_median, node_label, edge_label, 
-                        gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
-                        edit_costs=None, group_min=median_set_idx_idx, 
-                        dataset='Letter', parallel=False)
-                    
-                # write result detail.
-                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
-                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
-                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
-                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
-                if save_results:
-                    f_detail = open(dir_output + fn_output_detail, 'a')
-                    csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, 
-                              y, repeat,
-                              sod_sm, sod_gm, dis_k_sm, dis_k_gm, 
-                              dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
-                              dis_k_gi2gm, median_set_idx])
-                    f_detail.close()
-                
-                # compute result summary.
-                sod_sm_list.append(sod_sm)
-                sod_gm_list.append(sod_gm)
-                dis_k_sm_list.append(dis_k_sm)
-                dis_k_gm_list.append(dis_k_gm)
-                dis_k_gi_min_list.append(dis_k_gi_min)
-                # # SOD SM -> GM
-                if sod_sm > sod_gm:
-                    nb_sod_sm2gm[0] += 1
-                    repeats_better_sod_sm2gm.append(repeat)
-                elif sod_sm == sod_gm:
-                    nb_sod_sm2gm[1] += 1
-                elif sod_sm < sod_gm:
-                    nb_sod_sm2gm[2] += 1
-                # # dis_k SM -> GM
-                if dis_k_sm > dis_k_gm:
-                    nb_dis_k_sm2gm[0] += 1
-                    repeats_better_dis_k_sm2gm.append(repeat)
-                elif dis_k_sm == dis_k_gm:
-                    nb_dis_k_sm2gm[1] += 1
-                elif dis_k_sm < dis_k_gm:
-                    nb_dis_k_sm2gm[2] += 1
-                # # dis_k gi -> SM
-                if dis_k_gi_min > dis_k_sm:
-                    nb_dis_k_gi2sm[0] += 1
-                    repeats_better_dis_k_gi2sm.append(repeat)
-                elif dis_k_gi_min == dis_k_sm:
-                    nb_dis_k_gi2sm[1] += 1
-                elif dis_k_gi_min < dis_k_sm:
-                    nb_dis_k_gi2sm[2] += 1
-                # # dis_k gi -> GM
-                if dis_k_gi_min > dis_k_gm:
-                    nb_dis_k_gi2gm[0] += 1
-                    repeats_better_dis_k_gi2gm.append(repeat)
-                elif dis_k_gi_min == dis_k_gm:
-                    nb_dis_k_gi2gm[1] += 1
-                elif dis_k_gi_min < dis_k_gm:
-                    nb_dis_k_gi2gm[2] += 1
-                    
-                # save median graphs.
-                fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
-                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
-                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
-                fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
-                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
-                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
-                reform_attributes(G_best_kernel)
-                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + y + '.repeat' + str(repeat)
-                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib-letter')
-                
-                # plot median graphs.
-                set_median = loadGXL(fn_pre_sm_new + '.gxl')
-                gen_median = loadGXL(fn_pre_gm_new + '.gxl')
-                draw_Letter_graph(set_median, fn_pre_sm_new)
-                draw_Letter_graph(gen_median, fn_pre_gm_new)
-                draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-                    
-            # write result summary for each letter. 
-            sod_sm_mean_list.append(np.mean(sod_sm_list))
-            sod_gm_mean_list.append(np.mean(sod_gm_list))
-            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
-            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
-            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
-            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
-            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
-            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            if save_results:
-                f_summary = open(dir_output + fn_output_summary, 'a')
-                csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
-                          sod_sm_mean_list[-1], sod_gm_mean_list[-1], 
-                          dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
-                          dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                          dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, 
-                          nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, 
-                          repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, 
-                          repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
-                f_summary.close()
-            
-
-        # write result summary for each letter. 
-        sod_sm_mean = np.mean(sod_sm_mean_list)
-        sod_gm_mean = np.mean(sod_gm_mean_list)
-        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
-        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
-        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
-        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
-        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
-        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
-        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
-        if save_results:
-            f_summary = open(dir_output + fn_output_summary, 'a')
-            csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
-                      sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
-                      dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                      dis_k_gi2sm_mean, dis_k_gi2gm_mean])
-            f_summary.close()
-        
-    print('\ncomplete.')
-    
-    
-#Dessin median courrant
-def draw_Letter_graph(graph, file_prefix):
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
-    nx.draw_networkx(graph, pos)
-    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-#    plt.show()
-    plt.clf()
-        
-
-if __name__ == "__main__":
-#    xp_letter_h()
-    xp_letter_h_LETTER2_cost()
\ No newline at end of file
diff --git a/gklearn/preimage/xp_monoterpenoides.py b/gklearn/preimage/xp_monoterpenoides.py
deleted file mode 100644
index 2270471..0000000
--- a/gklearn/preimage/xp_monoterpenoides.py
+++ /dev/null
@@ -1,249 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Thu Jan 16 11:03:11 2020
-
-@author: ljia
-"""
-
-import numpy as np
-import random
-import csv
-from shutil import copyfile
-import networkx as nx
-import matplotlib.pyplot as plt
-
-from gklearn.utils.graphfiles import loadDataset, loadGXL, saveGXL
-from gklearn.preimage.test_k_closest_graphs import median_on_k_closest_graphs, reform_attributes
-from gklearn.preimage.utils import get_same_item_indices
-from gklearn.preimage.find_best_k import getRelations
-
-def xp_monoterpenoides():
-    import os
-
-    ds = {'dataset': '../../datasets/monoterpenoides/dataset_10+.ds',
-          'graph_dir': os.path.dirname(os.path.realpath(__file__)) + '../../datasets/monoterpenoides/'}  # node/edge symb
-    Gn, y_all = loadDataset(ds['dataset'])
-#    ds = {'name': 'Letter-high', 
-#          'dataset': '../datasets/Letter-high/Letter-high_A.txt'}  # node/edge symb
-#    Gn, y_all = loadDataset(ds['dataset'])
-#    Gn = Gn[0:50]
-    gkernel = 'treeletkernel'
-    node_label = 'atom'
-    edge_label = 'bond_type'
-    ds_name = 'monoterpenoides'
-    dir_output = 'results/xp_monoterpenoides/'
-    
-    repeats = 1
-#    k_list = range(2, 11)
-    k_list = [0]
-    fit_method = 'k-graphs'
-    # get indices by classes.
-    y_idx = get_same_item_indices(y_all)
-    
-    # create result files.
-    fn_output_detail = 'results_detail.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-    f_detail = open(dir_output + fn_output_detail, 'a')
-    csv.writer(f_detail).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-              'target', 'repeat', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-              'dis_k gi -> GM', 'median set'])
-    f_detail.close()
-    fn_output_summary = 'results_summary.' + ds_name + '.' + gkernel + '.' + fit_method + '.csv'
-    f_summary = open(dir_output + fn_output_summary, 'a')
-    csv.writer(f_summary).writerow(['dataset', 'graph kernel', 'fit method', 'k', 
-              'target', 'SOD SM', 'SOD GM', 'dis_k SM', 'dis_k GM',
-              'min dis_k gi', 'SOD SM -> GM', 'dis_k SM -> GM', 'dis_k gi -> SM', 
-              'dis_k gi -> GM', '# SOD SM -> GM', '# dis_k SM -> GM', 
-              '# dis_k gi -> SM', '# dis_k gi -> GM', 'repeats better SOD SM -> GM', 
-              'repeats better dis_k SM -> GM', 'repeats better dis_k gi -> SM', 
-              'repeats better dis_k gi -> GM'])
-    f_summary.close()
-    
-    random.seed(1)
-    rdn_seed_list = random.sample(range(0, repeats * 100), repeats)
-    
-    for k in k_list:
-        print('\n--------- k =', k, '----------')
-        
-        sod_sm_mean_list = []
-        sod_gm_mean_list = []
-        dis_k_sm_mean_list = []
-        dis_k_gm_mean_list = []
-        dis_k_gi_min_mean_list = []
-#        nb_sod_sm2gm = [0, 0, 0]
-#        nb_dis_k_sm2gm = [0, 0, 0]
-#        nb_dis_k_gi2sm = [0, 0, 0]
-#        nb_dis_k_gi2gm = [0, 0, 0]
-#        repeats_better_sod_sm2gm = []
-#        repeats_better_dis_k_sm2gm = []
-#        repeats_better_dis_k_gi2sm = []
-#        repeats_better_dis_k_gi2gm = []
-        
-        for i, (y, values) in enumerate(y_idx.items()):
-            print('\ny =', y)
-#            y = 'I'
-#            values = y_idx[y]
-            
-            k = len(values)
-#            k = kkk
-            
-            sod_sm_list = []
-            sod_gm_list = []
-            dis_k_sm_list = []
-            dis_k_gm_list = []
-            dis_k_gi_min_list = []
-            nb_sod_sm2gm = [0, 0, 0]
-            nb_dis_k_sm2gm = [0, 0, 0]
-            nb_dis_k_gi2sm = [0, 0, 0]
-            nb_dis_k_gi2gm = [0, 0, 0]
-            repeats_better_sod_sm2gm = []
-            repeats_better_dis_k_sm2gm = []
-            repeats_better_dis_k_gi2sm = []
-            repeats_better_dis_k_gi2gm = []
-            
-            for repeat in range(repeats):
-                print('\nrepeat =', repeat)
-                random.seed(rdn_seed_list[repeat])
-                median_set_idx_idx = random.sample(range(0, len(values)), k)
-                median_set_idx = [values[idx] for idx in median_set_idx_idx]
-                print('median set: ', median_set_idx)
-                Gn_median = [Gn[g] for g in values]
-        
-                sod_sm, sod_gm, dis_k_sm, dis_k_gm, dis_k_gi, dis_k_gi_min, idx_dis_k_gi_min \
-                    = median_on_k_closest_graphs(Gn_median, node_label, edge_label, 
-                        gkernel, k, fit_method=fit_method, graph_dir=ds['graph_dir'],
-                        edit_costs=None, group_min=median_set_idx_idx, 
-                        dataset=ds_name, parallel=False)
-                    
-                # write result detail.
-                sod_sm2gm = getRelations(np.sign(sod_gm - sod_sm))
-                dis_k_sm2gm = getRelations(np.sign(dis_k_gm - dis_k_sm))
-                dis_k_gi2sm = getRelations(np.sign(dis_k_sm - dis_k_gi_min))
-                dis_k_gi2gm = getRelations(np.sign(dis_k_gm - dis_k_gi_min))
-                f_detail = open(dir_output + fn_output_detail, 'a')
-                csv.writer(f_detail).writerow([ds_name, gkernel, fit_method, k, 
-                          y, repeat,
-                          sod_sm, sod_gm, dis_k_sm, dis_k_gm, 
-                          dis_k_gi_min, sod_sm2gm, dis_k_sm2gm, dis_k_gi2sm,
-                          dis_k_gi2gm, median_set_idx])
-                f_detail.close()
-                
-                # compute result summary.
-                sod_sm_list.append(sod_sm)
-                sod_gm_list.append(sod_gm)
-                dis_k_sm_list.append(dis_k_sm)
-                dis_k_gm_list.append(dis_k_gm)
-                dis_k_gi_min_list.append(dis_k_gi_min)
-                # # SOD SM -> GM
-                if sod_sm > sod_gm:
-                    nb_sod_sm2gm[0] += 1
-                    repeats_better_sod_sm2gm.append(repeat)
-                elif sod_sm == sod_gm:
-                    nb_sod_sm2gm[1] += 1
-                elif sod_sm < sod_gm:
-                    nb_sod_sm2gm[2] += 1
-                # # dis_k SM -> GM
-                if dis_k_sm > dis_k_gm:
-                    nb_dis_k_sm2gm[0] += 1
-                    repeats_better_dis_k_sm2gm.append(repeat)
-                elif dis_k_sm == dis_k_gm:
-                    nb_dis_k_sm2gm[1] += 1
-                elif dis_k_sm < dis_k_gm:
-                    nb_dis_k_sm2gm[2] += 1
-                # # dis_k gi -> SM
-                if dis_k_gi_min > dis_k_sm:
-                    nb_dis_k_gi2sm[0] += 1
-                    repeats_better_dis_k_gi2sm.append(repeat)
-                elif dis_k_gi_min == dis_k_sm:
-                    nb_dis_k_gi2sm[1] += 1
-                elif dis_k_gi_min < dis_k_sm:
-                    nb_dis_k_gi2sm[2] += 1
-                # # dis_k gi -> GM
-                if dis_k_gi_min > dis_k_gm:
-                    nb_dis_k_gi2gm[0] += 1
-                    repeats_better_dis_k_gi2gm.append(repeat)
-                elif dis_k_gi_min == dis_k_gm:
-                    nb_dis_k_gi2gm[1] += 1
-                elif dis_k_gi_min < dis_k_gm:
-                    nb_dis_k_gi2gm[2] += 1
-                    
-                # save median graphs.
-                fname_sm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/set_median.gxl'
-                fn_pre_sm_new = dir_output + 'medians/set_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
-                copyfile(fname_sm, fn_pre_sm_new + '.gxl')
-                fname_gm = os.path.dirname(os.path.realpath(__file__)) + '/cpp_ext/output/tmp_ged/gen_median.gxl'
-                fn_pre_gm_new = dir_output + 'medians/gen_median.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
-                copyfile(fname_gm, fn_pre_gm_new + '.gxl')
-                G_best_kernel = Gn_median[idx_dis_k_gi_min].copy()
-#                reform_attributes(G_best_kernel)
-                fn_pre_g_best_kernel = dir_output + 'medians/g_best_kernel.' + fit_method \
-                    + '.k' + str(int(k)) + '.y' + str(int(y)) + '.repeat' + str(repeat)
-                saveGXL(G_best_kernel, fn_pre_g_best_kernel + '.gxl', method='gedlib')
-                
-#                # plot median graphs.
-#                set_median = loadGXL(fn_pre_sm_new + '.gxl')
-#                gen_median = loadGXL(fn_pre_gm_new + '.gxl')
-#                draw_Letter_graph(set_median, fn_pre_sm_new)
-#                draw_Letter_graph(gen_median, fn_pre_gm_new)
-#                draw_Letter_graph(G_best_kernel, fn_pre_g_best_kernel)
-                    
-            # write result summary for each letter. 
-            sod_sm_mean_list.append(np.mean(sod_sm_list))
-            sod_gm_mean_list.append(np.mean(sod_gm_list))
-            dis_k_sm_mean_list.append(np.mean(dis_k_sm_list))
-            dis_k_gm_mean_list.append(np.mean(dis_k_gm_list))
-            dis_k_gi_min_mean_list.append(np.mean(dis_k_gi_min_list))
-            sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean_list[-1] - sod_sm_mean_list[-1]))
-            dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_sm_mean_list[-1]))
-            dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean_list[-1] - dis_k_gi_min_mean_list[-1]))
-            f_summary = open(dir_output + fn_output_summary, 'a')
-            csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, y,
-                      sod_sm_mean_list[-1], sod_gm_mean_list[-1], 
-                      dis_k_sm_mean_list[-1], dis_k_gm_mean_list[-1],
-                      dis_k_gi_min_mean_list[-1], sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                      dis_k_gi2sm_mean, dis_k_gi2gm_mean, nb_sod_sm2gm, 
-                      nb_dis_k_sm2gm, nb_dis_k_gi2sm, nb_dis_k_gi2gm, 
-                      repeats_better_sod_sm2gm, repeats_better_dis_k_sm2gm, 
-                      repeats_better_dis_k_gi2sm, repeats_better_dis_k_gi2gm])
-            f_summary.close()
-            
-
-        # write result summary for each letter. 
-        sod_sm_mean = np.mean(sod_sm_mean_list)
-        sod_gm_mean = np.mean(sod_gm_mean_list)
-        dis_k_sm_mean = np.mean(dis_k_sm_mean_list)
-        dis_k_gm_mean = np.mean(dis_k_gm_mean_list)
-        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
-        sod_sm2gm_mean = getRelations(np.sign(sod_gm_mean - sod_sm_mean))
-        dis_k_sm2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_sm_mean))
-        dis_k_gi2sm_mean = getRelations(np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
-        dis_k_gi2gm_mean = getRelations(np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
-        f_summary = open(dir_output + fn_output_summary, 'a')
-        csv.writer(f_summary).writerow([ds_name, gkernel, fit_method, k, 'all',
-                  sod_sm_mean, sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean,
-                  dis_k_gi_min_mean, sod_sm2gm_mean, dis_k_sm2gm_mean, 
-                  dis_k_gi2sm_mean, dis_k_gi2gm_mean])
-        f_summary.close()
-            
-        
-    print('\ncomplete.')
-    
-    
-#Dessin median courrant
-def draw_Letter_graph(graph, file_prefix):
-    plt.figure()
-    pos = {}
-    for n in graph.nodes:
-        pos[n] = np.array([float(graph.node[n]['x']),float(graph.node[n]['y'])])
-    nx.draw_networkx(graph, pos)
-    plt.savefig(file_prefix + '.eps', format='eps', dpi=300)
-#    plt.show()
-    plt.clf()
-    
-
-if __name__ == "__main__":
-    xp_monoterpenoides()
\ No newline at end of file